denoise.c 17.7 KB
Newer Older
Jean-Marc Valin's avatar
Jean-Marc Valin committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* Copyright (c) 2017 Mozilla */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

31
32
33
34
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
35
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
36
#include <math.h>
Jean-Marc Valin's avatar
Jean-Marc Valin committed
37
#include "rnnoise.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
38
#include "pitch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
39
#include "arch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
40
41
#include "rnn.h"
#include "rnn_data.h"
42
43
44
45
46
47

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)

Jean-Marc Valin's avatar
Jean-Marc Valin committed
48
49
#define PITCH_MIN_PERIOD 60
#define PITCH_MAX_PERIOD 768
Jean-Marc Valin's avatar
Jean-Marc Valin committed
50
51
#define PITCH_FRAME_SIZE 960
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
52
53
54

#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
55
56
57
58
59
60
61
62
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
63
#define CEPS_MEM 8
64
65
#define NB_DELTA_CEPS 6

Jean-Marc Valin's avatar
Jean-Marc Valin committed
66
#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
67

68

Jean-Marc Valin's avatar
Jean-Marc Valin committed
69
#ifndef TRAINING
70
#define TRAINING 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
71
#endif
72

73
74
75
76
77
78
79
80
81
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
82
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
83
  float dct_table[NB_BANDS*NB_BANDS];
84
85
} CommonState;

Jean-Marc Valin's avatar
Jean-Marc Valin committed
86
struct DenoiseState {
87
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
88
89
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
90
  float synthesis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
91
  float pitch_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
92
  float pitch_enh_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
93
94
  float last_gain;
  int last_period;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
95
  float mem_hp_x[2];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
96
  RNNState rnn;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
97
};
98

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
99
#if SMOOTH_BANDS
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
      tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
165
166
167
168
169
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
170
    opus_val32 sum = 0;
171
172
173
174
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
175
    bandE[i] = sum;
176
177
178
179
180
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
181
  memset(g, 0, FREQ_SIZE);
182
183
184
185
186
187
188
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
189
190
#endif

191
192
193
194

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
195
  int i;
196
197
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
198
199
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
200
201
202
203
204
205
206
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
207
208
209
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
237

Jean-Marc Valin's avatar
Jean-Marc Valin committed
238
static void forward_transform(kiss_fft_cpx *out, const float *in) {
239
240
241
242
243
244
245
246
247
248
249
250
251
252
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
253
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
254
255
256
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
257
  check_init();
258
259
260
261
262
263
264
265
266
267
268
269
270
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
271
272
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
273
274
275
276
277
278
279
280
281
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
282
283
284
285
int rnnoise_get_size() {
  return sizeof(DenoiseState);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
286
287
288
289
290
291
292
int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
293
  st = malloc(rnnoise_get_size());
Jean-Marc Valin's avatar
Jean-Marc Valin committed
294
295
296
297
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
298
299
300
301
void rnnoise_destroy(DenoiseState *st) {
  free(st);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
302
303
#if TRAINING
int lowpass = FREQ_SIZE;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
304
int band_lp = NB_BANDS;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
305
306
#endif

307
308
static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
309
  float x[WINDOW_SIZE];
310
311
312
313
314
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(X, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
315
316
317
318
#if TRAINING
  for (i=lowpass;i<FREQ_SIZE;i++)
    X[i].r = X[i].i = 0;
#endif
319
320
321
  compute_band_energy(Ex, X);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
322
323
static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P,
                                  float *Ex, float *Ep, float *Exp, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
324
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
325
  float E = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
326
327
328
  float *ceps_0, *ceps_1, *ceps_2;
  float spec_variability = 0;
  float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
329
330
331
332
333
334
  float p[WINDOW_SIZE];
  float pitch_buf[PITCH_BUF_SIZE>>1];
  int pitch_index;
  float gain;
  float *(pre[1]);
  float tmp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
335
  float follow, logMax;
336
  frame_analysis(st, X, Ex, in);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
337
338
339
340
341
342
343
  RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
  RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
  pre[0] = &st->pitch_buf[0];
  pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
  pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
               PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
  pitch_index = PITCH_MAX_PERIOD-pitch_index;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
344

Jean-Marc Valin's avatar
Jean-Marc Valin committed
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
  gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
          PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
  st->last_period = pitch_index;
  st->last_gain = gain;
  for (i=0;i<WINDOW_SIZE;i++)
    p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
  apply_window(p);
  forward_transform(P, p);
  compute_band_energy(Ep, P);
  compute_band_corr(Exp, X, P);
  for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
  dct(tmp, Exp);
  for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
  features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
  features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
  features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
361
362
  logMax = -2;
  follow = -2;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
363
364
  for (i=0;i<NB_BANDS;i++) {
    Ly[i] = log10(1e-2+Ex[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
365
366
367
    Ly[i] = MAX16(logMax-6, MAX16(follow-1.2, Ly[i]));
    logMax = MAX16(logMax, Ly[i]);
    follow = MAX16(follow-1, Ly[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
    E += Ex[i];
  }
  if (!TRAINING && E < 0.04) {
    /* If there's no audio, avoid messing up the state. */
    RNN_CLEAR(features, NB_FEATURES);
    return 1;
  }
  dct(features, Ly);
  features[0] -= 12;
  features[1] -= 4;
  ceps_0 = st->cepstral_mem[st->memid];
  ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
  ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
  for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
  st->memid++;
  for (i=0;i<NB_DELTA_CEPS;i++) {
    features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
    features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
    features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
  }
  /* Spectral variability features. */
  if (st->memid == CEPS_MEM) st->memid = 0;
  for (i=0;i<CEPS_MEM;i++)
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
391
  {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
392
393
394
395
396
397
398
    int j;
    float mindist = 1e15f;
    for (j=0;j<CEPS_MEM;j++)
    {
      int k;
      float dist=0;
      for (k=0;k<NB_BANDS;k++)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
399
      {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
400
401
402
        float tmp;
        tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
        dist += tmp*tmp;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
403
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
404
405
      if (j!=i)
        mindist = MIN32(mindist, dist);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
406
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
407
    spec_variability += mindist;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
408
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
409
  features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
410
  return TRAINING && E < 0.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
411
412
413
414
415
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
416
417
418
419
420
421
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
422
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
423
424
425
426
427
428
429
430
431
432
433
  int i;
  for (i=0;i<N;i++) {
    float xi, yi;
    xi = x[i];
    yi = x[i] + mem[0];
    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
    y[i] = yi;
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
                  const float *Exp, const float *g) {
  int i;
  float r[NB_BANDS];
  float rf[FREQ_SIZE] = {0};
  for (i=0;i<NB_BANDS;i++) {
#if 0
    if (Exp[i]>g[i]) r[i] = 1;
    else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i]));
    r[i] = MIN16(1, MAX16(0, r[i]));
#else
    if (Exp[i]>g[i]) r[i] = 1;
    else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i])));
    r[i] = sqrt(MIN16(1, MAX16(0, r[i])));
#endif
    r[i] *= sqrt(Ex[i]/(1e-8+Ep[i]));
  }
  interp_band_gain(rf, r);
  for (i=0;i<FREQ_SIZE;i++) {
    X[i].r += rf[i]*P[i].r;
    X[i].i += rf[i]*P[i].i;
  }
  float newE[NB_BANDS];
  compute_band_energy(newE, X);
  float norm[NB_BANDS];
  float normf[FREQ_SIZE]={0};
  for (i=0;i<NB_BANDS;i++) {
    norm[i] = sqrt(Ex[i]/(1e-8+newE[i]));
  }
  interp_band_gain(normf, norm);
  for (i=0;i<FREQ_SIZE;i++) {
    X[i].r *= normf[i];
    X[i].i *= normf[i];
  }
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
469

Jean-Marc Valin's avatar
Jean-Marc Valin committed
470
471
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
472
473
  kiss_fft_cpx X[FREQ_SIZE];
  kiss_fft_cpx P[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
474
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
475
  float Ex[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
476
  float Exp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
477
478
479
480
  float features[NB_FEATURES];
  float g[NB_BANDS];
  float gf[FREQ_SIZE]={1};
  float vad_prob;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
481
  int silence;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
482
483
484
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
485
  silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
486

487
  if (!silence) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
488
    compute_rnn(&st->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
489
    pitch_filter(X, P, Ex, Ep, Exp, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
490
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
491
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
492
    for (i=0;i<FREQ_SIZE;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
493
494
      X[i].r *= gf[i];
      X[i].i *= gf[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
495
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
496
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
497
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
498

Jean-Marc Valin's avatar
Jean-Marc Valin committed
499
  frame_synthesis(st, out, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
500
501
}

502
#if TRAINING
Jean-Marc Valin's avatar
Jean-Marc Valin committed
503

Jean-Marc Valin's avatar
Jean-Marc Valin committed
504
505
506
507
508
509
510
511
512
513
514
static float uni_rand() {
  return rand()/(double)RAND_MAX-.5;
}

static void rand_resp(float *a, float *b) {
  a[0] = .75*uni_rand();
  a[1] = .75*uni_rand();
  b[0] = .75*uni_rand();
  b[1] = .75*uni_rand();
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
515
int main(int argc, char **argv) {
516
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
517
  int count=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
518
519
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
520
521
522
523
  float a_noise[2] = {0};
  float b_noise[2] = {0};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
524
525
  float mem_hp_x[2]={0};
  float mem_hp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
526
527
  float mem_resp_x[2]={0};
  float mem_resp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
528
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
529
530
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
531
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
532
533
  int gain_change_count=0;
  float speech_gain = 1, noise_gain = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
534
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
535
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
536
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
537
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
538
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
539
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
540
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
541
542
543
544
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
545
546
547
548
549
550
551
552
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
553
554
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
555
    float Exp[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
556
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
557
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
558
    float g[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
559
    float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
560
    short tmp[FRAME_SIZE];
561
    float vad=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
562
    float vad_prob;
563
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
564
    if (++gain_change_count > 101*300) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
565
      speech_gain = pow(10., (-40+(rand()%60))/20.);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
566
567
568
      noise_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()%10==0) noise_gain = 0;
      noise_gain *= speech_gain;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
569
      if (rand()%10==0) speech_gain = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
570
      gain_change_count = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
571
572
      rand_resp(a_noise, b_noise);
      rand_resp(a_sig, b_sig);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
573
      lowpass = FREQ_SIZE * 3000./24000. * pow(10., rand()/(double)RAND_MAX);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
574
575
576
577
578
579
      for (i=0;i<NB_BANDS;i++) {
        if (eband5ms[i]<<FRAME_SIZE_SHIFT > lowpass) {
          band_lp = i;
          break;
        }
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
580
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
581
582
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
583
    for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
584
585
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
586
    for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
587
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
588
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
589
    biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
590
    biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
591
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
592
    for (i=0;i<FRAME_SIZE;i++) E += x[i]*(float)x[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
593
    if (E > 1e9f*speech_gain*speech_gain) {
594
      vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
595
    } else if (E > 1e8f*speech_gain*speech_gain) {
596
597
598
599
600
601
602
603
604
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
605

Jean-Marc Valin's avatar
Jean-Marc Valin committed
606
    frame_analysis(st, Y, Ey, x);
607
    frame_analysis(noise_state, N, En, n);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
608
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
609
610
    int silence = compute_frame_features(noisy, X, P, Ex, Ep, Exp, features, xn);
    pitch_filter(X, P, Ex, Ep, Exp, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
611
    //printf("%f %d\n", noisy->last_gain, noisy->last_period);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
612
    for (i=0;i<NB_BANDS;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
613
      g[i] = sqrt((Ey[i]+1e-2)/(Ex[i]+1e-2));
Jean-Marc Valin's avatar
Jean-Marc Valin committed
614
      if (g[i] > 1) g[i] = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
615
      if (silence || i > band_lp) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
616
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
617
618
    count++;
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
619
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
620
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
621
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
622
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
623
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
624
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
625
626
627
628
    fwrite(features, sizeof(float), NB_FEATURES, stdout);
    fwrite(g, sizeof(float), NB_BANDS, stdout);
    fwrite(Ln, sizeof(float), NB_BANDS, stdout);
    fwrite(&vad, sizeof(float), 1, stdout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
629
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
630
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
631
    compute_rnn(&noisy->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
632
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
633
634
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
635
636
      X[i].r *= gf[i];
      X[i].i *= gf[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
637
638
    }
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
639
    frame_synthesis(noisy, xn, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
640
641
642

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
643
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
644
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
645
  fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
646
647
648
  fclose(f1);
  fclose(f2);
  fclose(fout);
649
650
  return 0;
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
651
652

#endif