denoise.c 15.4 KB
Newer Older
1
2
3
4
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
5
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
6
#include <math.h>
Jean-Marc Valin's avatar
Jean-Marc Valin committed
7
#include "pitch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
8
9
#include "rnn.h"
#include "rnn_data.h"
10
11
12
13
14
15

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)

Jean-Marc Valin's avatar
Jean-Marc Valin committed
16
17
#define PITCH_MIN_PERIOD 60
#define PITCH_MAX_PERIOD 768
Jean-Marc Valin's avatar
Jean-Marc Valin committed
18
19
#define PITCH_FRAME_SIZE 960
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
20
21
22

#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
23
24
25
26
27
28
29
30
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
31
#define CEPS_MEM 8
32
33
#define NB_DELTA_CEPS 6

Jean-Marc Valin's avatar
Jean-Marc Valin committed
34
#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
35

36
37
38
39

#define TRAINING 0


40
41
42
43
44
45
46
47
48
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
49
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
50
  float dct_table[NB_BANDS*NB_BANDS];
51
52
53
54
} CommonState;

typedef struct {
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
55
56
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
57
  float synthesis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
58
  float pitch_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
59
  float pitch_enh_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
60
61
  float last_gain;
  int last_period;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
62
  float mem_hp_x[2];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
63
  RNNState rnn;
64
65
} DenoiseState;

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
66
#if SMOOTH_BANDS
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
      tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
132
133
134
135
136
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
137
    opus_val32 sum = 0;
138
139
140
141
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
142
    bandE[i] = sum;
143
144
145
146
147
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
148
  memset(g, 0, FREQ_SIZE);
149
150
151
152
153
154
155
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
156
157
#endif

158
159
160
161

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
162
  int i;
163
164
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
165
166
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
167
168
169
170
171
172
173
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
174
175
176
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
204

Jean-Marc Valin's avatar
Jean-Marc Valin committed
205
static void forward_transform(kiss_fft_cpx *out, const float *in) {
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
220
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
221
222
223
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
224
  check_init();
225
226
227
228
229
230
231
232
233
234
235
236
237
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
238
239
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
  st = malloc(sizeof(DenoiseState));
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
261

Jean-Marc Valin's avatar
Jean-Marc Valin committed
262
static int frame_analysis(DenoiseState *st, kiss_fft_cpx *y, float *Ey, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
263
264
265
266
267
268
269
  float x[WINDOW_SIZE];
  int i;
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(y, x);
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
270
  compute_band_energy(Ey, y);
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
271
  if (1) {
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
272
273
    float p[WINDOW_SIZE];
    kiss_fft_cpx P[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
274
    float Ep[NB_BANDS], Exp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
275
276
277
278
279
280
281
282
283
284
285
286
287
    float pitch_buf[PITCH_BUF_SIZE>>1];
    int pitch_index;
    float gain;
    float *(pre[1]);
    RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
    RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
    pre[0] = &st->pitch_buf[0];
    pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
    pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
                 PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
    pitch_index = PITCH_MAX_PERIOD-pitch_index;

    gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
288
            PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
289
290
    st->last_period = pitch_index;
    st->last_gain = gain;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
291
292
293
    for (i=0;i<WINDOW_SIZE;i++)
      p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
    apply_window(p);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
294
295
296
    forward_transform(P, p);
    compute_band_energy(Ep, P);
    compute_band_corr(Exp, y, P);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
297
    for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ey[i]*Ep[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
298
    if (features) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
299
300
301
302
303
304
      float tmp[NB_BANDS];
      dct(tmp, Exp);
      for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
      features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
      features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
      features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
305
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
306
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
307
  float E = 0;
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
308
  {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
309
    if (features != NULL) {
310
      float *ceps_0, *ceps_1, *ceps_2;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
311
      float spec_variability = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
312
      float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
313
314
315
316
317
      E = 0;
      for (i=0;i<NB_BANDS;i++) {
        Ly[i] = log10(1e-2+Ey[i]);
        E += Ey[i];
      }
318
      if (!TRAINING && E < 0.04) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
319
320
321
322
        /* If there's no audio, avoid messing up the state. */
        RNN_CLEAR(features, NB_FEATURES);
        return 1;
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
323
      dct(features, Ly);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
324
325
      features[0] -= 12;
      features[1] -= 4;
326
327
328
329
      ceps_0 = st->cepstral_mem[st->memid];
      ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
      ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
      for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
330
      st->memid++;
331
332
333
334
335
336
      for (i=0;i<NB_DELTA_CEPS;i++) {
        features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
        features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
        features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
      }
      /* Spectral variability features. */
Jean-Marc Valin's avatar
Jean-Marc Valin committed
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
      if (st->memid == CEPS_MEM) st->memid = 0;
      for (i=0;i<CEPS_MEM;i++)
      {
        int j;
        float mindist = 1e15f;
        for (j=0;j<CEPS_MEM;j++)
        {
          int k;
          float dist=0;
          for (k=0;k<NB_BANDS;k++)
          {
            float tmp;
            tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
            dist += tmp*tmp;
          }
          if (j!=i)
            mindist = MIN32(mindist, dist);
        }
        spec_variability += mindist;
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
357
      features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
358
359
    }
  }
360
  return TRAINING && E < 0.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
361
362
363
364
365
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
366
367
368
369
370
371
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
372
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
373
374
375
376
377
378
379
380
381
382
383
  int i;
  for (i=0;i<N;i++) {
    float xi, yi;
    xi = x[i];
    yi = x[i] + mem[0];
    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
    y[i] = yi;
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
384

Jean-Marc Valin's avatar
Jean-Marc Valin committed
385
386
387
388
389
390
391
392
393
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
  int i;
  kiss_fft_cpx Y[FREQ_SIZE];
  float x[FRAME_SIZE];
  float Ey[NB_BANDS];
  float features[NB_FEATURES];
  float g[NB_BANDS];
  float gf[FREQ_SIZE]={1};
  float vad_prob;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
394
  int silence;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
395
396
397
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
398
  silence = frame_analysis(st, Y, Ey, features, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
399

400
  if (!silence) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
401
402
    compute_rnn(&st->rnn, g, &vad_prob, features);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
403
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
404
405
406
407
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
408
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
409
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
410
411
412
413

  frame_synthesis(st, out, Y);
}

414
#if TRAINING
Jean-Marc Valin's avatar
Jean-Marc Valin committed
415

Jean-Marc Valin's avatar
Jean-Marc Valin committed
416
417
418
419
420
421
422
423
424
425
426
static float uni_rand() {
  return rand()/(double)RAND_MAX-.5;
}

static void rand_resp(float *a, float *b) {
  a[0] = .75*uni_rand();
  a[1] = .75*uni_rand();
  b[0] = .75*uni_rand();
  b[1] = .75*uni_rand();
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
427
int main(int argc, char **argv) {
428
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
429
  int count=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
430
431
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
432
433
434
435
  float a_noise[2] = {0};
  float b_noise[2] = {0};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
436
437
  float mem_hp_x[2]={0};
  float mem_hp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
438
439
  float mem_resp_x[2]={0};
  float mem_resp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
440
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
441
442
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
443
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
444
445
  int gain_change_count=0;
  float speech_gain = 1, noise_gain = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
446
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
447
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
448
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
449
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
450
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
451
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
452
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
453
454
455
456
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
457
458
459
460
461
462
463
464
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
465
466
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
467
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
468
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
469
    float g[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
470
    float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
471
    short tmp[FRAME_SIZE];
472
    float vad=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
473
    float vad_prob;
474
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
475
    if (++gain_change_count > 101*300) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
476
      speech_gain = pow(10., (-40+(rand()%60))/20.);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
477
478
479
      noise_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()%10==0) noise_gain = 0;
      noise_gain *= speech_gain;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
480
      if (rand()%10==0) speech_gain = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
481
      gain_change_count = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
482
483
      rand_resp(a_noise, b_noise);
      rand_resp(a_sig, b_sig);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
484
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
485
486
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
487
    for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
488
489
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
490
    for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
491
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
492
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
493
    biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
494
    biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
495
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
496
    for (i=0;i<FRAME_SIZE;i++) E += x[i]*(float)x[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
497
    if (E > 1e9f*speech_gain*speech_gain) {
498
      vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
499
    } else if (E > 1e8f*speech_gain*speech_gain) {
500
501
502
503
504
505
506
507
508
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
509

Jean-Marc Valin's avatar
Jean-Marc Valin committed
510
    frame_analysis(st, X, Ex, NULL, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
511
    frame_analysis(noise_state, N, En, NULL, n);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
512
513
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
    int silence = frame_analysis(noisy, Y, Ey, features, xn);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
514
    //printf("%f %d\n", noisy->last_gain, noisy->last_period);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
515
    for (i=0;i<NB_BANDS;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
516
      g[i] = sqrt((Ex[i]+1e-2)/(Ey[i]+1e-2));
Jean-Marc Valin's avatar
Jean-Marc Valin committed
517
      if (g[i] > 1) g[i] = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
518
      if (silence) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
519
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
520
521
    count++;
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
522
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
523
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
524
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
525
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
526
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
527
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
528
529
530
531
    fwrite(features, sizeof(float), NB_FEATURES, stdout);
    fwrite(g, sizeof(float), NB_BANDS, stdout);
    fwrite(Ln, sizeof(float), NB_BANDS, stdout);
    fwrite(&vad, sizeof(float), 1, stdout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
532
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
533
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
534
    compute_rnn(&noisy->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
535
536
    //for (i=0;i<NB_BANDS;i++) scanf("%f", &g[i]);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
537
538
539
540
541
542
543
544
545
546
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
#endif
    frame_synthesis(noisy, xn, Y);

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
547
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
548
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
549
  fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
550
551
552
  fclose(f1);
  fclose(f2);
  fclose(fout);
553
554
  return 0;
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584

#else

int main(int argc, char **argv) {
  int i;
  float x[FRAME_SIZE];
  FILE *f1, *fout;
  DenoiseState *st;
  st = rnnoise_create();
  if (argc!=3) {
    fprintf(stderr, "usage: %s <noisy speech> <output denoised>\n", argv[0]);
    return 1;
  }
  f1 = fopen(argv[1], "r");
  fout = fopen(argv[2], "w");
  while (1) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    rnnoise_process_frame(st, x, x);
    for (i=0;i<FRAME_SIZE;i++) tmp[i] = x[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
  }
  fclose(f1);
  fclose(fout);
  return 0;
}

#endif