denoise.c 15.1 KB
Newer Older
1
2
3
4
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
5
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
6
#include <math.h>
Jean-Marc Valin's avatar
Jean-Marc Valin committed
7
#include "pitch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
8
9
#include "rnn.h"
#include "rnn_data.h"
10
11
12
13
14
15

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)

Jean-Marc Valin's avatar
Jean-Marc Valin committed
16
17
#define PITCH_MIN_PERIOD 60
#define PITCH_MAX_PERIOD 768
Jean-Marc Valin's avatar
Jean-Marc Valin committed
18
19
#define PITCH_FRAME_SIZE 960
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
20
21
22

#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
23
24
25
26
27
28
29
30
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
31
#define CEPS_MEM 8
32
33
#define NB_DELTA_CEPS 6

Jean-Marc Valin's avatar
Jean-Marc Valin committed
34
#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
35

36
37
38
39

#define TRAINING 0


40
41
42
43
44
45
46
47
48
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
49
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
50
  float dct_table[NB_BANDS*NB_BANDS];
51
52
53
54
} CommonState;

typedef struct {
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
55
56
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
57
  float synthesis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
58
  float pitch_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
59
  float pitch_enh_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
60
61
  float last_gain;
  int last_period;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
62
  float mem_hp_x[2];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
63
  RNNState rnn;
64
65
} DenoiseState;

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
66
#if SMOOTH_BANDS
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
      tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
132
133
134
135
136
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
137
    opus_val32 sum = 0;
138
139
140
141
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
142
    bandE[i] = sum;
143
144
145
146
147
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
148
  memset(g, 0, FREQ_SIZE);
149
150
151
152
153
154
155
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
156
157
#endif

158
159
160
161

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
162
  int i;
163
164
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
165
166
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
167
168
169
170
171
172
173
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
174
175
176
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
204

Jean-Marc Valin's avatar
Jean-Marc Valin committed
205
static void forward_transform(kiss_fft_cpx *out, const float *in) {
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
220
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
221
222
223
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
224
  check_init();
225
226
227
228
229
230
231
232
233
234
235
236
237
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
238
239
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
  st = malloc(sizeof(DenoiseState));
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
261

Jean-Marc Valin's avatar
Jean-Marc Valin committed
262
static int frame_analysis(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P, float *Ex, float *Ep, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
263
264
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
265
  float E = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
266
267
268
  float *ceps_0, *ceps_1, *ceps_2;
  float spec_variability = 0;
  float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
269
  float p[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
270
  float Exp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
271
272
273
274
275
  float pitch_buf[PITCH_BUF_SIZE>>1];
  int pitch_index;
  float gain;
  float *(pre[1]);
  float tmp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
276
277
278
279
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
280
281
  forward_transform(X, x);
  compute_band_energy(Ex, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
282
  if (features == NULL) return 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
283
284
285
286
287
288
289
  RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
  RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
  pre[0] = &st->pitch_buf[0];
  pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
  pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
               PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
  pitch_index = PITCH_MAX_PERIOD-pitch_index;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
290

Jean-Marc Valin's avatar
Jean-Marc Valin committed
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
  gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
          PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
  st->last_period = pitch_index;
  st->last_gain = gain;
  for (i=0;i<WINDOW_SIZE;i++)
    p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
  apply_window(p);
  forward_transform(P, p);
  compute_band_energy(Ep, P);
  compute_band_corr(Exp, X, P);
  for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
  dct(tmp, Exp);
  for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
  features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
  features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
  features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
  for (i=0;i<NB_BANDS;i++) {
    Ly[i] = log10(1e-2+Ex[i]);
    E += Ex[i];
  }
  if (!TRAINING && E < 0.04) {
    /* If there's no audio, avoid messing up the state. */
    RNN_CLEAR(features, NB_FEATURES);
    return 1;
  }
  dct(features, Ly);
  features[0] -= 12;
  features[1] -= 4;
  ceps_0 = st->cepstral_mem[st->memid];
  ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
  ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
  for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
  st->memid++;
  for (i=0;i<NB_DELTA_CEPS;i++) {
    features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
    features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
    features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
  }
  /* Spectral variability features. */
  if (st->memid == CEPS_MEM) st->memid = 0;
  for (i=0;i<CEPS_MEM;i++)
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
332
  {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
333
334
335
336
337
338
339
    int j;
    float mindist = 1e15f;
    for (j=0;j<CEPS_MEM;j++)
    {
      int k;
      float dist=0;
      for (k=0;k<NB_BANDS;k++)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
340
      {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
341
342
343
        float tmp;
        tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
        dist += tmp*tmp;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
344
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
345
346
      if (j!=i)
        mindist = MIN32(mindist, dist);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
347
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
348
    spec_variability += mindist;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
349
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
350
  features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
351
  return TRAINING && E < 0.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
352
353
354
355
356
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
357
358
359
360
361
362
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
363
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
364
365
366
367
368
369
370
371
372
373
374
  int i;
  for (i=0;i<N;i++) {
    float xi, yi;
    xi = x[i];
    yi = x[i] + mem[0];
    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
    y[i] = yi;
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
375

Jean-Marc Valin's avatar
Jean-Marc Valin committed
376
377
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
378
379
  kiss_fft_cpx X[FREQ_SIZE];
  kiss_fft_cpx P[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
380
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
381
  float Ex[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
382
383
384
385
  float features[NB_FEATURES];
  float g[NB_BANDS];
  float gf[FREQ_SIZE]={1};
  float vad_prob;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
386
  int silence;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
387
388
389
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
390
  silence = frame_analysis(st, X, P, Ex, Ep, features, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
391

392
  if (!silence) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
393
394
    compute_rnn(&st->rnn, g, &vad_prob, features);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
395
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
396
    for (i=0;i<FREQ_SIZE;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
397
398
      X[i].r *= gf[i];
      X[i].i *= gf[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
399
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
400
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
401
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
402

Jean-Marc Valin's avatar
Jean-Marc Valin committed
403
  frame_synthesis(st, out, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
404
405
}

406
#if TRAINING
Jean-Marc Valin's avatar
Jean-Marc Valin committed
407

Jean-Marc Valin's avatar
Jean-Marc Valin committed
408
409
410
411
412
413
414
415
416
417
418
static float uni_rand() {
  return rand()/(double)RAND_MAX-.5;
}

static void rand_resp(float *a, float *b) {
  a[0] = .75*uni_rand();
  a[1] = .75*uni_rand();
  b[0] = .75*uni_rand();
  b[1] = .75*uni_rand();
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
419
int main(int argc, char **argv) {
420
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
421
  int count=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
422
423
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
424
425
426
427
  float a_noise[2] = {0};
  float b_noise[2] = {0};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
428
429
  float mem_hp_x[2]={0};
  float mem_hp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
430
431
  float mem_resp_x[2]={0};
  float mem_resp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
432
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
433
434
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
435
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
436
437
  int gain_change_count=0;
  float speech_gain = 1, noise_gain = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
438
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
439
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
440
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
441
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
442
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
443
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
444
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
445
446
447
448
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
449
450
451
452
453
454
455
456
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
457
458
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
459
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
460
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
461
    float g[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
462
    float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
463
    short tmp[FRAME_SIZE];
464
    float vad=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
465
    float vad_prob;
466
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
467
    if (++gain_change_count > 101*300) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
468
      speech_gain = pow(10., (-40+(rand()%60))/20.);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
469
470
471
      noise_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()%10==0) noise_gain = 0;
      noise_gain *= speech_gain;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
472
      if (rand()%10==0) speech_gain = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
473
      gain_change_count = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
474
475
      rand_resp(a_noise, b_noise);
      rand_resp(a_sig, b_sig);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
476
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
477
478
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
479
    for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
480
481
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
482
    for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
483
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
484
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
485
    biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
486
    biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
487
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
488
    for (i=0;i<FRAME_SIZE;i++) E += x[i]*(float)x[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
489
    if (E > 1e9f*speech_gain*speech_gain) {
490
      vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
491
    } else if (E > 1e8f*speech_gain*speech_gain) {
492
493
494
495
496
497
498
499
500
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
501

Jean-Marc Valin's avatar
Jean-Marc Valin committed
502
503
    frame_analysis(st, X, NULL, Ex, NULL, NULL, x);
    frame_analysis(noise_state, N, NULL, En, NULL, NULL, n);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
504
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
505
    int silence = frame_analysis(noisy, Y, P, Ey, Ep, features, xn);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
506
    //printf("%f %d\n", noisy->last_gain, noisy->last_period);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
507
    for (i=0;i<NB_BANDS;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
508
      g[i] = sqrt((Ex[i]+1e-2)/(Ey[i]+1e-2));
Jean-Marc Valin's avatar
Jean-Marc Valin committed
509
      if (g[i] > 1) g[i] = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
510
      if (silence) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
511
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
512
513
    count++;
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
514
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
515
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
516
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
517
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
518
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
519
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
520
521
522
523
    fwrite(features, sizeof(float), NB_FEATURES, stdout);
    fwrite(g, sizeof(float), NB_BANDS, stdout);
    fwrite(Ln, sizeof(float), NB_BANDS, stdout);
    fwrite(&vad, sizeof(float), 1, stdout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
524
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
525
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
526
    compute_rnn(&noisy->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
527
528
    //for (i=0;i<NB_BANDS;i++) scanf("%f", &g[i]);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
529
530
531
532
533
534
535
536
537
538
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
#endif
    frame_synthesis(noisy, xn, Y);

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
539
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
540
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
541
  fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
542
543
544
  fclose(f1);
  fclose(f2);
  fclose(fout);
545
546
  return 0;
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576

#else

int main(int argc, char **argv) {
  int i;
  float x[FRAME_SIZE];
  FILE *f1, *fout;
  DenoiseState *st;
  st = rnnoise_create();
  if (argc!=3) {
    fprintf(stderr, "usage: %s <noisy speech> <output denoised>\n", argv[0]);
    return 1;
  }
  f1 = fopen(argv[1], "r");
  fout = fopen(argv[2], "w");
  while (1) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    rnnoise_process_frame(st, x, x);
    for (i=0;i<FRAME_SIZE;i++) tmp[i] = x[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
  }
  fclose(f1);
  fclose(fout);
  return 0;
}

#endif