denoise.c 15.2 KB
Newer Older
1
2
3
4
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
5
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
6
#include <math.h>
Jean-Marc Valin's avatar
Jean-Marc Valin committed
7
#include "pitch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
8
9
#include "rnn.h"
#include "rnn_data.h"
10
11
12
13
14
15

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)

Jean-Marc Valin's avatar
Jean-Marc Valin committed
16
17
#define PITCH_MIN_PERIOD 60
#define PITCH_MAX_PERIOD 768
Jean-Marc Valin's avatar
Jean-Marc Valin committed
18
19
#define PITCH_FRAME_SIZE 960
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
20
21
22

#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
23
24
25
26
27
28
29
30
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
31
#define CEPS_MEM 8
32
33
#define NB_DELTA_CEPS 6

Jean-Marc Valin's avatar
Jean-Marc Valin committed
34
#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
35

36
37
38
39

#define TRAINING 0


40
41
42
43
44
45
46
47
48
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
49
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
50
  float dct_table[NB_BANDS*NB_BANDS];
51
52
53
54
} CommonState;

typedef struct {
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
55
56
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
57
  float synthesis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
58
  float pitch_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
59
  float pitch_enh_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
60
61
  float last_gain;
  int last_period;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
62
  float mem_hp_x[2];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
63
  RNNState rnn;
64
65
} DenoiseState;

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
66
#if SMOOTH_BANDS
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
      tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
132
133
134
135
136
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
137
    opus_val32 sum = 0;
138
139
140
141
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
142
    bandE[i] = sum;
143
144
145
146
147
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
148
  memset(g, 0, FREQ_SIZE);
149
150
151
152
153
154
155
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
156
157
#endif

158
159
160
161

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
162
  int i;
163
164
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
165
166
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
167
168
169
170
171
172
173
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
174
175
176
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
204

Jean-Marc Valin's avatar
Jean-Marc Valin committed
205
static void forward_transform(kiss_fft_cpx *out, const float *in) {
206
207
208
209
210
211
212
213
214
215
216
217
218
219
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
220
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
221
222
223
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
224
  check_init();
225
226
227
228
229
230
231
232
233
234
235
236
237
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
238
239
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
  st = malloc(sizeof(DenoiseState));
  rnnoise_init(st);
  return st;
}

261
262
static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
263
  float x[WINDOW_SIZE];
264
265
266
267
268
269
270
271
272
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(X, x);
  compute_band_energy(Ex, X);
}

static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P, float *Ex, float *Ep, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
273
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
274
  float E = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
275
276
277
  float *ceps_0, *ceps_1, *ceps_2;
  float spec_variability = 0;
  float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
278
  float p[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
279
  float Exp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
280
281
282
283
284
  float pitch_buf[PITCH_BUF_SIZE>>1];
  int pitch_index;
  float gain;
  float *(pre[1]);
  float tmp[NB_BANDS];
285
  frame_analysis(st, X, Ex, in);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
286
287
288
289
290
291
292
  RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
  RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
  pre[0] = &st->pitch_buf[0];
  pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
  pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
               PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
  pitch_index = PITCH_MAX_PERIOD-pitch_index;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
293

Jean-Marc Valin's avatar
Jean-Marc Valin committed
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
  gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
          PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
  st->last_period = pitch_index;
  st->last_gain = gain;
  for (i=0;i<WINDOW_SIZE;i++)
    p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
  apply_window(p);
  forward_transform(P, p);
  compute_band_energy(Ep, P);
  compute_band_corr(Exp, X, P);
  for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
  dct(tmp, Exp);
  for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
  features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
  features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
  features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
  for (i=0;i<NB_BANDS;i++) {
    Ly[i] = log10(1e-2+Ex[i]);
    E += Ex[i];
  }
  if (!TRAINING && E < 0.04) {
    /* If there's no audio, avoid messing up the state. */
    RNN_CLEAR(features, NB_FEATURES);
    return 1;
  }
  dct(features, Ly);
  features[0] -= 12;
  features[1] -= 4;
  ceps_0 = st->cepstral_mem[st->memid];
  ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
  ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
  for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
  st->memid++;
  for (i=0;i<NB_DELTA_CEPS;i++) {
    features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
    features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
    features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
  }
  /* Spectral variability features. */
  if (st->memid == CEPS_MEM) st->memid = 0;
  for (i=0;i<CEPS_MEM;i++)
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
335
  {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
336
337
338
339
340
341
342
    int j;
    float mindist = 1e15f;
    for (j=0;j<CEPS_MEM;j++)
    {
      int k;
      float dist=0;
      for (k=0;k<NB_BANDS;k++)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
343
      {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
344
345
346
        float tmp;
        tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
        dist += tmp*tmp;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
347
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
348
349
      if (j!=i)
        mindist = MIN32(mindist, dist);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
350
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
351
    spec_variability += mindist;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
352
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
353
  features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
354
  return TRAINING && E < 0.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
355
356
357
358
359
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
360
361
362
363
364
365
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
366
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
367
368
369
370
371
372
373
374
375
376
377
  int i;
  for (i=0;i<N;i++) {
    float xi, yi;
    xi = x[i];
    yi = x[i] + mem[0];
    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
    y[i] = yi;
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
378

Jean-Marc Valin's avatar
Jean-Marc Valin committed
379
380
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
381
382
  kiss_fft_cpx X[FREQ_SIZE];
  kiss_fft_cpx P[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
383
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
384
  float Ex[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
385
386
387
388
  float features[NB_FEATURES];
  float g[NB_BANDS];
  float gf[FREQ_SIZE]={1};
  float vad_prob;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
389
  int silence;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
390
391
392
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
393
  silence = compute_frame_features(st, X, P, Ex, Ep, features, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
394

395
  if (!silence) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
396
397
    compute_rnn(&st->rnn, g, &vad_prob, features);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
398
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
399
    for (i=0;i<FREQ_SIZE;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
400
401
      X[i].r *= gf[i];
      X[i].i *= gf[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
402
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
403
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
404
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
405

Jean-Marc Valin's avatar
Jean-Marc Valin committed
406
  frame_synthesis(st, out, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
407
408
}

409
#if TRAINING
Jean-Marc Valin's avatar
Jean-Marc Valin committed
410

Jean-Marc Valin's avatar
Jean-Marc Valin committed
411
412
413
414
415
416
417
418
419
420
421
static float uni_rand() {
  return rand()/(double)RAND_MAX-.5;
}

static void rand_resp(float *a, float *b) {
  a[0] = .75*uni_rand();
  a[1] = .75*uni_rand();
  b[0] = .75*uni_rand();
  b[1] = .75*uni_rand();
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
422
int main(int argc, char **argv) {
423
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
424
  int count=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
425
426
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
427
428
429
430
  float a_noise[2] = {0};
  float b_noise[2] = {0};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
431
432
  float mem_hp_x[2]={0};
  float mem_hp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
433
434
  float mem_resp_x[2]={0};
  float mem_resp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
435
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
436
437
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
438
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
439
440
  int gain_change_count=0;
  float speech_gain = 1, noise_gain = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
441
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
442
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
443
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
444
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
445
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
446
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
447
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
448
449
450
451
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
452
453
454
455
456
457
458
459
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
460
461
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
462
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
463
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
464
    float g[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
465
    float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
466
    short tmp[FRAME_SIZE];
467
    float vad=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
468
    float vad_prob;
469
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
470
    if (++gain_change_count > 101*300) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
471
      speech_gain = pow(10., (-40+(rand()%60))/20.);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
472
473
474
      noise_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()%10==0) noise_gain = 0;
      noise_gain *= speech_gain;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
475
      if (rand()%10==0) speech_gain = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
476
      gain_change_count = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
477
478
      rand_resp(a_noise, b_noise);
      rand_resp(a_sig, b_sig);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
479
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
480
481
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
482
    for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
483
484
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
485
    for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
486
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
487
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
488
    biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
489
    biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
490
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
491
    for (i=0;i<FRAME_SIZE;i++) E += x[i]*(float)x[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
492
    if (E > 1e9f*speech_gain*speech_gain) {
493
      vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
494
    } else if (E > 1e8f*speech_gain*speech_gain) {
495
496
497
498
499
500
501
502
503
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
504

505
506
    frame_analysis(st, X, Ex, x);
    frame_analysis(noise_state, N, En, n);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
507
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
508
    int silence = compute_frame_features(noisy, Y, P, Ey, Ep, features, xn);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
509
    //printf("%f %d\n", noisy->last_gain, noisy->last_period);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
510
    for (i=0;i<NB_BANDS;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
511
      g[i] = sqrt((Ex[i]+1e-2)/(Ey[i]+1e-2));
Jean-Marc Valin's avatar
Jean-Marc Valin committed
512
      if (g[i] > 1) g[i] = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
513
      if (silence) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
514
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
515
516
    count++;
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
517
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
518
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
519
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
520
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
521
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
522
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
523
524
525
526
    fwrite(features, sizeof(float), NB_FEATURES, stdout);
    fwrite(g, sizeof(float), NB_BANDS, stdout);
    fwrite(Ln, sizeof(float), NB_BANDS, stdout);
    fwrite(&vad, sizeof(float), 1, stdout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
527
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
528
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
529
    compute_rnn(&noisy->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
530
531
    //for (i=0;i<NB_BANDS;i++) scanf("%f", &g[i]);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
532
533
534
535
536
537
538
539
540
541
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
#endif
    frame_synthesis(noisy, xn, Y);

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
542
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
543
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
544
  fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
545
546
547
  fclose(f1);
  fclose(f2);
  fclose(fout);
548
549
  return 0;
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

#else

int main(int argc, char **argv) {
  int i;
  float x[FRAME_SIZE];
  FILE *f1, *fout;
  DenoiseState *st;
  st = rnnoise_create();
  if (argc!=3) {
    fprintf(stderr, "usage: %s <noisy speech> <output denoised>\n", argv[0]);
    return 1;
  }
  f1 = fopen(argv[1], "r");
  fout = fopen(argv[2], "w");
  while (1) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    rnnoise_process_frame(st, x, x);
    for (i=0;i<FRAME_SIZE;i++) tmp[i] = x[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
  }
  fclose(f1);
  fclose(fout);
  return 0;
}

#endif