denoise.c 18 KB
Newer Older
Jean-Marc Valin's avatar
Jean-Marc Valin committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
/* Copyright (c) 2017 Mozilla */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

31
32
33
34
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
35
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
36
#include <math.h>
Jean-Marc Valin's avatar
Jean-Marc Valin committed
37
#include "rnnoise.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
38
#include "pitch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
39
#include "arch.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
40
41
#include "rnn.h"
#include "rnn_data.h"
42
43
44
45
46
47

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)

Jean-Marc Valin's avatar
Jean-Marc Valin committed
48
49
#define PITCH_MIN_PERIOD 60
#define PITCH_MAX_PERIOD 768
Jean-Marc Valin's avatar
Jean-Marc Valin committed
50
51
#define PITCH_FRAME_SIZE 960
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
52
53
54

#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
55
56
57
58
59
60
61
62
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
63
#define CEPS_MEM 8
64
65
#define NB_DELTA_CEPS 6

Jean-Marc Valin's avatar
Jean-Marc Valin committed
66
#define NB_FEATURES (NB_BANDS+3*NB_DELTA_CEPS+2)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
67

68

Jean-Marc Valin's avatar
Jean-Marc Valin committed
69
#ifndef TRAINING
70
#define TRAINING 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
71
#endif
72

73
74
75
76
77
78
79
80
81
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
82
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
83
  float dct_table[NB_BANDS*NB_BANDS];
84
85
} CommonState;

Jean-Marc Valin's avatar
Jean-Marc Valin committed
86
struct DenoiseState {
87
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
88
89
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
90
  float synthesis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
91
  float pitch_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
92
  float pitch_enh_buf[PITCH_BUF_SIZE];
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
93
94
  float last_gain;
  int last_period;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
95
  float mem_hp_x[2];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
96
  RNNState rnn;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
97
};
98

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
99
#if SMOOTH_BANDS
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r;
      tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i;
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
165
166
167
168
169
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
170
    opus_val32 sum = 0;
171
172
173
174
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
175
    bandE[i] = sum;
176
177
178
179
180
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
181
  memset(g, 0, FREQ_SIZE);
182
183
184
185
186
187
188
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
189
190
#endif

191
192
193
194

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
195
  int i;
196
197
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
198
199
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
200
201
202
203
204
205
206
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
207
208
209
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
237

Jean-Marc Valin's avatar
Jean-Marc Valin committed
238
static void forward_transform(kiss_fft_cpx *out, const float *in) {
239
240
241
242
243
244
245
246
247
248
249
250
251
252
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
253
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
254
255
256
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
257
  check_init();
258
259
260
261
262
263
264
265
266
267
268
269
270
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
271
272
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
273
274
275
276
277
278
279
280
281
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
282
283
284
285
int rnnoise_get_size() {
  return sizeof(DenoiseState);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
286
287
288
289
290
291
292
int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
293
  st = malloc(rnnoise_get_size());
Jean-Marc Valin's avatar
Jean-Marc Valin committed
294
295
296
297
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
298
299
300
301
void rnnoise_destroy(DenoiseState *st) {
  free(st);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
302
303
#if TRAINING
int lowpass = FREQ_SIZE;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
304
int band_lp = NB_BANDS;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
305
306
#endif

307
308
static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
309
  float x[WINDOW_SIZE];
310
311
312
313
314
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(X, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
315
316
317
318
#if TRAINING
  for (i=lowpass;i<FREQ_SIZE;i++)
    X[i].r = X[i].i = 0;
#endif
319
320
321
  compute_band_energy(Ex, X);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
322
323
static int compute_frame_features(DenoiseState *st, kiss_fft_cpx *X, kiss_fft_cpx *P,
                                  float *Ex, float *Ep, float *Exp, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
324
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
325
  float E = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
326
327
328
  float *ceps_0, *ceps_1, *ceps_2;
  float spec_variability = 0;
  float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
329
330
331
332
333
334
  float p[WINDOW_SIZE];
  float pitch_buf[PITCH_BUF_SIZE>>1];
  int pitch_index;
  float gain;
  float *(pre[1]);
  float tmp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
335
  float follow, logMax;
336
  frame_analysis(st, X, Ex, in);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
337
338
339
340
341
342
343
  RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
  RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
  pre[0] = &st->pitch_buf[0];
  pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
  pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
               PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
  pitch_index = PITCH_MAX_PERIOD-pitch_index;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
344

Jean-Marc Valin's avatar
Jean-Marc Valin committed
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
  gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
          PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain);
  st->last_period = pitch_index;
  st->last_gain = gain;
  for (i=0;i<WINDOW_SIZE;i++)
    p[i] = st->pitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i];
  apply_window(p);
  forward_transform(P, p);
  compute_band_energy(Ep, P);
  compute_band_corr(Exp, X, P);
  for (i=0;i<NB_BANDS;i++) Exp[i] = Exp[i]/sqrt(.001+Ex[i]*Ep[i]);
  dct(tmp, Exp);
  for (i=0;i<NB_DELTA_CEPS;i++) features[NB_BANDS+2*NB_DELTA_CEPS+i] = tmp[i];
  features[NB_BANDS+2*NB_DELTA_CEPS] -= 1.3;
  features[NB_BANDS+2*NB_DELTA_CEPS+1] -= 0.9;
  features[NB_BANDS+3*NB_DELTA_CEPS] = .01*(pitch_index-300);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
361
362
  logMax = -2;
  follow = -2;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
363
364
  for (i=0;i<NB_BANDS;i++) {
    Ly[i] = log10(1e-2+Ex[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
365
366
367
    Ly[i] = MAX16(logMax-6, MAX16(follow-1.2, Ly[i]));
    logMax = MAX16(logMax, Ly[i]);
    follow = MAX16(follow-1, Ly[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
    E += Ex[i];
  }
  if (!TRAINING && E < 0.04) {
    /* If there's no audio, avoid messing up the state. */
    RNN_CLEAR(features, NB_FEATURES);
    return 1;
  }
  dct(features, Ly);
  features[0] -= 12;
  features[1] -= 4;
  ceps_0 = st->cepstral_mem[st->memid];
  ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
  ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
  for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
  st->memid++;
  for (i=0;i<NB_DELTA_CEPS;i++) {
    features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
    features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
    features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
  }
  /* Spectral variability features. */
  if (st->memid == CEPS_MEM) st->memid = 0;
  for (i=0;i<CEPS_MEM;i++)
Jean-Marc Valin's avatar
wip    
Jean-Marc Valin committed
391
  {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
392
393
394
395
396
397
398
    int j;
    float mindist = 1e15f;
    for (j=0;j<CEPS_MEM;j++)
    {
      int k;
      float dist=0;
      for (k=0;k<NB_BANDS;k++)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
399
      {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
400
401
402
        float tmp;
        tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
        dist += tmp*tmp;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
403
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
404
405
      if (j!=i)
        mindist = MIN32(mindist, dist);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
406
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
407
    spec_variability += mindist;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
408
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
409
  features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1;
410
  return TRAINING && E < 0.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
411
412
413
414
415
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
416
417
418
419
420
421
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
422
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
423
424
425
426
427
428
429
430
431
432
433
  int i;
  for (i=0;i<N;i++) {
    float xi, yi;
    xi = x[i];
    yi = x[i] + mem[0];
    mem[0] = mem[1] + (b[0]*(double)xi - a[0]*(double)yi);
    mem[1] = (b[1]*(double)xi - a[1]*(double)yi);
    y[i] = yi;
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
void pitch_filter(kiss_fft_cpx *X, const kiss_fft_cpx *P, const float *Ex, const float *Ep,
                  const float *Exp, const float *g) {
  int i;
  float r[NB_BANDS];
  float rf[FREQ_SIZE] = {0};
  for (i=0;i<NB_BANDS;i++) {
#if 0
    if (Exp[i]>g[i]) r[i] = 1;
    else r[i] = Exp[i]*(1-g[i])/(.001 + g[i]*(1-Exp[i]));
    r[i] = MIN16(1, MAX16(0, r[i]));
#else
    if (Exp[i]>g[i]) r[i] = 1;
    else r[i] = SQUARE(Exp[i])*(1-SQUARE(g[i]))/(.001 + SQUARE(g[i])*(1-SQUARE(Exp[i])));
    r[i] = sqrt(MIN16(1, MAX16(0, r[i])));
#endif
    r[i] *= sqrt(Ex[i]/(1e-8+Ep[i]));
  }
  interp_band_gain(rf, r);
  for (i=0;i<FREQ_SIZE;i++) {
    X[i].r += rf[i]*P[i].r;
    X[i].i += rf[i]*P[i].i;
  }
  float newE[NB_BANDS];
  compute_band_energy(newE, X);
  float norm[NB_BANDS];
  float normf[FREQ_SIZE]={0};
  for (i=0;i<NB_BANDS;i++) {
    norm[i] = sqrt(Ex[i]/(1e-8+newE[i]));
  }
  interp_band_gain(normf, norm);
  for (i=0;i<FREQ_SIZE;i++) {
    X[i].r *= normf[i];
    X[i].i *= normf[i];
  }
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
469

Jean-Marc Valin's avatar
Jean-Marc Valin committed
470
float rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
471
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
472
473
  kiss_fft_cpx X[FREQ_SIZE];
  kiss_fft_cpx P[WINDOW_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
474
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
475
  float Ex[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
476
  float Exp[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
477
478
479
  float features[NB_FEATURES];
  float g[NB_BANDS];
  float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
480
  float vad_prob = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
481
  int silence;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
482
483
484
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  biquad(x, st->mem_hp_x, in, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
485
  silence = compute_frame_features(st, X, P, Ex, Ep, Exp, features, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
486

487
  if (!silence) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
488
    compute_rnn(&st->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
489
    pitch_filter(X, P, Ex, Ep, Exp, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
490
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
491
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
492
    for (i=0;i<FREQ_SIZE;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
493
494
      X[i].r *= gf[i];
      X[i].i *= gf[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
495
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
496
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
497
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
498

Jean-Marc Valin's avatar
Jean-Marc Valin committed
499
  frame_synthesis(st, out, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
500
  return vad_prob;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
501
502
}

503
#if TRAINING
Jean-Marc Valin's avatar
Jean-Marc Valin committed
504

Jean-Marc Valin's avatar
Jean-Marc Valin committed
505
506
507
508
509
510
511
512
513
514
515
static float uni_rand() {
  return rand()/(double)RAND_MAX-.5;
}

static void rand_resp(float *a, float *b) {
  a[0] = .75*uni_rand();
  a[1] = .75*uni_rand();
  b[0] = .75*uni_rand();
  b[1] = .75*uni_rand();
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
516
int main(int argc, char **argv) {
517
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
518
  int count=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
519
520
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
521
522
523
524
  float a_noise[2] = {0};
  float b_noise[2] = {0};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
525
526
  float mem_hp_x[2]={0};
  float mem_hp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
527
528
  float mem_resp_x[2]={0};
  float mem_resp_n[2]={0};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
529
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
530
531
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
532
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
533
534
  int gain_change_count=0;
  float speech_gain = 1, noise_gain = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
535
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
536
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
537
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
538
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
539
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
540
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
541
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
542
543
544
545
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
546
547
548
549
550
551
552
553
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
554
555
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
556
    float Exp[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
557
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
558
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
559
    float g[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
560
    float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
561
    short tmp[FRAME_SIZE];
562
    float vad=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
563
    float vad_prob;
564
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
565
    if (++gain_change_count > 101*300) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
566
      speech_gain = pow(10., (-40+(rand()%60))/20.);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
567
      noise_gain = pow(10., (-30+(rand()%50))/20.);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
568
569
      if (rand()%10==0) noise_gain = 0;
      noise_gain *= speech_gain;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
570
      if (rand()%4==0) speech_gain = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
571
      gain_change_count = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
572
573
      rand_resp(a_noise, b_noise);
      rand_resp(a_sig, b_sig);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
574
      lowpass = FREQ_SIZE * 3000./24000. * pow(10., rand()/(double)RAND_MAX);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
575
576
577
578
579
580
      for (i=0;i<NB_BANDS;i++) {
        if (eband5ms[i]<<FRAME_SIZE_SHIFT > lowpass) {
          band_lp = i;
          break;
        }
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
581
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
    if (speech_gain != 0) {
      fread(tmp, sizeof(short), FRAME_SIZE, f1);
      if (feof(f1)) break;
      for (i=0;i<FRAME_SIZE;i++) x[i] = speech_gain*tmp[i];
      for (i=0;i<FRAME_SIZE;i++) E += tmp[i]*(float)tmp[i];
    } else {
      for (i=0;i<FRAME_SIZE;i++) x[i] = 0;
      E = 0;
    }
    if (noise_gain!=0) {
      fread(tmp, sizeof(short), FRAME_SIZE, f2);
      if (feof(f2)) break;
      for (i=0;i<FRAME_SIZE;i++) n[i] = noise_gain*tmp[i];
    } else {
      for (i=0;i<FRAME_SIZE;i++) n[i] = 0;
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
598
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
599
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
600
    biquad(n, mem_hp_n, n, b_hp, a_hp, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
601
    biquad(n, mem_resp_n, n, b_noise, a_noise, FRAME_SIZE);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
602
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
603
    if (E > 1e9f) {
604
      vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
605
    } else if (E > 1e8f) {
606
607
608
609
610
611
612
613
614
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
615

Jean-Marc Valin's avatar
Jean-Marc Valin committed
616
    frame_analysis(st, Y, Ey, x);
617
    frame_analysis(noise_state, N, En, n);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
618
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-2+En[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
619
620
    int silence = compute_frame_features(noisy, X, P, Ex, Ep, Exp, features, xn);
    pitch_filter(X, P, Ex, Ep, Exp, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
621
    //printf("%f %d\n", noisy->last_gain, noisy->last_period);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
622
    for (i=0;i<NB_BANDS;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
623
      g[i] = sqrt((Ey[i]+1e-3)/(Ex[i]+1e-3));
Jean-Marc Valin's avatar
Jean-Marc Valin committed
624
      if (g[i] > 1) g[i] = 1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
625
      if (silence || i > band_lp) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
626
      if (Ey[i] < 5e-2 && Ex[i] < 5e-2) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
627
      if (vad==0 && noise_gain==0) g[i] = -1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
628
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
629
630
    count++;
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
631
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
632
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
633
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
634
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
635
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
636
#if 1
Jean-Marc Valin's avatar
Jean-Marc Valin committed
637
638
639
640
    fwrite(features, sizeof(float), NB_FEATURES, stdout);
    fwrite(g, sizeof(float), NB_BANDS, stdout);
    fwrite(Ln, sizeof(float), NB_BANDS, stdout);
    fwrite(&vad, sizeof(float), 1, stdout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
641
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
642
#if 0
Jean-Marc Valin's avatar
Jean-Marc Valin committed
643
    compute_rnn(&noisy->rnn, g, &vad_prob, features);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
644
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
645
646
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
647
648
      X[i].r *= gf[i];
      X[i].i *= gf[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
649
650
    }
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
651
    frame_synthesis(noisy, xn, X);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
652
653
654

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
655
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
656
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
657
  fprintf(stderr, "matrix size: %d x %d\n", count, NB_FEATURES + 2*NB_BANDS + 1);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
658
659
660
  fclose(f1);
  fclose(f2);
  fclose(fout);
661
662
  return 0;
}
Jean-Marc Valin's avatar
Jean-Marc Valin committed
663
664

#endif