denoise.c 10.5 KB
Newer Older
1
2
3
4
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
5
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
6
#include <math.h>
Jean-Marc Valin's avatar
Jean-Marc Valin committed
7
#include "pitch.h"
8
9
10
11
12
13

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)

Jean-Marc Valin's avatar
Jean-Marc Valin committed
14
15
16
17
#define PITCH_MIN_PERIOD 15
#define PITCH_MAX_PERIOD 1024
#define PITCH_FRAME_SIZE 960
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
18
19
20

#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
21
22
23
24
25
26
27
28
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
29
#define CEPS_MEM 8
30
31
32
#define NB_DELTA_CEPS 6

#define NB_FEATURES (NB_BANDS+2*NB_DELTA_CEPS+1)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
33

34
35
36
37
38
39
40
41
42
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
43
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
44
  float dct_table[NB_BANDS*NB_BANDS];
45
46
47
48
} CommonState;

typedef struct {
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
49
50
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
51
  float synthesis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
52
  float pitch_buf[PITCH_BUF_SIZE];
53
54
} DenoiseState;

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
55
#if SMOOTH_BANDS
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
96
97
98
99
100
101
102
103
104
105
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    opus_val32 sum = 1e-27;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
106
    bandE[i] = sum;
107
108
109
110
111
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
112
  memset(g, 0, FREQ_SIZE);
113
114
115
116
117
118
119
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
120
121
#endif

122
123
124
125

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
126
  int i;
127
128
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
129
130
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
131
132
133
134
135
136
137
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
138
139
140
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
168

Jean-Marc Valin's avatar
Jean-Marc Valin committed
169
static void forward_transform(kiss_fft_cpx *out, const float *in) {
170
171
172
173
174
175
176
177
178
179
180
181
182
183
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
184
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
185
186
187
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
188
  check_init();
189
190
191
192
193
194
195
196
197
198
199
200
201
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
202
203
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
  st = malloc(sizeof(DenoiseState));
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
225

Jean-Marc Valin's avatar
Jean-Marc Valin committed
226
static void frame_analysis(DenoiseState *st, kiss_fft_cpx *y, float *Ey, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
227
228
229
230
231
232
233
  float x[WINDOW_SIZE];
  int i;
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(y, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
  if (features) {
    float pitch_buf[PITCH_BUF_SIZE>>1];
    int pitch_index;
    float gain;
    float *(pre[1]);
    static int last_period;
    static float last_gain;
    RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE);
    RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE);
    pre[0] = &st->pitch_buf[0];
    pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1);
    pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE,
                 PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index);
    pitch_index = PITCH_MAX_PERIOD-pitch_index;

    gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
            PITCH_FRAME_SIZE, &pitch_index, last_period, last_gain);
    last_period = pitch_index;
    last_gain = gain;
    printf("%f %d\n", gain, pitch_index);
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
255
256
257
  if (Ey != NULL) {
    compute_band_energy(Ey, y);
    if (features != NULL) {
258
      float *ceps_0, *ceps_1, *ceps_2;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
259
      float spec_variability = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
260
      float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
261
      for (i=0;i<NB_BANDS;i++) Ly[i] = log10(1e-10+Ey[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
262
      dct(features, Ly);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
263
264
      features[0] -= 12;
      features[1] -= 4;
265
266
267
268
      ceps_0 = st->cepstral_mem[st->memid];
      ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
      ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
      for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
269
      st->memid++;
270
271
272
273
274
275
      for (i=0;i<NB_DELTA_CEPS;i++) {
        features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
        features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
        features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
      }
      /* Spectral variability features. */
Jean-Marc Valin's avatar
Jean-Marc Valin committed
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
      if (st->memid == CEPS_MEM) st->memid = 0;
      for (i=0;i<CEPS_MEM;i++)
      {
        int j;
        float mindist = 1e15f;
        for (j=0;j<CEPS_MEM;j++)
        {
          int k;
          float dist=0;
          for (k=0;k<NB_BANDS;k++)
          {
            float tmp;
            tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
            dist += tmp*tmp;
          }
          if (j!=i)
            mindist = MIN32(mindist, dist);
        }
        spec_variability += mindist;
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
296
      features[NB_BANDS+2*NB_DELTA_CEPS] = spec_variability/CEPS_MEM-2.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
297
298
    }
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
299
300
301
302
303
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
304
305
306
307
308
309
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
310
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
311
  kiss_fft_cpx y[FREQ_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
312
  frame_analysis(st, y, NULL, NULL, in);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
313
314
315
316
  /* Do processing here. */
  frame_synthesis(st, out, y);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
317
int main(int argc, char **argv) {
318
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
319
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
320
321
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
322
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
323
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
324
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
325
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
326
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
327
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
328
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
329
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
330
331
332
333
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
334
335
336
337
338
339
340
341
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
342
343
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
344
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
345
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
346
    float g[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
347
    float gf[FREQ_SIZE]={1};
Jean-Marc Valin's avatar
Jean-Marc Valin committed
348
    short tmp[FRAME_SIZE];
349
350
    float vad=0;
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
351
352
353
354
355
356
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
    for (i=0;i<FRAME_SIZE;i++) n[i] = tmp[i];
357
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
358
359
360
361
362
363
364
365
366
367
368
369
370
    for (i=0;i<FRAME_SIZE;i++) E += x[i]*(float)x[i];
    if (E > 1e9f) {
      vad_cnt=0;
    } else if (E > 1e8f) {
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
371

Jean-Marc Valin's avatar
Jean-Marc Valin committed
372
    frame_analysis(st, X, Ex, NULL, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
373
    frame_analysis(noise_state, N, En, NULL, n);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
374
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-10+En[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
375
    frame_analysis(noisy, Y, Ey, features, xn);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
376
377
378
379
    for (i=0;i<NB_BANDS;i++) {
      g[i] = sqrt((Ex[i]+1e-15)/(Ey[i]+1e-15));
      if (g[i] > 1) g[i] = 1;
    }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
380
381
#if 0
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
382
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
383
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
384
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
385
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
386
387
    //for (i=0;i<NB_BANDS;i++) scanf("%f", &g[i]);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
388
389
390
391
392
393
394
395
396
397
398
399
400
401
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
#endif
    frame_synthesis(noisy, xn, Y);

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
  }
  fclose(f1);
  fclose(f2);
  fclose(fout);
402
403
  return 0;
}