denoise.c 9.48 KB
Newer Older
1
2
3
4
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
5
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
6
#include <math.h>
7
8
9
10
11
12
13
14
15

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)


#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
16
17
18
19
20
21
22
23
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

Jean-Marc Valin's avatar
Jean-Marc Valin committed
24
#define CEPS_MEM 8
25
26
27
#define NB_DELTA_CEPS 6

#define NB_FEATURES (NB_BANDS+2*NB_DELTA_CEPS+1)
Jean-Marc Valin's avatar
Jean-Marc Valin committed
28

29
30
31
32
33
34
35
36
37
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
38
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
39
  float dct_table[NB_BANDS*NB_BANDS];
40
41
42
43
} CommonState;

typedef struct {
  float analysis_mem[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
44
45
  float cepstral_mem[CEPS_MEM][NB_BANDS];
  int memid;
46
47
48
  float synthesis_mem[FRAME_SIZE];
} DenoiseState;

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
49
#if SMOOTH_BANDS
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
90
91
92
93
94
95
96
97
98
99
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    opus_val32 sum = 1e-27;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
100
    bandE[i] = sum;
101
102
103
104
105
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
106
  memset(g, 0, FREQ_SIZE);
107
108
109
110
111
112
113
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
114
115
#endif

116
117
118
119

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
120
  int i;
121
122
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
123
124
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
125
126
127
128
129
130
131
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
132
133
134
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
162

Jean-Marc Valin's avatar
Jean-Marc Valin committed
163
static void forward_transform(kiss_fft_cpx *out, const float *in) {
164
165
166
167
168
169
170
171
172
173
174
175
176
177
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
178
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
179
180
181
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
182
  check_init();
183
184
185
186
187
188
189
190
191
192
193
194
195
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
196
197
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
  st = malloc(sizeof(DenoiseState));
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
219

Jean-Marc Valin's avatar
Jean-Marc Valin committed
220
static void frame_analysis(DenoiseState *st, kiss_fft_cpx *y, float *Ey, float *features, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
221
222
223
224
225
226
227
  float x[WINDOW_SIZE];
  int i;
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(y, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
228
229
230
  if (Ey != NULL) {
    compute_band_energy(Ey, y);
    if (features != NULL) {
231
      float *ceps_0, *ceps_1, *ceps_2;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
232
      float spec_variability = 0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
233
      float Ly[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
234
      for (i=0;i<NB_BANDS;i++) Ly[i] = log10(1e-10+Ey[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
235
      dct(features, Ly);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
236
237
      features[0] -= 12;
      features[1] -= 4;
238
239
240
241
      ceps_0 = st->cepstral_mem[st->memid];
      ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1];
      ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2];
      for (i=0;i<NB_BANDS;i++) ceps_0[i] = features[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
242
      st->memid++;
243
244
245
246
247
248
      for (i=0;i<NB_DELTA_CEPS;i++) {
        features[i] = ceps_0[i] + ceps_1[i] + ceps_2[i];
        features[NB_BANDS+i] = ceps_0[i] - ceps_2[i];
        features[NB_BANDS+NB_DELTA_CEPS+i] =  ceps_0[i] - 2*ceps_1[i] + ceps_2[i];
      }
      /* Spectral variability features. */
Jean-Marc Valin's avatar
Jean-Marc Valin committed
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
      if (st->memid == CEPS_MEM) st->memid = 0;
      for (i=0;i<CEPS_MEM;i++)
      {
        int j;
        float mindist = 1e15f;
        for (j=0;j<CEPS_MEM;j++)
        {
          int k;
          float dist=0;
          for (k=0;k<NB_BANDS;k++)
          {
            float tmp;
            tmp = st->cepstral_mem[i][k] - st->cepstral_mem[j][k];
            dist += tmp*tmp;
          }
          if (j!=i)
            mindist = MIN32(mindist, dist);
        }
        spec_variability += mindist;
      }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
269
      features[NB_BANDS+2*NB_DELTA_CEPS] = spec_variability/CEPS_MEM-2.1;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
270
271
    }
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
272
273
274
275
276
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
277
278
279
280
281
282
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
283
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
284
  kiss_fft_cpx y[FREQ_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
285
  frame_analysis(st, y, NULL, NULL, in);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
286
287
288
289
  /* Do processing here. */
  frame_synthesis(st, out, y);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
290
int main(int argc, char **argv) {
291
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
292
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
293
294
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
295
  int vad_cnt=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
296
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
297
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
298
  DenoiseState *noise_state;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
299
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
300
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
301
  noise_state = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
302
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
303
304
305
306
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
307
308
309
310
311
312
313
314
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
315
316
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS];
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
317
    float Ln[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
318
    float features[NB_FEATURES];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
319
320
321
    float g[NB_BANDS];
    float gf[FREQ_SIZE];
    short tmp[FRAME_SIZE];
322
323
    float vad=0;
    float E=0;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
324
325
326
327
328
329
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
    for (i=0;i<FRAME_SIZE;i++) n[i] = tmp[i];
330
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
331
332
333
334
335
336
337
338
339
340
341
342
343
    for (i=0;i<FRAME_SIZE;i++) E += x[i]*(float)x[i];
    if (E > 1e9f) {
      vad_cnt=0;
    } else if (E > 1e8f) {
      vad_cnt -= 5;
      if (vad_cnt < 0) vad_cnt = 0;
    } else {
      vad_cnt++;
      if (vad_cnt > 15) vad_cnt = 15;
    }
    if (vad_cnt >= 10) vad = 0;
    else if (vad_cnt > 0) vad = 0.5f;
    else vad = 1.f;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
344

Jean-Marc Valin's avatar
Jean-Marc Valin committed
345
    frame_analysis(st, X, Ex, NULL, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
346
    frame_analysis(noise_state, N, En, NULL, n);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
347
    for (i=0;i<NB_BANDS;i++) Ln[i] = log10(1e-10+En[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
348
    frame_analysis(noisy, Y, Ey, features, xn);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
349
    for (i=0;i<NB_FEATURES;i++) printf("%f ", features[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
350
351
352
353
354
355
    for (i=0;i<NB_BANDS;i++) {
      g[i] = sqrt((Ex[i]+1e-15)/(Ey[i]+1e-15));
      if (g[i] > 1) g[i] = 1;
    }
#if 1
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
Jean-Marc Valin's avatar
oops    
Jean-Marc Valin committed
356
    for (i=0;i<NB_BANDS;i++) printf("%f ", Ln[i]);
357
    printf("%f\n", vad);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
358
#endif
Jean-Marc Valin's avatar
Jean-Marc Valin committed
359
360
    //for (i=0;i<NB_BANDS;i++) scanf("%f", &g[i]);
    interp_band_gain(gf, g);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
361
362
363
364
365
366
367
368
369
370
371
372
373
374
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
#endif
    frame_synthesis(noisy, xn, Y);

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
  }
  fclose(f1);
  fclose(f2);
  fclose(fout);
375
376
  return 0;
}