denoise.c 7.26 KB
Newer Older
1
2
3
4
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
Jean-Marc Valin's avatar
Jean-Marc Valin committed
5
#include "common.h"
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
6
#include <math.h>
7
8
9
10
11
12
13
14
15

#define FRAME_SIZE_SHIFT 2
#define FRAME_SIZE (120<<FRAME_SIZE_SHIFT)
#define WINDOW_SIZE (2*FRAME_SIZE)
#define FREQ_SIZE (FRAME_SIZE + 1)


#define SQUARE(x) ((x)*(x))

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
16
17
18
19
20
21
22
23
#define SMOOTH_BANDS 1

#if SMOOTH_BANDS
#define NB_BANDS 22
#else
#define NB_BANDS 21
#endif

24
25
26
27
28
29
30
31
32
static const opus_int16 eband5ms[] = {
/*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 20k*/
  0,  1,  2,  3,  4,  5,  6,  7,  8, 10, 12, 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100
};


typedef struct {
  int init;
  kiss_fft_state *kfft;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
33
  float half_window[FRAME_SIZE];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
34
  float dct_table[NB_BANDS*NB_BANDS];
35
36
37
38
39
40
41
} CommonState;

typedef struct {
  float analysis_mem[FRAME_SIZE];
  float synthesis_mem[FRAME_SIZE];
} DenoiseState;

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
42
#if SMOOTH_BANDS
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  float sum[NB_BANDS] = {0};
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float tmp;
      float frac = (float)j/band_size;
      tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
      sum[i] += (1-frac)*tmp;
      sum[i+1] += frac*tmp;
    }
  }
  sum[0] *= 2;
  sum[NB_BANDS-1] *= 2;
  for (i=0;i<NB_BANDS;i++)
  {
    bandE[i] = sum[i];
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
  memset(g, 0, FREQ_SIZE);
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
#else
83
84
85
86
87
88
89
90
91
92
void compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
  int i;
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    opus_val32 sum = 1e-27;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) {
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r);
      sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i);
    }
93
    bandE[i] = sum;
94
95
96
97
98
  }
}

void interp_band_gain(float *g, const float *bandE) {
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
99
  memset(g, 0, FREQ_SIZE);
100
101
102
103
104
105
106
  for (i=0;i<NB_BANDS;i++)
  {
    int j;
    for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++)
      g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i];
  }
}
107
108
#endif

109
110
111
112

CommonState common;

static void check_init() {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
113
  int i;
114
115
  if (common.init) return;
  common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
116
117
  for (i=0;i<FRAME_SIZE;i++)
    common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE));
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
118
119
120
121
122
123
124
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
125
126
127
  common.init = 1;
}

Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
static void dct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[j*NB_BANDS + i];
    }
    out[i] = sum*sqrt(2./22);
  }
}

#if 0
static void idct(float *out, const float *in) {
  int i;
  check_init();
  for (i=0;i<NB_BANDS;i++) {
    int j;
    float sum = 0;
    for (j=0;j<NB_BANDS;j++) {
      sum += in[j] * common.dct_table[i*NB_BANDS + j];
    }
    out[i] = sum*sqrt(2./22);
  }
}
#endif
155

Jean-Marc Valin's avatar
Jean-Marc Valin committed
156
static void forward_transform(kiss_fft_cpx *out, const float *in) {
157
158
159
160
161
162
163
164
165
166
167
168
169
170
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
  check_init();
  for (i=0;i<WINDOW_SIZE;i++) {
    x[i].r = in[i];
    x[i].i = 0;
  }
  opus_fft(common.kfft, x, y, 0);
  for (i=0;i<FREQ_SIZE;i++) {
    out[i] = y[i];
  }
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
171
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
172
173
174
  int i;
  kiss_fft_cpx x[WINDOW_SIZE];
  kiss_fft_cpx y[WINDOW_SIZE];
175
  check_init();
176
177
178
179
180
181
182
183
184
185
186
187
188
  for (i=0;i<FREQ_SIZE;i++) {
    x[i] = in[i];
  }
  for (;i<WINDOW_SIZE;i++) {
    x[i].r = x[WINDOW_SIZE - i].r;
    x[i].i = -x[WINDOW_SIZE - i].i;
  }
  opus_fft(common.kfft, x, y, 0);
  /* output in reverse order for IFFT. */
  out[0] = WINDOW_SIZE*y[0].r;
  for (i=1;i<WINDOW_SIZE;i++) {
    out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r;
  }
189
190
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
static void apply_window(float *x) {
  int i;
  check_init();
  for (i=0;i<FRAME_SIZE;i++) {
    x[i] *= common.half_window[i];
    x[WINDOW_SIZE - 1 - i] *= common.half_window[i];
  }
}

int rnnoise_init(DenoiseState *st) {
  memset(st, 0, sizeof(*st));
  return 0;
}

DenoiseState *rnnoise_create() {
  DenoiseState *st;
  st = malloc(sizeof(DenoiseState));
  rnnoise_init(st);
  return st;
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
212
213

static void frame_analysis(DenoiseState *st, kiss_fft_cpx *y, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
214
215
216
217
218
219
220
  float x[WINDOW_SIZE];
  int i;
  RNN_COPY(x, st->analysis_mem, FRAME_SIZE);
  for (i=0;i<FRAME_SIZE;i++) x[FRAME_SIZE + i] = in[i];
  RNN_COPY(st->analysis_mem, in, FRAME_SIZE);
  apply_window(x);
  forward_transform(y, x);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
221
222
223
224
225
}

static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) {
  float x[WINDOW_SIZE];
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
226
227
228
229
230
231
  inverse_transform(x, y);
  apply_window(x);
  for (i=0;i<FRAME_SIZE;i++) out[i] = x[i] + st->synthesis_mem[i];
  RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE);
}

Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
232
void rnnoise_process_frame(DenoiseState *st, float *out, const float *in) {
Jean-Marc Valin's avatar
Jean-Marc Valin committed
233
234
235
236
237
238
  kiss_fft_cpx y[FREQ_SIZE];
  frame_analysis(st, y, in);
  /* Do processing here. */
  frame_synthesis(st, out, y);
}

Jean-Marc Valin's avatar
Jean-Marc Valin committed
239
int main(int argc, char **argv) {
240
  int i;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
241
  float x[FRAME_SIZE];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
242
243
244
  float n[FRAME_SIZE];
  float xn[FRAME_SIZE];
  FILE *f1, *f2, *fout;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
245
  DenoiseState *st;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
246
  DenoiseState *noisy;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
247
  st = rnnoise_create();
Jean-Marc Valin's avatar
Jean-Marc Valin committed
248
  noisy = rnnoise_create();
Jean-Marc Valin's avatar
cleanup    
Jean-Marc Valin committed
249
250
251
252
  if (argc!=4) {
    fprintf(stderr, "usage: %s <speech> <noise> <output denoised>\n", argv[0]);
    return 1;
  }
Jean-Marc Valin's avatar
Jean-Marc Valin committed
253
254
255
256
257
258
259
260
261
262
  f1 = fopen(argv[1], "r");
  f2 = fopen(argv[2], "r");
  fout = fopen(argv[3], "w");
  for(i=0;i<150;i++) {
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
  }
  while (1) {
    kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE];
    float Ex[NB_BANDS], Ey[NB_BANDS];
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
263
    float Ly[NB_BANDS], Cy[NB_BANDS];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
264
265
266
267
268
269
270
271
272
    float g[NB_BANDS];
    float gf[FREQ_SIZE];
    short tmp[FRAME_SIZE];
    fread(tmp, sizeof(short), FRAME_SIZE, f1);
    if (feof(f1)) break;
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    fread(tmp, sizeof(short), FRAME_SIZE, f2);
    if (feof(f2)) break;
    for (i=0;i<FRAME_SIZE;i++) n[i] = tmp[i];
273
    for (i=0;i<FRAME_SIZE;i++) xn[i] = x[i] + n[i];
Jean-Marc Valin's avatar
Jean-Marc Valin committed
274
275
276
277
278

    frame_analysis(st, X, x);
    frame_analysis(noisy, Y, xn);
    compute_band_energy(Ex, X);
    compute_band_energy(Ey, Y);
Jean-Marc Valin's avatar
add DCT    
Jean-Marc Valin committed
279
280
    for (i=0;i<NB_BANDS;i++) Ly[i] = 10*log10(1e-10+Ey[i]);
    dct(Cy, Ly);
281
282
283
    Cy[0] -= 120;
    Cy[1] -= 40;
    for (i=0;i<NB_BANDS;i++) printf("%f ", Cy[i]);
Jean-Marc Valin's avatar
Jean-Marc Valin committed
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
    for (i=0;i<NB_BANDS;i++) {
      g[i] = sqrt((Ex[i]+1e-15)/(Ey[i]+1e-15));
      if (g[i] > 1) g[i] = 1;
    }
    interp_band_gain(gf, g);
#if 1
    for (i=0;i<NB_BANDS;i++) printf("%f ", g[i]);
    printf("\n");
#endif
#if 1
    for (i=0;i<FREQ_SIZE;i++) {
      Y[i].r *= gf[i];
      Y[i].i *= gf[i];
    }
#endif
    frame_synthesis(noisy, xn, Y);

    for (i=0;i<FRAME_SIZE;i++) tmp[i] = xn[i];
    fwrite(tmp, sizeof(short), FRAME_SIZE, fout);
  }
  fclose(f1);
  fclose(f2);
  fclose(fout);
307
308
  return 0;
}