#include #include #include #include "kiss_fft.h" #include "common.h" #include #include "pitch.h" #include "rnn.h" #include "rnn_data.h" #define FRAME_SIZE_SHIFT 2 #define FRAME_SIZE (120<>1]; int pitch_index; float gain; float *(pre[1]); float tmp[NB_BANDS]; RNN_COPY(x, st->analysis_mem, FRAME_SIZE); for (i=0;ianalysis_mem, in, FRAME_SIZE); apply_window(x); forward_transform(X, x); compute_band_energy(Ex, X); if (features == NULL) return 1; RNN_MOVE(st->pitch_buf, &st->pitch_buf[FRAME_SIZE], PITCH_BUF_SIZE-FRAME_SIZE); RNN_COPY(&st->pitch_buf[PITCH_BUF_SIZE-FRAME_SIZE], in, FRAME_SIZE); pre[0] = &st->pitch_buf[0]; pitch_downsample(pre, pitch_buf, PITCH_BUF_SIZE, 1); pitch_search(pitch_buf+(PITCH_MAX_PERIOD>>1), pitch_buf, PITCH_FRAME_SIZE, PITCH_MAX_PERIOD-3*PITCH_MIN_PERIOD, &pitch_index); pitch_index = PITCH_MAX_PERIOD-pitch_index; gain = remove_doubling(pitch_buf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD, PITCH_FRAME_SIZE, &pitch_index, st->last_period, st->last_gain); st->last_period = pitch_index; st->last_gain = gain; for (i=0;ipitch_buf[PITCH_BUF_SIZE-WINDOW_SIZE-pitch_index+i]; apply_window(p); forward_transform(P, p); compute_band_energy(Ep, P); compute_band_corr(Exp, X, P); for (i=0;icepstral_mem[st->memid]; ceps_1 = (st->memid < 1) ? st->cepstral_mem[CEPS_MEM+st->memid-1] : st->cepstral_mem[st->memid-1]; ceps_2 = (st->memid < 2) ? st->cepstral_mem[CEPS_MEM+st->memid-2] : st->cepstral_mem[st->memid-2]; for (i=0;imemid++; for (i=0;imemid == CEPS_MEM) st->memid = 0; for (i=0;icepstral_mem[i][k] - st->cepstral_mem[j][k]; dist += tmp*tmp; } if (j!=i) mindist = MIN32(mindist, dist); } spec_variability += mindist; } features[NB_BANDS+3*NB_DELTA_CEPS+1] = spec_variability/CEPS_MEM-2.1; return TRAINING && E < 0.1; } static void frame_synthesis(DenoiseState *st, float *out, const kiss_fft_cpx *y) { float x[WINDOW_SIZE]; int i; inverse_transform(x, y); apply_window(x); for (i=0;isynthesis_mem[i]; RNN_COPY(st->synthesis_mem, &x[FRAME_SIZE], FRAME_SIZE); } static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) { int i; for (i=0;imem_hp_x, in, b_hp, a_hp, FRAME_SIZE); silence = frame_analysis(st, X, P, Ex, Ep, features, x); if (!silence) { compute_rnn(&st->rnn, g, &vad_prob, features); interp_band_gain(gf, g); #if 1 for (i=0;i \n", argv[0]); return 1; } f1 = fopen(argv[1], "r"); f2 = fopen(argv[2], "r"); fout = fopen(argv[3], "w"); for(i=0;i<150;i++) { short tmp[FRAME_SIZE]; fread(tmp, sizeof(short), FRAME_SIZE, f2); } while (1) { kiss_fft_cpx X[FREQ_SIZE], Y[FREQ_SIZE], N[FREQ_SIZE], P[WINDOW_SIZE]; float Ex[NB_BANDS], Ey[NB_BANDS], En[NB_BANDS], Ep[NB_BANDS]; float Ln[NB_BANDS]; float features[NB_FEATURES]; float g[NB_BANDS]; float gf[FREQ_SIZE]={1}; short tmp[FRAME_SIZE]; float vad=0; float vad_prob; float E=0; if (++gain_change_count > 101*300) { speech_gain = pow(10., (-40+(rand()%60))/20.); noise_gain = pow(10., (-30+(rand()%40))/20.); if (rand()%10==0) noise_gain = 0; noise_gain *= speech_gain; if (rand()%10==0) speech_gain = 0; gain_change_count = 0; rand_resp(a_noise, b_noise); rand_resp(a_sig, b_sig); } fread(tmp, sizeof(short), FRAME_SIZE, f1); if (feof(f1)) break; for (i=0;i 1e9f*speech_gain*speech_gain) { vad_cnt=0; } else if (E > 1e8f*speech_gain*speech_gain) { vad_cnt -= 5; if (vad_cnt < 0) vad_cnt = 0; } else { vad_cnt++; if (vad_cnt > 15) vad_cnt = 15; } if (vad_cnt >= 10) vad = 0; else if (vad_cnt > 0) vad = 0.5f; else vad = 1.f; frame_analysis(st, X, NULL, Ex, NULL, NULL, x); frame_analysis(noise_state, N, NULL, En, NULL, NULL, n); for (i=0;ilast_gain, noisy->last_period); for (i=0;i 1) g[i] = 1; if (silence) g[i] = -1; } count++; #if 0 for (i=0;irnn, g, &vad_prob, features); //for (i=0;i \n", argv[0]); return 1; } f1 = fopen(argv[1], "r"); fout = fopen(argv[2], "w"); while (1) { short tmp[FRAME_SIZE]; fread(tmp, sizeof(short), FRAME_SIZE, f1); if (feof(f1)) break; for (i=0;i