diff --git a/dnn/compile.sh b/dnn/compile.sh index 446e7e32d83edeb9f95876010b85296c7ad2e80c..fba4f8e3e70cd73964589b793f443c38f0e6d9a3 100755 --- a/dnn/compile.sh +++ b/dnn/compile.sh @@ -1,4 +1,4 @@ #!/bin/sh -gcc -DTRAINING=1 -Wall -W -O3 -g -I../include dump_data.c kiss_fft.c pitch.c celt_lpc.c -o dump_data -lm -gcc -o test_lpcnet -mavx2 -mfma -g -O3 -Wall -W -Wextra lpcnet.c nnet.c nnet_data.c dump_data.c kiss_fft.c pitch.c celt_lpc.c -lm +gcc -DTRAINING=1 -Wall -W -O3 -g -I../include dump_data.c freq.c kiss_fft.c pitch.c celt_lpc.c -o dump_data -lm +gcc -o test_lpcnet -mavx2 -mfma -g -O3 -Wall -W -Wextra lpcnet.c nnet.c nnet_data.c freq.c kiss_fft.c pitch.c celt_lpc.c -lm diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 8c39c7cf5efc27563dbfbd4ece91cc5c7f54e627..7bdacc617813cf91f1c40c4369dd39396249e26a 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -40,27 +40,13 @@ #include "celt_lpc.h" #include <assert.h> -#define PREEMPHASIS (0.85f) - -#define FRAME_SIZE_SHIFT 2 -#define FRAME_SIZE (40<<FRAME_SIZE_SHIFT) -#define WINDOW_SIZE (2*FRAME_SIZE) -#define FREQ_SIZE (FRAME_SIZE + 1) #define PITCH_MIN_PERIOD 32 #define PITCH_MAX_PERIOD 256 #define PITCH_FRAME_SIZE 320 #define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE) -#define SQUARE(x) ((x)*(x)) - -#define SMOOTH_BANDS 1 -#if SMOOTH_BANDS -#define NB_BANDS 18 -#else -#define NB_BANDS 17 -#endif #define CEPS_MEM 8 #define NB_DELTA_CEPS 6 @@ -72,18 +58,9 @@ #define TRAINING 0 #endif -static const opus_int16 eband5ms[] = { -/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40 -}; -typedef struct { - int init; - kiss_fft_state *kfft; - float half_window[FRAME_SIZE]; - float dct_table[NB_BANDS*NB_BANDS]; -} CommonState; + struct DenoiseState { float analysis_mem[FRAME_SIZE]; @@ -98,178 +75,6 @@ struct DenoiseState { float lastg[NB_BANDS]; }; -#if SMOOTH_BANDS -void compute_band_energy(float *bandE, const kiss_fft_cpx *X) { - int i; - float sum[NB_BANDS] = {0}; - for (i=0;i<NB_BANDS-1;i++) - { - int j; - int band_size; - band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT; - for (j=0;j<band_size;j++) { - float tmp; - float frac = (float)j/band_size; - tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r); - tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i); - sum[i] += (1-frac)*tmp; - sum[i+1] += frac*tmp; - } - } - sum[0] *= 2; - sum[NB_BANDS-1] *= 2; - for (i=0;i<NB_BANDS;i++) - { - bandE[i] = sum[i]; - } -} - -void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) { - int i; - float sum[NB_BANDS] = {0}; - for (i=0;i<NB_BANDS-1;i++) - { - int j; - int band_size; - band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT; - for (j=0;j<band_size;j++) { - float tmp; - float frac = (float)j/band_size; - tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r; - tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i; - sum[i] += (1-frac)*tmp; - sum[i+1] += frac*tmp; - } - } - sum[0] *= 2; - sum[NB_BANDS-1] *= 2; - for (i=0;i<NB_BANDS;i++) - { - bandE[i] = sum[i]; - } -} - -void interp_band_gain(float *g, const float *bandE) { - int i; - memset(g, 0, FREQ_SIZE); - for (i=0;i<NB_BANDS-1;i++) - { - int j; - int band_size; - band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT; - for (j=0;j<band_size;j++) { - float frac = (float)j/band_size; - g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1]; - } - } -} -#else -void compute_band_energy(float *bandE, const kiss_fft_cpx *X) { - int i; - for (i=0;i<NB_BANDS;i++) - { - int j; - opus_val32 sum = 0; - for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) { - sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r); - sum += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i); - } - bandE[i] = sum; - } -} - -void interp_band_gain(float *g, const float *bandE) { - int i; - memset(g, 0, FREQ_SIZE); - for (i=0;i<NB_BANDS;i++) - { - int j; - for (j=0;j<(eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT;j++) - g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = bandE[i]; - } -} -#endif - - -CommonState common; - -static void check_init() { - int i; - if (common.init) return; - common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0); - for (i=0;i<FRAME_SIZE;i++) - common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE)); - for (i=0;i<NB_BANDS;i++) { - int j; - for (j=0;j<NB_BANDS;j++) { - common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS); - if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5); - } - } - common.init = 1; -} - -static void dct(float *out, const float *in) { - int i; - check_init(); - for (i=0;i<NB_BANDS;i++) { - int j; - float sum = 0; - for (j=0;j<NB_BANDS;j++) { - sum += in[j] * common.dct_table[j*NB_BANDS + i]; - } - out[i] = sum*sqrt(2./NB_BANDS); - } -} - -static void idct(float *out, const float *in) { - int i; - check_init(); - for (i=0;i<NB_BANDS;i++) { - int j; - float sum = 0; - for (j=0;j<NB_BANDS;j++) { - sum += in[j] * common.dct_table[i*NB_BANDS + j]; - } - out[i] = sum*sqrt(2./NB_BANDS); - } -} - -static void forward_transform(kiss_fft_cpx *out, const float *in) { - int i; - kiss_fft_cpx x[WINDOW_SIZE]; - kiss_fft_cpx y[WINDOW_SIZE]; - check_init(); - for (i=0;i<WINDOW_SIZE;i++) { - x[i].r = in[i]; - x[i].i = 0; - } - opus_fft(common.kfft, x, y, 0); - for (i=0;i<FREQ_SIZE;i++) { - out[i] = y[i]; - } -} - -static void inverse_transform(float *out, const kiss_fft_cpx *in) { - int i; - kiss_fft_cpx x[WINDOW_SIZE]; - kiss_fft_cpx y[WINDOW_SIZE]; - check_init(); - for (i=0;i<FREQ_SIZE;i++) { - x[i] = in[i]; - } - for (;i<WINDOW_SIZE;i++) { - x[i].r = x[WINDOW_SIZE - i].r; - x[i].i = -x[WINDOW_SIZE - i].i; - } - opus_fft(common.kfft, x, y, 0); - /* output in reverse order for IFFT. */ - out[0] = WINDOW_SIZE*y[0].r; - for (i=1;i<WINDOW_SIZE;i++) { - out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r; - } -} - int rnnoise_get_size() { return sizeof(DenoiseState); } @@ -297,55 +102,9 @@ short float2short(float x) return IMAX(-32767, IMIN(32767, i)); } -static float lpc_from_bands(float *lpc, const float *Ex) -{ - int i; - float e; - float ac[LPC_ORDER+1]; - float rc[LPC_ORDER]; - float Xr[FREQ_SIZE]; - kiss_fft_cpx X_auto[FREQ_SIZE]; - float x_auto[FRAME_SIZE]; - interp_band_gain(Xr, Ex); - RNN_CLEAR(X_auto, FREQ_SIZE); - for (i=0;i<160;i++) X_auto[i].r = Xr[i]; - inverse_transform(x_auto, X_auto); - for (i=0;i<LPC_ORDER+1;i++) ac[i] = x_auto[i]; - - /* -40 dB noise floor. */ - ac[0] += ac[0]*1e-4 + 320/12/38.; - /* Lag windowing. */ - for (i=1;i<LPC_ORDER+1;i++) ac[i] *= (1 - 6e-5*i*i); - e = _celt_lpc(lpc, rc, ac, LPC_ORDER); - return e; -} - -float lpc_from_cepstrum(float *lpc, const float *cepstrum) -{ - int i; - float Ex[NB_BANDS]; - float tmp[NB_BANDS]; - RNN_COPY(tmp, cepstrum, NB_BANDS); - tmp[0] += 4; - idct(Ex, tmp); - for (i=0;i<NB_BANDS;i++) Ex[i] = pow(10.f, Ex[i]); - return lpc_from_bands(lpc, Ex); -} - -#if TRAINING - int lowpass = FREQ_SIZE; int band_lp = NB_BANDS; -static void apply_window(float *x) { - int i; - check_init(); - for (i=0;i<FRAME_SIZE;i++) { - x[i] *= common.half_window[i]; - x[WINDOW_SIZE - 1 - i] *= common.half_window[i]; - } -} - static void frame_analysis(DenoiseState *st, kiss_fft_cpx *X, float *Ex, const float *in) { int i; float x[WINDOW_SIZE]; @@ -575,4 +334,3 @@ int main(int argc, char **argv) { return 0; } -#endif diff --git a/dnn/freq.c b/dnn/freq.c new file mode 100644 index 0000000000000000000000000000000000000000..8f7e5ed35fd59f817001246cea4d26930f0c7292 --- /dev/null +++ b/dnn/freq.c @@ -0,0 +1,247 @@ +/* Copyright (c) 2017-2018 Mozilla */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "kiss_fft.h" +#include "common.h" +#include <math.h> +#include "freq.h" +#include "pitch.h" +#include "arch.h" +#include "celt_lpc.h" +#include <assert.h> + +#define SQUARE(x) ((x)*(x)) + +static const opus_int16 eband5ms[] = { +/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40 +}; + + +typedef struct { + int init; + kiss_fft_state *kfft; + float half_window[FRAME_SIZE]; + float dct_table[NB_BANDS*NB_BANDS]; +} CommonState; + + + +void compute_band_energy(float *bandE, const kiss_fft_cpx *X) { + int i; + float sum[NB_BANDS] = {0}; + for (i=0;i<NB_BANDS-1;i++) + { + int j; + int band_size; + band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT; + for (j=0;j<band_size;j++) { + float tmp; + float frac = (float)j/band_size; + tmp = SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r); + tmp += SQUARE(X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i); + sum[i] += (1-frac)*tmp; + sum[i+1] += frac*tmp; + } + } + sum[0] *= 2; + sum[NB_BANDS-1] *= 2; + for (i=0;i<NB_BANDS;i++) + { + bandE[i] = sum[i]; + } +} + +void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P) { + int i; + float sum[NB_BANDS] = {0}; + for (i=0;i<NB_BANDS-1;i++) + { + int j; + int band_size; + band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT; + for (j=0;j<band_size;j++) { + float tmp; + float frac = (float)j/band_size; + tmp = X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].r; + tmp += X[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i * P[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j].i; + sum[i] += (1-frac)*tmp; + sum[i+1] += frac*tmp; + } + } + sum[0] *= 2; + sum[NB_BANDS-1] *= 2; + for (i=0;i<NB_BANDS;i++) + { + bandE[i] = sum[i]; + } +} + +void interp_band_gain(float *g, const float *bandE) { + int i; + memset(g, 0, FREQ_SIZE); + for (i=0;i<NB_BANDS-1;i++) + { + int j; + int band_size; + band_size = (eband5ms[i+1]-eband5ms[i])<<FRAME_SIZE_SHIFT; + for (j=0;j<band_size;j++) { + float frac = (float)j/band_size; + g[(eband5ms[i]<<FRAME_SIZE_SHIFT) + j] = (1-frac)*bandE[i] + frac*bandE[i+1]; + } + } +} + +CommonState common; + +static void check_init() { + int i; + if (common.init) return; + common.kfft = opus_fft_alloc_twiddles(2*FRAME_SIZE, NULL, NULL, NULL, 0); + for (i=0;i<FRAME_SIZE;i++) + common.half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/FRAME_SIZE) * sin(.5*M_PI*(i+.5)/FRAME_SIZE)); + for (i=0;i<NB_BANDS;i++) { + int j; + for (j=0;j<NB_BANDS;j++) { + common.dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS); + if (j==0) common.dct_table[i*NB_BANDS + j] *= sqrt(.5); + } + } + common.init = 1; +} + +void dct(float *out, const float *in) { + int i; + check_init(); + for (i=0;i<NB_BANDS;i++) { + int j; + float sum = 0; + for (j=0;j<NB_BANDS;j++) { + sum += in[j] * common.dct_table[j*NB_BANDS + i]; + } + out[i] = sum*sqrt(2./NB_BANDS); + } +} + +void idct(float *out, const float *in) { + int i; + check_init(); + for (i=0;i<NB_BANDS;i++) { + int j; + float sum = 0; + for (j=0;j<NB_BANDS;j++) { + sum += in[j] * common.dct_table[i*NB_BANDS + j]; + } + out[i] = sum*sqrt(2./NB_BANDS); + } +} + +void forward_transform(kiss_fft_cpx *out, const float *in) { + int i; + kiss_fft_cpx x[WINDOW_SIZE]; + kiss_fft_cpx y[WINDOW_SIZE]; + check_init(); + for (i=0;i<WINDOW_SIZE;i++) { + x[i].r = in[i]; + x[i].i = 0; + } + opus_fft(common.kfft, x, y, 0); + for (i=0;i<FREQ_SIZE;i++) { + out[i] = y[i]; + } +} + +void inverse_transform(float *out, const kiss_fft_cpx *in) { + int i; + kiss_fft_cpx x[WINDOW_SIZE]; + kiss_fft_cpx y[WINDOW_SIZE]; + check_init(); + for (i=0;i<FREQ_SIZE;i++) { + x[i] = in[i]; + } + for (;i<WINDOW_SIZE;i++) { + x[i].r = x[WINDOW_SIZE - i].r; + x[i].i = -x[WINDOW_SIZE - i].i; + } + opus_fft(common.kfft, x, y, 0); + /* output in reverse order for IFFT. */ + out[0] = WINDOW_SIZE*y[0].r; + for (i=1;i<WINDOW_SIZE;i++) { + out[i] = WINDOW_SIZE*y[WINDOW_SIZE - i].r; + } +} + +float lpc_from_bands(float *lpc, const float *Ex) +{ + int i; + float e; + float ac[LPC_ORDER+1]; + float rc[LPC_ORDER]; + float Xr[FREQ_SIZE]; + kiss_fft_cpx X_auto[FREQ_SIZE]; + float x_auto[FRAME_SIZE]; + interp_band_gain(Xr, Ex); + RNN_CLEAR(X_auto, FREQ_SIZE); + for (i=0;i<160;i++) X_auto[i].r = Xr[i]; + inverse_transform(x_auto, X_auto); + for (i=0;i<LPC_ORDER+1;i++) ac[i] = x_auto[i]; + + /* -40 dB noise floor. */ + ac[0] += ac[0]*1e-4 + 320/12/38.; + /* Lag windowing. */ + for (i=1;i<LPC_ORDER+1;i++) ac[i] *= (1 - 6e-5*i*i); + e = _celt_lpc(lpc, rc, ac, LPC_ORDER); + return e; +} + +float lpc_from_cepstrum(float *lpc, const float *cepstrum) +{ + int i; + float Ex[NB_BANDS]; + float tmp[NB_BANDS]; + RNN_COPY(tmp, cepstrum, NB_BANDS); + tmp[0] += 4; + idct(Ex, tmp); + for (i=0;i<NB_BANDS;i++) Ex[i] = pow(10.f, Ex[i]); + return lpc_from_bands(lpc, Ex); +} + +void apply_window(float *x) { + int i; + check_init(); + for (i=0;i<FRAME_SIZE;i++) { + x[i] *= common.half_window[i]; + x[WINDOW_SIZE - 1 - i] *= common.half_window[i]; + } +} + diff --git a/dnn/freq.h b/dnn/freq.h index 562b34cb4c3d7e3207b6d6342f26e98cfe2e028c..da34272b727e97dcdf6273b4020dab3e9c2496c6 100644 --- a/dnn/freq.h +++ b/dnn/freq.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2017 Mozilla */ +/* Copyright (c) 2017-2018 Mozilla */ /* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -24,6 +24,16 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#define PREEMPHASIS (0.85f) + +#define FRAME_SIZE_SHIFT 2 +#define FRAME_SIZE (40<<FRAME_SIZE_SHIFT) +#define WINDOW_SIZE (2*FRAME_SIZE) +#define FREQ_SIZE (FRAME_SIZE + 1) + +#define NB_BANDS 18 + + #ifndef RNNOISE_EXPORT # if defined(WIN32) # if defined(RNNOISE_BUILD) && defined(DLL_EXPORT) @@ -50,3 +60,16 @@ RNNOISE_EXPORT DenoiseState *rnnoise_create(); RNNOISE_EXPORT void rnnoise_destroy(DenoiseState *st); RNNOISE_EXPORT float rnnoise_process_frame(DenoiseState *st, float *out, const float *in); + +void compute_band_energy(float *bandE, const kiss_fft_cpx *X); +void compute_band_corr(float *bandE, const kiss_fft_cpx *X, const kiss_fft_cpx *P); + +void apply_window(float *x); +void dct(float *out, const float *in); +void idct(float *out, const float *in); +void forward_transform(kiss_fft_cpx *out, const float *in); +void inverse_transform(float *out, const kiss_fft_cpx *in); +float lpc_from_bands(float *lpc, const float *Ex); +float lpc_from_cepstrum(float *lpc, const float *cepstrum); +void apply_window(float *x); +