diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 0be446d5585e58681ee370695945bff17d5d3942..1da90ee6674f254a9660a1ecd794810d447de1e7 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -134,6 +134,7 @@ int main(int argc, char **argv) { float noise_std=0; int training = -1; int burg = 0; + int pitch = 0; srand(getpid()); st = lpcnet_encoder_create(); argv0=argv[0]; @@ -145,6 +146,14 @@ int main(int argc, char **argv) { burg = 1; training = 0; } + if (argc == 5 && strcmp(argv[1], "-ptrain")==0) { + pitch = 1; + training = 1; + } + if (argc == 4 && strcmp(argv[1], "-ptest")==0) { + pitch = 1; + training = 0; + } if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1; if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0; if (training == -1) { @@ -239,7 +248,18 @@ int main(int argc, char **argv) { compute_noise(noisebuf, noise_std); } - process_single_frame(st, ffeat); + if (pitch) { + signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES]; + for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) { + pitch_features[i] = floor(.5 + 127.f*st->xcorr_features[i]); + } + for (i=0;i<PITCH_IF_FEATURES;i++) { + pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = floor(.5 + 127.f*st->if_features[i]); + } + fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat); + } else { + process_single_frame(st, ffeat); + } if (fpcm) write_audio(st, pcm, noisebuf, fpcm); /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/ for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]); diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index cfd04d2cf34fa2127c187d1f7b658aedf53310a7..7205ddb5cef5db5598bf25abafa67cda100dae1c 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -41,6 +41,7 @@ #include "lpcnet_private.h" #include "lpcnet.h" #include "os_support.h" +#include "_kiss_fft_guts.h" int lpcnet_encoder_get_size() { @@ -104,6 +105,19 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { static const float lp_a[2] = {-1.54220f, 0.70781f}; OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET); frame_analysis(st, X, Ex, in); + st->if_features[0] = MAX16(-1, MIN16(1, (1.f/64)*(10.f*log10(1e-15 + X[0].r*X[0].r)-6))); + for (i=1;i<PITCH_IF_MAX_FREQ;i++) { + kiss_fft_cpx prod; + float norm_1; + C_MULC(prod, X[i], st->prev_if[i]); + norm_1 = 1.f/sqrt(1e-15 + prod.r*prod.r + prod.i*prod.i); + C_MULBYSCALAR(prod, norm_1); + st->if_features[3*i-2] = prod.r; + st->if_features[3*i-1] = prod.i; + st->if_features[3*i] = MAX16(-1, MIN16(1, (1.f/64)*(10.f*log10(1e-15 + X[i].r*X[i].r + X[i].i*X[i].i)-6))); + } + OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ); + /*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/ logMax = -2; follow = -2; for (i=0;i<NB_BANDS;i++) { @@ -133,6 +147,22 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/ } biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE); + { + double ener1; + float *buf = st->exc_buf; + celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, st->arch); + ener0 = celt_inner_prod_c(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE); + ener1 = celt_inner_prod_c(&buf[0], &buf[0], FRAME_SIZE-1); + /*printf("%f\n", st->frame_weight[sub]);*/ + for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) { + ener1 += buf[i+FRAME_SIZE-1]*buf[i+FRAME_SIZE-1]; + ener = 1 + ener0 + ener1; + st->xcorr_features[i] = 2*xcorr[i] / ener; + ener1 -= buf[i]*buf[i]; + /*printf("%f ", st->xcorr_features[i]);*/ + } + /*printf("\n");*/ + } /* Cross-correlation on half-frames. */ for (sub=0;sub<2;sub++) { int off = sub*FRAME_SIZE/2; diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index 0bd8cad332bfe82b509fbd97c13450247aeb26f5..597b487d624d9b997e92ec66f7f82a6f2ef50872 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -17,6 +17,9 @@ #define PLC_MAX_FEC 100 #define MAX_FEATURE_BUFFER_SIZE 4 +#define PITCH_IF_MAX_FREQ 30 +#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2) + struct LPCNetState { LPCNetModel model; int arch; @@ -44,6 +47,9 @@ struct LPCNetEncState{ int arch; float analysis_mem[OVERLAP_SIZE]; float mem_preemph; + kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ]; + float if_features[PITCH_IF_FEATURES]; + float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD]; float pitch_mem[LPC_ORDER]; float pitch_filt; float xc[2][PITCH_MAX_PERIOD+1];