diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 0515352b1cc29f0ee76fb997e1e0533e261c06bd..053ca774e4332dec17c6ddd0fe9c7f58b779c06b 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -81,22 +81,21 @@ static short float2short(float x) } -void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file, int nframes) { - int i, k; - for (k=0;k<nframes;k++) { +void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) { + int i; short data[2*FRAME_SIZE]; for (i=0;i<FRAME_SIZE;i++) { float p=0; float e; int j; - for (j=0;j<LPC_ORDER;j++) p -= st->features[k][NB_BANDS+2+j]*st->sig_mem[j]; - e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p); + for (j=0;j<LPC_ORDER;j++) p -= st->features[NB_BANDS+2+j]*st->sig_mem[j]; + e = lin2ulaw(pcm[i] - p); /* Signal in. */ data[2*i] = float2short(st->sig_mem[0]); /* Signal out. */ - data[2*i+1] = pcm[k*FRAME_SIZE+i]; + data[2*i+1] = pcm[i]; /* Simulate error on excitation. */ - e += noise[k*FRAME_SIZE+i]; + e += noise[i]; e = IMIN(255, IMAX(0, e)); RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1); @@ -104,7 +103,6 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f st->exc_mem = e; } fwrite(data, 4*FRAME_SIZE, 1, file); - } } int main(int argc, char **argv) { @@ -124,8 +122,7 @@ int main(int argc, char **argv) { FILE *ffeat; FILE *fpcm=NULL; short pcm[FRAME_SIZE]={0}; - short pcmbuf[FRAME_SIZE*4]={0}; - int noisebuf[FRAME_SIZE*4]={0}; + int noisebuf[FRAME_SIZE]={0}; short tmp[FRAME_SIZE] = {0}; float savedX[FRAME_SIZE] = {0}; float speech_gain=1; @@ -237,18 +234,12 @@ int main(int argc, char **argv) { for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]); compute_frame_features(st, x); - RNN_COPY(&pcmbuf[st->pcount*FRAME_SIZE], pcm, FRAME_SIZE); if (fpcm) { - compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std); + compute_noise(noisebuf, noise_std); } process_single_frame(st, ffeat); - if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1); - st->pcount++; - /* Running on groups of 4 frames. */ - if (st->pcount == 4) { - st->pcount = 0; - } + if (fpcm) write_audio(st, pcm, noisebuf, fpcm); /*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/ for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]); old_speech_gain = speech_gain; diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 76f9f776134803017053ad315d3d2b14317f0922..f5e33689f980cfd66a5beb1e039eb68f96c9006e 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -99,10 +99,10 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { follow = MAX16(follow-2.5f, Ly[i]); E += Ex[i]; } - dct(st->features[st->pcount], Ly); - st->features[st->pcount][0] -= 4; - lpc_from_cepstrum(st->lpc, st->features[st->pcount]); - for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][NB_BANDS+2+i] = st->lpc[i]; + dct(st->features, Ly); + st->features[0] -= 4; + lpc_from_cepstrum(st->lpc, st->features); + for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i]; RNN_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD); RNN_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET); for (i=0;i<FRAME_SIZE;i++) { @@ -123,12 +123,12 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { celt_pitch_xcorr(&st->exc_buf[PITCH_MAX_PERIOD+off], st->exc_buf+off, xcorr, FRAME_SIZE/2, PITCH_MAX_PERIOD, st->arch); ener0 = celt_inner_prod_c(&st->exc_buf[PITCH_MAX_PERIOD+off], &st->exc_buf[PITCH_MAX_PERIOD+off], FRAME_SIZE/2); ener1 = celt_inner_prod_c(&st->exc_buf[off], &st->exc_buf[off], FRAME_SIZE/2-1); - st->frame_weight[2+2*st->pcount+sub] = ener0; - /*printf("%f\n", st->frame_weight[2+2*st->pcount+sub]);*/ + st->frame_weight[sub] = ener0; + /*printf("%f\n", st->frame_weight[sub]);*/ for (i=0;i<PITCH_MAX_PERIOD;i++) { ener1 += st->exc_buf[i+off+FRAME_SIZE/2-1]*st->exc_buf[i+off+FRAME_SIZE/2-1]; ener = 1 + ener0 + ener1; - st->xc[2+2*st->pcount+sub][i] = 2*xcorr[i] / ener; + st->xc[sub][i] = 2*xcorr[i] / ener; ener1 -= st->exc_buf[i+off]*st->exc_buf[i+off]; } if (1) { @@ -140,18 +140,18 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { float val1=0, val2=0; int j; for (j=0;j<7;j++) { - val1 += st->xc[2+2*st->pcount+sub][i-3+j]*interp[j]; - val2 += st->xc[2+2*st->pcount+sub][i+3-j]*interp[j]; - interpolated[i] = MAX16(st->xc[2+2*st->pcount+sub][i], MAX16(val1, val2)); + val1 += st->xc[sub][i-3+j]*interp[j]; + val2 += st->xc[sub][i+3-j]*interp[j]; + interpolated[i] = MAX16(st->xc[sub][i], MAX16(val1, val2)); } } for (i=4;i<PITCH_MAX_PERIOD-4;i++) { - st->xc[2+2*st->pcount+sub][i] = interpolated[i]; + st->xc[sub][i] = interpolated[i]; } } #if 0 for (i=0;i<PITCH_MAX_PERIOD;i++) - printf("%f ", st->xc[2*st->pcount+sub][i]); + printf("%f ", st->xc[sub][i]); printf("\n"); #endif } @@ -165,14 +165,14 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) { int pitch_prev[2][PITCH_MAX_PERIOD]; float frame_corr; float frame_weight_sum = 1e-15f; - for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[2+2*st->pcount+sub]; - for(sub=0;sub<2;sub++) st->frame_weight[2+2*st->pcount+sub] *= (2.f/frame_weight_sum); + for(sub=0;sub<2;sub++) frame_weight_sum += st->frame_weight[sub]; + for(sub=0;sub<2;sub++) st->frame_weight[sub] *= (2.f/frame_weight_sum); for(sub=0;sub<2;sub++) { float max_path_all = -1e15f; best_i = 0; for (i=0;i<PITCH_MAX_PERIOD-2*PITCH_MIN_PERIOD;i++) { - float xc_half = MAX16(MAX16(st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i)/2], st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[2+2*st->pcount+sub][(PITCH_MAX_PERIOD+i-1)/2]); - if (st->xc[2+2*st->pcount+sub][i] < xc_half*1.1f) st->xc[2+2*st->pcount+sub][i] *= .8f; + float xc_half = MAX16(MAX16(st->xc[sub][(PITCH_MAX_PERIOD+i)/2], st->xc[sub][(PITCH_MAX_PERIOD+i+2)/2]), st->xc[sub][(PITCH_MAX_PERIOD+i-1)/2]); + if (st->xc[sub][i] < xc_half*1.1f) st->xc[sub][i] *= .8f; } for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) { int j; @@ -185,7 +185,7 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) { pitch_prev[sub][i] = i+j; } } - st->pitch_max_path[1][i] = max_prev + st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][i]; + st->pitch_max_path[1][i] = max_prev + st->frame_weight[sub]*st->xc[sub][i]; if (st->pitch_max_path[1][i] > max_path_all) { max_path_all = st->pitch_max_path[1][i]; best_i = i; @@ -204,14 +204,14 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) { /* Backward pass. */ for (sub=1;sub>=0;sub--) { best[2+sub] = PITCH_MAX_PERIOD-best_i; - frame_corr += st->frame_weight[2+2*st->pcount+sub]*st->xc[2+2*st->pcount+sub][best_i]; + frame_corr += st->frame_weight[sub]*st->xc[sub][best_i]; best_i = pitch_prev[sub][best_i]; } frame_corr /= 2; - st->features[st->pcount][NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200); - st->features[st->pcount][NB_BANDS + 1] = frame_corr-.5f; + st->features[NB_BANDS] = .01f*(IMAX(66, IMIN(510, best[2]+best[3]))-200); + st->features[NB_BANDS + 1] = frame_corr-.5f; if (ffeat) { - fwrite(st->features[st->pcount], sizeof(float), NB_TOTAL_FEATURES, ffeat); + fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat); } } @@ -229,7 +229,7 @@ static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float * preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE); compute_frame_features(st, x); process_single_frame(st, NULL); - RNN_COPY(features, &st->features[0][0], NB_TOTAL_FEATURES); + RNN_COPY(features, &st->features[0], NB_TOTAL_FEATURES); return 0; } diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c index 61d96b33ef71007156e529c09ab89788330bf6be..6d384a0370c12a472dd484ee692c2fc8eb188712 100644 --- a/dnn/lpcnet_plc.c +++ b/dnn/lpcnet_plc.c @@ -183,7 +183,6 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { int delta = 0; for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i]; burg_cepstral_analysis(plc_features, x); - st->enc.pcount = 0; if (st->skip_analysis) { /*fprintf(stderr, "skip update\n");*/ if (st->blend) { @@ -232,7 +231,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { compute_frame_features(&st->enc, x); process_single_frame(&st->enc, NULL); if (!st->blend) { - RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features[0], NB_FEATURES); + RNN_COPY(&plc_features[2*NB_BANDS], st->enc.features, NB_FEATURES); plc_features[2*NB_BANDS+NB_FEATURES] = 1; compute_plc_pred(st, st->features, plc_features); /* Discard an FEC frame that we know we will no longer need. */ @@ -243,7 +242,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { if (st->skip_analysis) { if (st->enable_blending) { /* FIXME: backtrack state, replace features. */ - run_frame_network_deferred(&st->lpcnet, st->enc.features[0]); + run_frame_network_deferred(&st->lpcnet, st->enc.features); } st->skip_analysis--; } else { @@ -251,10 +250,10 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { RNN_COPY(output, &st->pcm[0], FRAME_SIZE); #ifdef PLC_SKIP_UPDATES { - run_frame_network_deferred(&st->lpcnet, st->enc.features[0]); + run_frame_network_deferred(&st->lpcnet, st->enc.features); } #else - lpcnet_synthesize_impl(&st->lpcnet, st->enc.features[0], output, FRAME_SIZE, FRAME_SIZE); + lpcnet_synthesize_impl(&st->lpcnet, st->enc.features, output, FRAME_SIZE, FRAME_SIZE); #endif RNN_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE); } @@ -268,7 +267,6 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) { int i; short output[FRAME_SIZE]; run_frame_network_flush(&st->lpcnet); - st->enc.pcount = 0; /* If we concealed the previous frame, finish synthesizing the rest of the samples. */ /* FIXME: Copy/predict features. */ while (st->pcm_fill > 0) { diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index 753f6b016d525aa10d729aeb03ff553c1de7097d..a0f0f7dac1670229440a6f8c57465ef8a190bab0 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -45,11 +45,10 @@ struct LPCNetEncState{ int arch; float analysis_mem[OVERLAP_SIZE]; float mem_preemph; - int pcount; float pitch_mem[LPC_ORDER]; float pitch_filt; - float xc[10][PITCH_MAX_PERIOD+1]; - float frame_weight[10]; + float xc[2][PITCH_MAX_PERIOD+1]; + float frame_weight[2]; float exc_buf[PITCH_BUF_SIZE]; float pitch_max_path[2][PITCH_MAX_PERIOD]; float pitch_max_path_all; @@ -58,7 +57,7 @@ struct LPCNetEncState{ int last_period; float lpc[LPC_ORDER]; float vq_mem[NB_BANDS]; - float features[4][NB_TOTAL_FEATURES]; + float features[NB_TOTAL_FEATURES]; float sig_mem[LPC_ORDER]; int exc_mem; float burg_cepstrum[2*NB_BANDS];