diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c index 879064aff318db79bbaf7fc546698961050681a2..761c907517b06a126bf0c1e0de0a427aba77b7fa 100644 --- a/dnn/lpcnet_plc.c +++ b/dnn/lpcnet_plc.c @@ -49,18 +49,15 @@ void lpcnet_plc_reset(LPCNetPLCState *st) { OPUS_CLEAR((char*)&st->LPCNET_PLC_RESET_START, sizeof(LPCNetPLCState)- ((char*)&st->LPCNET_PLC_RESET_START - (char*)st)); - lpcnet_reset(&st->lpcnet); lpcnet_encoder_init(&st->enc); OPUS_CLEAR(st->pcm, PLC_BUF_SIZE); - st->pcm_fill = PLC_BUF_SIZE; - st->skip_analysis = 0; st->blend = 0; st->loss_count = 0; } int lpcnet_plc_init(LPCNetPLCState *st, int options) { int ret; - lpcnet_init(&st->lpcnet); + fargan_init(&st->fargan); lpcnet_encoder_init(&st->enc); if ((options&0x3) == LPCNET_PLC_CAUSAL) { st->enable_blending = 1; @@ -86,7 +83,7 @@ int lpcnet_plc_load_model(LPCNetPLCState *st, const unsigned char *data, int len ret = init_plc_model(&st->model, list); free(list); if (ret == 0) { - return lpcnet_load_model(&st->lpcnet, data, len); + return fargan_load_model(&st->fargan, data, len); } else return -1; } @@ -134,8 +131,6 @@ static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in) { compute_gruB(&st->model.plc_gru1, zeros, net->plc_gru1_state, dense_out); compute_gruB(&st->model.plc_gru2, zeros, net->plc_gru2_state, net->plc_gru1_state); _lpcnet_compute_dense(&st->model.plc_out, out, net->plc_gru2_state); - /* Artificially boost the correlation to make harmonics cleaner. */ - out[19] = MIN16(.5f, out[19]+.1f); } static int get_fec_or_pred(LPCNetPLCState *st, float *out) { @@ -166,65 +161,18 @@ static void fec_rewind(LPCNetPLCState *st, int offset) { } } -void clear_state(LPCNetPLCState *st) { - OPUS_CLEAR(st->lpcnet.last_sig, LPC_ORDER); - st->lpcnet.last_exc = lin2ulaw(0.f); - st->lpcnet.deemph_mem = 0; - OPUS_CLEAR(st->lpcnet.nnet.gru_a_state, GRU_A_STATE_SIZE); - OPUS_CLEAR(st->lpcnet.nnet.gru_b_state, GRU_B_STATE_SIZE); -} - /* In this causal version of the code, the DNN model implemented by compute_plc_pred() needs to generate two feature vectors to conceal the first lost packet.*/ int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { int i; float x[FRAME_SIZE]; - opus_int16 output[FRAME_SIZE]; float plc_features[2*NB_BANDS+NB_FEATURES+1]; - int delta = 0; for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i]; burg_cepstral_analysis(plc_features, x); - if (st->skip_analysis) { - /*fprintf(stderr, "skip update\n");*/ - if (st->blend) { - opus_int16 tmp[FRAME_SIZE-TRAINING_OFFSET]; - float zeros[2*NB_BANDS+NB_FEATURES+1] = {0}; - OPUS_COPY(zeros, plc_features, 2*NB_BANDS); - zeros[2*NB_BANDS+NB_FEATURES] = 1; - if (st->enable_blending) { - LPCNetState copy; - st->plc_net = st->plc_copy[FEATURES_DELAY]; - compute_plc_pred(st, st->features, zeros); - for (i=0;i<FEATURES_DELAY;i++) { - /* FIXME: backtrack state, replace features. */ - run_frame_network_deferred(&st->lpcnet, st->features); - } - copy = st->lpcnet; - lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], tmp, FRAME_SIZE-TRAINING_OFFSET, 0); - for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) { - float w; - w = .5 - .5*cos(M_PI*i/(FRAME_SIZE-TRAINING_OFFSET)); - pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*(tmp[i]-delta)); - } - st->lpcnet = copy; - /*lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);*/ - } else { - if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1]; - fec_rewind(st, FEATURES_DELAY); -#ifdef PLC_SKIP_UPDATES - lpcnet_reset_signal(&st->lpcnet); -#else - OPUS_COPY(tmp, pcm, FRAME_SIZE-TRAINING_OFFSET); - lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET); -#endif - } - OPUS_COPY(st->pcm, pcm, FRAME_SIZE); - st->pcm_fill = FRAME_SIZE; - } else { - OPUS_COPY(&st->pcm[st->pcm_fill], pcm, FRAME_SIZE); - st->pcm_fill += FRAME_SIZE; - } + if (st->blend) { + if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1]; + fec_rewind(st, FEATURES_DELAY); } /* Update state. */ /*fprintf(stderr, "update state\n");*/ @@ -240,25 +188,11 @@ int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { if (st->fec_skip) st->fec_skip--; else if (st->fec_read_pos < st->fec_fill_pos) st->fec_read_pos++; st->fec_keep_pos = IMAX(0, IMAX(st->fec_keep_pos, st->fec_read_pos-FEATURES_DELAY-1)); + OPUS_MOVE(&st->cont_features[0], &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES); } - if (st->skip_analysis) { - if (st->enable_blending) { - /* FIXME: backtrack state, replace features. */ - run_frame_network_deferred(&st->lpcnet, st->enc.features); - } - st->skip_analysis--; - } else { - for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE+i] = pcm[i]; - OPUS_COPY(output, &st->pcm[0], FRAME_SIZE); -#ifdef PLC_SKIP_UPDATES - { - run_frame_network_deferred(&st->lpcnet, st->enc.features); - } -#else - lpcnet_synthesize_impl(&st->lpcnet, st->enc.features, output, FRAME_SIZE, FRAME_SIZE); -#endif - OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE); - } + OPUS_COPY(&st->cont_features[(CONT_VECTORS-1)*NB_FEATURES], st->enc.features, NB_FEATURES); + OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], FARGAN_CONT_SAMPLES-FRAME_SIZE); + for (i=0;i<FRAME_SIZE;i++) st->pcm[FARGAN_CONT_SAMPLES-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i]; st->loss_count = 0; st->blend = 0; return 0; @@ -267,31 +201,22 @@ int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6}; int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) { int i; - opus_int16 output[FRAME_SIZE]; - run_frame_network_flush(&st->lpcnet); - /* If we concealed the previous frame, finish synthesizing the rest of the samples. */ - /* FIXME: Copy/predict features. */ - while (st->pcm_fill > 0) { - /*fprintf(stderr, "update state for PLC %d\n", st->pcm_fill);*/ - int update_count; - update_count = IMIN(st->pcm_fill, FRAME_SIZE); - OPUS_COPY(output, &st->pcm[0], update_count); - OPUS_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY); - st->plc_copy[0] = st->plc_net; + if (st->blend == 0) { + get_fec_or_pred(st, st->features); + OPUS_MOVE(&st->cont_features[0], &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES); + OPUS_COPY(&st->cont_features[(CONT_VECTORS-1)*NB_FEATURES], st->features, NB_FEATURES); get_fec_or_pred(st, st->features); - lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], output, update_count, update_count); - OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE); - st->pcm_fill -= update_count; - st->skip_analysis++; + OPUS_MOVE(&st->cont_features[0], &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES); + OPUS_COPY(&st->cont_features[(CONT_VECTORS-1)*NB_FEATURES], st->features, NB_FEATURES); + fargan_cont(&st->fargan, st->pcm, st->cont_features); } OPUS_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY); st->plc_copy[0] = st->plc_net; - /*lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);*/ if (get_fec_or_pred(st, st->features)) st->loss_count = 0; else st->loss_count++; if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9)); else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]); - lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE, 0); + fargan_synthesize_int(&st->fargan, pcm, &st->features[0]); { float x[FRAME_SIZE]; /* FIXME: Can we do better? */ @@ -300,6 +225,10 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) { compute_frame_features(&st->enc, x); process_single_frame(&st->enc, NULL); } + OPUS_MOVE(&st->cont_features[0], &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES); + OPUS_COPY(&st->cont_features[(CONT_VECTORS-1)*NB_FEATURES], st->enc.features, NB_FEATURES); + OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], FARGAN_CONT_SAMPLES-FRAME_SIZE); + for (i=0;i<FRAME_SIZE;i++) st->pcm[FARGAN_CONT_SAMPLES-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i]; st->blend = 1; return 0; } diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index c533cbaf96995381d95f170e3de9e0654ec878b6..4dfcadaaf87f3509d8868e5331c4aa69da854be9 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -8,6 +8,7 @@ #include "plc_data.h" #include "kiss99.h" #include "pitchdnn.h" +#include "fargan.h" #define PITCH_FRAME_SIZE 320 @@ -19,6 +20,8 @@ #define PITCH_IF_MAX_FREQ 30 #define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2) +#define CONT_VECTORS 5 + struct LPCNetState { LPCNetModel model; int arch; @@ -69,7 +72,7 @@ struct LPCNetEncState{ #define PLC_BUF_SIZE (FEATURES_DELAY*FRAME_SIZE + FRAME_SIZE) struct LPCNetPLCState { PLCModel model; - LPCNetState lpcnet; + FARGANState fargan; LPCNetEncState enc; int arch; int enable_blending; @@ -80,11 +83,10 @@ struct LPCNetPLCState { int fec_read_pos; int fec_fill_pos; int fec_skip; - opus_int16 pcm[PLC_BUF_SIZE+FRAME_SIZE]; - int pcm_fill; - int skip_analysis; + float pcm[FARGAN_CONT_SAMPLES]; int blend; float features[NB_TOTAL_FEATURES]; + float cont_features[CONT_VECTORS*NB_FEATURES]; int loss_count; PLCNetState plc_net; PLCNetState plc_copy[FEATURES_DELAY+1];