From 2824bd1f666102cff9e904ea8c6496b261960c03 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@amazon.com> Date: Wed, 28 Jun 2023 17:01:17 -0400 Subject: [PATCH] Adjusting offsets to synthesize 10 ms at a time Should make synthesis easier in the future --- dnn/lpcnet_plc.c | 10 +++++----- dnn/lpcnet_private.h | 2 +- silk/dred_config.h | 9 ++++++--- src/opus_decoder.c | 2 +- src/opus_encoder.c | 2 +- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c index d0067583f..879064aff 100644 --- a/dnn/lpcnet_plc.c +++ b/dnn/lpcnet_plc.c @@ -208,7 +208,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { pcm[i] = (int)floor(.5 + w*pcm[i] + (1-w)*(tmp[i]-delta)); } st->lpcnet = copy; - lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET); + /*lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET);*/ } else { if (FEATURES_DELAY > 0) st->plc_net = st->plc_copy[FEATURES_DELAY-1]; fec_rewind(st, FEATURES_DELAY); @@ -219,8 +219,8 @@ int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, FRAME_SIZE-TRAINING_OFFSET); #endif } - OPUS_COPY(st->pcm, &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET); - st->pcm_fill = TRAINING_OFFSET; + OPUS_COPY(st->pcm, pcm, FRAME_SIZE); + st->pcm_fill = FRAME_SIZE; } else { OPUS_COPY(&st->pcm[st->pcm_fill], pcm, FRAME_SIZE); st->pcm_fill += FRAME_SIZE; @@ -286,12 +286,12 @@ int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) { } OPUS_MOVE(&st->plc_copy[1], &st->plc_copy[0], FEATURES_DELAY); st->plc_copy[0] = st->plc_net; - lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0); + /*lpcnet_synthesize_tail_impl(&st->lpcnet, pcm, FRAME_SIZE-TRAINING_OFFSET, 0);*/ if (get_fec_or_pred(st, st->features)) st->loss_count = 0; else st->loss_count++; if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9)); else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]); - lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], &pcm[FRAME_SIZE-TRAINING_OFFSET], TRAINING_OFFSET, 0); + lpcnet_synthesize_impl(&st->lpcnet, &st->features[0], pcm, FRAME_SIZE, 0); { float x[FRAME_SIZE]; /* FIXME: Can we do better? */ diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index 87d142a14..da048e7e8 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -62,7 +62,7 @@ struct LPCNetEncState{ float burg_cepstrum[2*NB_BANDS]; }; -#define PLC_BUF_SIZE (FEATURES_DELAY*FRAME_SIZE + TRAINING_OFFSET) +#define PLC_BUF_SIZE (FEATURES_DELAY*FRAME_SIZE + FRAME_SIZE) struct LPCNetPLCState { PLCModel model; LPCNetState lpcnet; diff --git a/silk/dred_config.h b/silk/dred_config.h index ccaf76dbb..c190b5545 100644 --- a/silk/dred_config.h +++ b/silk/dred_config.h @@ -35,13 +35,16 @@ #define DRED_NUM_FEATURES 20 #define DRED_LATENT_DIM 80 #define DRED_STATE_DIM 24 -#define DRED_MAX_FRAMES 100 -#define DRED_SILK_ENCODER_DELAY (79+12) +#define DRED_SILK_ENCODER_DELAY (79+12-80) #define DRED_FRAME_SIZE 160 #define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE)) #define DRED_MAX_DATA_SIZE 1000 #define DRED_ENC_Q0 9 #define DRED_ENC_Q1 15 -#define DRED_NUM_REDUNDANCY_FRAMES 50 + +/* Covers 1.04 second so we can cover one second, after the lookahead. */ +#define DRED_MAX_LATENTS 26 +#define DRED_NUM_REDUNDANCY_FRAMES (2*DRED_MAX_LATENTS) +#define DRED_MAX_FRAMES (4*DRED_MAX_LATENTS) #endif diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 88571157d..c45facac4 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -673,7 +673,7 @@ int opus_decode_native(OpusDecoder *st, const unsigned char *data, /* if blend==0, the last PLC call was "update" and we need to feed two extra 10-ms frames. */ if (st->lpcnet.blend == 0) needed_feature_frames+=2; for (i=0;i<needed_feature_frames;i++) { - int feature_offset = (needed_feature_frames-i-1 + (dred_offset/(st->Fs/100)-2)); + int feature_offset = (needed_feature_frames-i-1 + (dred_offset/(st->Fs/100)-1)); if (feature_offset <= 4*dred->nb_latents-1 && feature_offset >= 0) { lpcnet_plc_fec_add(&st->lpcnet, dred->fec_features+feature_offset*DRED_NUM_FEATURES); } else { diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 9f9f3fe33..b8e0f92aa 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -2205,7 +2205,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ unsigned char buf[DRED_MAX_DATA_SIZE]; int dred_chunks; int dred_bytes_left; - dred_chunks = IMIN(st->dred_duration/4, DRED_NUM_REDUNDANCY_FRAMES/2); + dred_chunks = IMIN((st->dred_duration+5)/4, DRED_NUM_REDUNDANCY_FRAMES/2); dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-2); /* Check whether we actually have something to encode. */ if (dred_chunks >= 1 && dred_bytes_left >= DRED_MIN_BYTES+2) { -- GitLab