diff --git a/lpcnet b/lpcnet index 85d12691acd49938f8688479496e669076a78ba7..83e89e7e8b8efeaea9b2c67eb2cb9da8a86e27e6 160000 --- a/lpcnet +++ b/lpcnet @@ -1 +1 @@ -Subproject commit 85d12691acd49938f8688479496e669076a78ba7 +Subproject commit 83e89e7e8b8efeaea9b2c67eb2cb9da8a86e27e6 diff --git a/silk/dred_config.h b/silk/dred_config.h index ff9903bafe9b8177e65b638aa70e4a8f4767600b..ccaf76dbb30de309a5f36cd208b1e0aa833c8420 100644 --- a/silk/dred_config.h +++ b/silk/dred_config.h @@ -36,7 +36,7 @@ #define DRED_LATENT_DIM 80 #define DRED_STATE_DIM 24 #define DRED_MAX_FRAMES 100 -#define DRED_SILK_ENCODER_DELAY 79 +#define DRED_SILK_ENCODER_DELAY (79+12) #define DRED_FRAME_SIZE 160 #define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE)) #define DRED_MAX_DATA_SIZE 1000 diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c index 83815b1f7905f442d1bb536b8c03d7ca6f8cfd3f..e1db96a2dfdbca2db2d9c103e2986be0f3899673 100644 --- a/silk/dred_encoder.c +++ b/silk/dred_encoder.c @@ -41,36 +41,48 @@ #include "celt/entenc.h" #include "dred_decoder.h" +#include "float_cast.h" +#include "os_support.h" -void init_dred_encoder(DREDEnc* enc) +void dred_encoder_reset(DREDEnc* enc) { - memset(enc, 0, sizeof(*enc)); + RNN_CLEAR((char*)&enc->DREDENC_RESET_START, + sizeof(DREDEnc)- + ((char*)&enc->DREDENC_RESET_START - (char*)enc)); + lpcnet_encoder_init(&enc->lpcnet_enc_state); + DRED_rdovae_init_encoder(&enc->rdovae_enc); +} + +void dred_encoder_init(DREDEnc* enc, int Fs, int channels) +{ + enc->Fs = Fs; + enc->channels = channels; #ifndef USE_WEIGHTS_FILE init_rdovaeenc(&enc->model, rdovae_enc_arrays); #endif - lpcnet_encoder_init(&enc->lpcnet_enc_state); - DRED_rdovae_init_encoder(&enc->rdovae_enc); + dred_encoder_reset(enc); } -void dred_process_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame) +void dred_process_silk_frame(DREDEnc *enc, const float *silk_frame) { + int i; float feature_buffer[2 * 36]; float input_buffer[2*DRED_NUM_FEATURES] = {0}; /* delay signal by 79 samples */ - memmove(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer)); - memcpy(enc->input_buffer + DRED_SILK_ENCODER_DELAY, silk_frame, DRED_DFRAME_SIZE * sizeof(*silk_frame)); + OPUS_MOVE(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY); + for (i=0;i<DRED_DFRAME_SIZE;i++) enc->input_buffer[DRED_SILK_ENCODER_DELAY+i] = FLOAT2INT16(silk_frame[i]); /* shift latents buffer */ - memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer)); + OPUS_MOVE(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM); /* calculate LPCNet features */ - lpcnet_compute_single_frame_features(&enc->lpcnet_enc_state, enc->input_buffer, feature_buffer); - lpcnet_compute_single_frame_features(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, feature_buffer + 36); + lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer, feature_buffer); + lpcnet_compute_single_frame_features_float(&enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, feature_buffer + 36); /* prepare input buffer (discard LPC coefficients) */ - memcpy(input_buffer, feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0])); - memcpy(input_buffer + DRED_NUM_FEATURES, feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0])); + OPUS_COPY(input_buffer, feature_buffer, DRED_NUM_FEATURES); + OPUS_COPY(input_buffer + DRED_NUM_FEATURES, feature_buffer + 36, DRED_NUM_FEATURES); /* run RDOVAE encoder */ DRED_rdovae_encode_dframe(&enc->rdovae_enc, &enc->model, enc->latents_buffer, enc->state_buffer, input_buffer); diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h index 8fcb07d1f31e7aefeb0a7e369a662af3bccd0e3b..414f73df5088c4807342fdf7c2d860aa226d3d32 100644 --- a/silk/dred_encoder.h +++ b/silk/dred_encoder.h @@ -38,7 +38,11 @@ typedef struct { RDOVAEEnc model; - opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY]; + int Fs; + int channels; + +#define DREDENC_RESET_START input_buffer + float input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY]; float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM]; int latents_buffer_fill; float state_buffer[24]; @@ -47,11 +51,12 @@ typedef struct { } DREDEnc; -void init_dred_encoder(DREDEnc* enc); +void dred_encoder_init(DREDEnc* enc, int Fs, int channels); +void dred_encoder_reset(DREDEnc* enc); void dred_deinit_encoder(DREDEnc *enc); -void dred_process_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame); +void dred_process_silk_frame(DREDEnc *enc, const float *silk_frame); int dred_encode_silk_frame(DREDEnc *enc, unsigned char *buf, int max_chunks, int max_bytes); diff --git a/silk/enc_API.c b/silk/enc_API.c index bdb858afc1ee5302702087edf1db7c161c501fc5..ff3070a1d7dfa6d4d48f344e3a2d29ad584e8484 100644 --- a/silk/enc_API.c +++ b/silk/enc_API.c @@ -468,15 +468,6 @@ opus_int silk_Encode( /* O Returns error co } silk_encode_do_VAD_Fxx( &psEnc->state_Fxx[ 0 ], activity ); -#ifdef ENABLE_NEURAL_FEC - if ( encControl->useDRED ) { - /* DRED Encoder */ - dred_process_silk_frame( &psEnc->state_Fxx[ 0 ].sCmn.dred_encoder, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[0] ); - } else { - psEnc->state_Fxx[ 0 ].sCmn.dred_encoder.latents_buffer_fill = 0; - } -#endif - /* Encode */ for( n = 0; n < encControl->nChannelsInternal; n++ ) { opus_int maxBits, useCBR; diff --git a/silk/init_encoder.c b/silk/init_encoder.c index 50ec22b34f8cb1f0f27fba255a132e86ca1a4f40..78b5ee88b4ae80f14d9779ced3c3f53ff0a00f2e 100644 --- a/silk/init_encoder.c +++ b/silk/init_encoder.c @@ -64,9 +64,5 @@ opus_int silk_init_encoder( /* Initialize Silk VAD */ ret += silk_VAD_Init( &psEnc->sCmn.sVAD ); -#ifdef ENABLE_NEURAL_FEC - /* Initialize DRED Encoder */ - init_dred_encoder( &psEnc->sCmn.dred_encoder ); -#endif return ret; } diff --git a/silk/structs.h b/silk/structs.h index 6a0889eeeaee87bf40b85d503bb55086a51e42ba..2058d4e86d23864f4c597a65689b173e206f8193 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -235,9 +235,6 @@ typedef struct { opus_int LBRR_GainIncreases; /* Gains increment for coding LBRR frames */ SideInfoIndices indices_LBRR[ MAX_FRAMES_PER_PACKET ]; opus_int8 pulses_LBRR[ MAX_FRAMES_PER_PACKET ][ MAX_FRAME_LENGTH ]; -#ifdef ENABLE_NEURAL_FEC - DREDEnc dred_encoder; -#endif } silk_encoder_state; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 3764b373caaadbc5be82a725f4cf9232fee2e4d6..c6759c8c733ddf06762c4f5fb57151c3f403f9b4 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -67,6 +67,9 @@ struct OpusEncoder { int celt_enc_offset; int silk_enc_offset; silk_EncControlStruct silk_mode; +#ifdef ENABLE_NEURAL_FEC + DREDEnc dred_encoder; +#endif int application; int channels; int delay_compensation; @@ -240,6 +243,11 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); +#ifdef ENABLE_NEURAL_FEC + /* Initialize DRED Encoder */ + dred_encoder_init( &st->dred_encoder, Fs, channels ); +#endif + st->use_vbr = 1; /* Makes constrained VBR the default (safer for real-time use) */ st->vbr_constraint = 1; @@ -1461,6 +1469,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ { silk_EncControlStruct dummy; silk_InitEncoder( silk_enc, st->arch, &dummy); +#ifdef ENABLE_NEURAL_FEC + /* Initialize DRED Encoder */ + dred_encoder_reset( &st->dred_encoder ); +#endif prefill=1; } @@ -1814,6 +1826,15 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ } } +#ifdef ENABLE_NEURAL_FEC + if ( st->dred_duration > 0 ) { + /* DRED Encoder */ + dred_process_silk_frame( &st->dred_encoder, &pcm_buf[total_buffer*st->channels] ); + } else { + st->dred_encoder.latents_buffer_fill = 0; + } +#endif + if (prefill) { opus_int32 zero=0; @@ -1888,7 +1909,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifdef ENABLE_NEURAL_FEC /* If we're not in SILK mode, delete all the processed DRED. TODO: Remove this if/when DRED gets encoded for CELT. */ - DREDEnc *dred = &((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.dred_encoder; + DREDEnc *dred = &st->dred_encoder; dred->latents_buffer_fill = 0; #endif } @@ -2196,7 +2217,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ unsigned char buf[DRED_MAX_DATA_SIZE]; int dred_chunks; int dred_bytes_left; - DREDEnc *dred = &((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.dred_encoder; dred_chunks = IMIN(st->dred_duration/4, DRED_NUM_REDUNDANCY_FRAMES/2); dred_bytes_left = IMIN(DRED_MAX_DATA_SIZE, max_data_bytes-ret-2); /* Check whether we actually have something to encode. */ @@ -2206,7 +2226,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ These bytes will be removed once extension is finalized. */ buf[0] = 'D'; buf[1] = DRED_VERSION; - dred_bytes = dred_encode_silk_frame(dred, buf+2, dred_chunks, dred_bytes_left-2); + dred_bytes = dred_encode_silk_frame(&st->dred_encoder, buf+2, dred_chunks, dred_bytes_left-2); dred_bytes += 2; celt_assert(dred_bytes <= dred_bytes_left); extension.id = 127; @@ -2763,6 +2783,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) celt_encoder_ctl(celt_enc, OPUS_RESET_STATE); silk_InitEncoder( silk_enc, st->arch, &dummy ); +#ifdef ENABLE_NEURAL_FEC + /* Initialize DRED Encoder */ + dred_encoder_reset( &st->dred_encoder ); +#endif st->stream_channels = st->channels; st->hybrid_stereo_width_Q14 = 1 << 14; st->prev_HB_gain = Q15ONE;