diff --git a/silk/control_codec.c b/silk/control_codec.c index b1d3bab9fb1d10fbf0d902d073b12ff957a18025..64b109152641a3810cfd793462bbd3ac906ff1fb 100644 --- a/silk/control_codec.c +++ b/silk/control_codec.c @@ -37,6 +37,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #include "tuning_parameters.h" +/* Encoder delay lookup: first index is rateID() of the internal rate (fs_kHz*1000), second index is rateID() of API_fs_Hz. silk_setup_fs() stores the selected entry in sCmn.delay and asserts it is <= MAX_ENCODER_DELAY. */ +static const int enc_delay_matrix[3][5] = { +/*SILK API 8 12 16 24 48 */ +/* 8 */ {5, 0, 3, 4, 8}, +/*12 */ {0, 6, 0, 0, 0}, +/*16 */ {4, 5, 11, 5, 18} +}; + opus_int silk_setup_resamplers( silk_encoder_state_Fxx *psEnc, /* I/O */ opus_int fs_kHz /* I */ @@ -235,6 +243,9 @@ opus_int silk_setup_fs( psEnc->sCmn.TargetRate_bps = 0; /* trigger new SNR computation */ } + psEnc->sCmn.delay = enc_delay_matrix[rateID(fs_kHz*1000)][rateID(psEnc->sCmn.API_fs_Hz)]; + silk_assert(psEnc->sCmn.delay <= MAX_ENCODER_DELAY); + /* Set internal sampling frequency */ silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 ); silk_assert( psEnc->sCmn.nb_subfr == 2 || psEnc->sCmn.nb_subfr == 4 ); diff --git a/silk/dec_API.c b/silk/dec_API.c index 675bfb99657008185b51a4394da44efde7107155..5e676932e07a6e0beb76f3a2d2dc37a8fd85abd6 100644 --- a/silk/dec_API.c +++ b/silk/dec_API.c @@ -31,6 +31,14 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "API.h" #include "main.h" +static const int dec_delay_matrix[3][5] = { /* Decoder delay lookup: [rateID(internal rate in Hz)][rateID(API_sampleRate)]; the selected entry is asserted <= MAX_DECODER_DELAY in silk_Decode() */ +/*SILK API 8 12 16 24 48 */ +/* 8 */ {3, 0, 2, 0, 0}, +/*12 */ {0, 8, 5, 7, 5}, +/*16 */ {0, 0, 8, 5, 5} +}; + + /************************/ /* Decoder Super Struct */ /************************/ @@ -82,12 +90,15 @@ opus_int silk_Decode( { opus_int i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR; opus_int32 nSamplesOutDec, LBRR_symbol; - opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ]; + opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ]; opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ]; opus_int32 MS_pred_Q13[ 2 ] = { 0 }; opus_int16 *resample_out_ptr; silk_decoder *psDec = ( silk_decoder * )decState; silk_decoder_state *channel_state = psDec->channel_state; + int delay; + + delay = channel_state[ 0 ].delay; /* NOTE(review): read before the rate-switch block further down can assign channel_state[ 0 ].delay, so the frame that triggers a switch appears to use the previous value - confirm this is intended */ /**********************************/ /* Test if first frame in payload */ @@ -106,6 +117,7 @@ opus_int silk_Decode( ret += silk_init_decoder( &channel_state[ 1 ] ); if( psDec->nChannelsAPI == 2 ) { silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); + silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16)); } } @@ -143,9 +155,12 @@ opus_int silk_Decode( /* Initialize resampler when switching internal or external sampling frequency */ if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) { + channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)]; + silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY); ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate ); if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 
2 ) { silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) ); + channel_state[ 1 ].delay = channel_state[ 0 ].delay; } } channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate; @@ -230,19 +245,19 @@ opus_int silk_Decode( /* Call decoder for one frame */ for( n = 0; n < decControl->nChannelsInternal; n++ ) { if( n == 0 || decode_only_middle == 0 ) { - ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag ); + ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 + delay ], &nSamplesOutDec, lostFlag ); } else { - silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); + silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) ); } } if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { /* Convert Mid/Side to Left/Right */ - silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); + silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec ); } else { /* Buffering */ - silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); - silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) ); + silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) ); + silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) ); } /* Number of output samples */ @@ -256,8 +271,11 @@ opus_int silk_Decode( } for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) { + + silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], 
delay*sizeof(opus_int16)); /* Resample decoded signal to API_sampleRate */ ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec ); + silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16)); /* Interleave if stereo output and stereo stream */ if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) { diff --git a/silk/define.h b/silk/define.h index dcfeb1d311a1e3e19227760708878bae59dcdaf0..c7cbdcf8f9b01752b835438dc9da3e31b88332ca 100644 --- a/silk/define.h +++ b/silk/define.h @@ -86,6 +86,9 @@ extern "C" #define MAX_FRAME_LENGTH_MS ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR ) #define MAX_FRAME_LENGTH ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ ) +#define MAX_ENCODER_DELAY 18 /* Upper bound, in samples, on the encoder delay (largest enc_delay_matrix entry); also the length of the encoder delayBuf */ +#define MAX_DECODER_DELAY 8 /* Upper bound, in samples, on the decoder delay (largest dec_delay_matrix entry); also the length of the decoder delayBuf */ + /* Milliseconds of lookahead for pitch analysis */ #define LA_PITCH_MS 2 #define LA_PITCH_MAX ( LA_PITCH_MS * MAX_FS_KHZ ) diff --git a/silk/enc_API.c b/silk/enc_API.c index 0fe945b6abe4de05a976ce7aea9c895f2976eae4..403aeccea640a942ef07fb7ad8ae62059113ec37 100644 --- a/silk/enc_API.c +++ b/silk/enc_API.c @@ -138,8 +138,8 @@ opus_int silk_Encode( opus_int speech_act_thr_for_switch_Q8; opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol; silk_encoder *psEnc = ( silk_encoder * )encState; - opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ]; - opus_int transition; + opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ + MAX_ENCODER_DELAY]; + opus_int transition, delay; psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0; @@ -222,6 +222,7 @@ opus_int silk_Encode( } silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); + delay = psEnc->state_Fxx[ 0 ].sCmn.delay; /* Input buffering/resampling and encoding */ while( 1 ) { nSamplesToBuffer = psEnc->state_Fxx[ 0 
].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx; @@ -231,12 +232,15 @@ opus_int silk_Encode( if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n ]; + buf[ n+delay ] = samplesIn[ 2 * n ]; } + silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); /* Making sure to start both resamplers from the same state when switching from mono to stereo */ if(psEnc->nPrevChannelsInternal == 1 && id==0) { - silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); + silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); + silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16)); } + silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); @@ -245,23 +249,31 @@ opus_int silk_Encode( nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n + 1 ]; + buf[ n+delay ] = samplesIn[ 2 * n + 1 ]; } + silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 
], buf, nSamplesFromInput ); + silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); + psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer; } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) { /* Combine left and right channels before resampling */ for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 ); + buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ], 1 ); } + silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; } else { silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 ); + silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16)); + silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16)); ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, - &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], samplesIn, nSamplesFromInput ); + &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); + silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16)); psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer; } diff --git a/silk/main.h b/silk/main.h index d7ed22ceb2a0ef442a901ce8fd5706799f28f04f..981c7cab05fcb27801f52a117b55c774c8544600 100644 --- 
a/silk/main.h +++ b/silk/main.h @@ -43,6 +43,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* Uncomment the next line to force a fixed internal sampling rate (independent of what bitrate is used */ /*#define FORCE_INTERNAL_FS_KHZ 16*/ +/* Map a sampling rate R in Hz, one of [8000, 12000, 16000, 24000, 48000], to the table index [0,1,2,3,4]. Other values of R are not given distinct indices. R is expanded three times, so it must be free of side effects. */ +#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 ) /* Convert Left/Right stereo signal to adaptive Mid/Side representation */ void silk_stereo_LR_to_MS( diff --git a/silk/structs.h b/silk/structs.h index eda8173b7d8b0658f9ddcff45e85c43c7fdede67..70c81baeebff1cacc1f7aee7169225fbb32253a3 100644 --- a/silk/structs.h +++ b/silk/structs.h @@ -149,6 +149,7 @@ typedef struct { opus_int minInternal_fs_Hz; /* Minimum internal sampling frequency (Hz) */ opus_int desiredInternal_fs_Hz; /* Soft request for internal sampling frequency (Hz) */ opus_int fs_kHz; /* Internal sampling frequency (kHz) */ + opus_int delay; /* Number of samples of delay to apply */ opus_int nb_subfr; /* Number of 5 ms subframes in a frame */ opus_int frame_length; /* Frame length (samples) */ opus_int subfr_length; /* Subframe length (samples) */ @@ -192,6 +193,7 @@ typedef struct { /* Input/output buffering */ opus_int16 inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal */ + opus_int16 delayBuf[MAX_ENCODER_DELAY]; /* Last MAX_ENCODER_DELAY samples of the previous resampler input buffer; silk_Encode() prepends the final `delay` of them to the next input */ opus_int inputBufIx; opus_int nFramesPerPacket; opus_int nFramesEncoded; /* Number of frames analyzed in current packet */ @@ -257,6 +259,8 @@ typedef struct { opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ]; opus_int32 exc_Q10[ MAX_FRAME_LENGTH ]; opus_int16 outBuf[ 2 * MAX_FRAME_LENGTH ]; /* Buffer for output signal */ + opus_int16 delayBuf[ MAX_DECODER_DELAY ]; /* Buffer for delaying the SILK output prior to resampling */ + opus_int delay; /* How much decoder delay to add */ opus_int lagPrev; /* Previous Lag */ opus_int8 LastGainIndex; /* Previous gain index */ opus_int fs_kHz; /* Sampling frequency in 
kHz */ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index ad3279f8813f929e2c901d29a5378af4dc1aac50..1a6e38f14c0f89bddb6eaca4d4f541cd58d9a195 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -123,6 +123,11 @@ static const opus_int32 mode_thresholds[2][2] = { { 48000, 24000}, /* mono */ { 48000, 24000}, /* stereo */ }; +/* Per-API-rate term added to st->delay_compensation in opus_encoder_init(); indexed by rateID(st->Fs) */ +static const int celt_delay_table[5] = { +/* API 8 12 16 24 48 */ + 10, 16, 21, 27, 55 +}; int opus_encoder_get_size(int channels) { int silkEncSizeBytes, celtEncSizeBytes; @@ -202,14 +207,8 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->encoder_buffer = st->Fs/100; st->delay_compensation = st->Fs/400; - /* This part is meant to compensate for the resampler delay as a function - of the API sampling rate */ - if (st->Fs == 48000) - st->delay_compensation += 23; - else if (st->Fs == 24000) - st->delay_compensation += 15; - else - st->delay_compensation += 2; + /* Extra compensation as a function of the API sampling rate, now looked up per rate instead of the removed if/else chain */ + st->delay_compensation += celt_delay_table[rateID(st->Fs)]; st->hybrid_stereo_width_Q14 = 1 << 14; st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); @@ -486,7 +485,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, } #endif - if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0) + if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0) { /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */ st->silk_mode.toMono = 1;