Commit 88875669 authored by Koen Vos, committed by Jean-Marc Valin
Browse files

SILK update

Simplifies mono/stereo switching in SILK
Fixes a quantization mismatch between encoder and decoder
Constrains the pitch lags in the same way in the encoder and decoder
parent 480ba703
......@@ -91,10 +91,10 @@ void silk_decode_core(
/* Preload LPC coefficients to array on stack. Gives small performance gain */
silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) );
B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ];
Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
signalType = psDec->indices.signalType;
inv_gain_Q16 = silk_INVERSE32_varQ( Gain_Q10, 26 );
Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 );
inv_gain_Q16 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 32 );
inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX );
/* Calculate Gain adjustment factor */
......
......@@ -67,7 +67,7 @@ void silk_decode_pitch(
}
min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz );
max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz );
max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ) - 1;
lag = min_lag + lagIndex;
for( k = 0; k < nb_subfr; k++ ) {
......
......@@ -119,44 +119,6 @@ opus_int silk_QueryEncoder(
return ret;
}
/*
 * Extract one channel from an interleaved stereo buffer, optionally crossfading
 * between that channel alone and the 50/50 average of both channels.
 *
 * in       interleaved stereo input; sample i of the primary channel is read
 *          from in[ 2*i + channel ], the other channel from in[ 2*i + 1 - channel ]
 * out      output buffer; receives len samples
 * channel  index (0 or 1) of the primary channel
 * len      number of output samples; nothing is written when len <= 0
 * to_mono  0:        start from the 50/50 mix and fade towards the primary channel alone,
 *                    then copy the primary channel for the rest of the frame
 *          non-zero: start from the primary channel alone and fade towards the 50/50 mix,
 *                    then output the plain average for the rest of the frame
 *          2:        skip the fade; the whole frame is the plain average
 * id       the fade is applied only when id == 0; for any other value the whole
 *          frame takes the steady-state path selected by to_mono
 */
static void stereo_crossmix(const opus_int16 *in, opus_int16 *out, int channel, int len, int to_mono, int id)
{
    int i;
    opus_int16 delta, g1, g2;
    const opus_int16 *x1, *x2;

    /* Nothing to produce. Returning here also keeps the delta computation
       below from dividing by zero when len == 0. */
    if (len <= 0) {
        return;
    }

    x1 = in+channel;
    x2 = in+(1-channel);

    /* Mixing weights; the weighted sum is shifted right by 14 below,
       so 16384 acts as 1.0 and 8192 as 0.5 */
    g1 = to_mono ? 16384: 8192;
    g2 = to_mono ? 0 : 8192;

    /* We want to finish at 0.5: the step is 16384/len (rounded), and the fade
       runs for len/2 samples, so each weight moves by about 8192 in total */
    delta = (16384+(len>>1))/(len);
    if (to_mono) {
        delta = -delta;
    }

    i=0;
    /* Crossfade over the first half of the frame */
    if (to_mono != 2 && id==0) {
        for ( ; i < (len>>1); i++ ) {
            out[ i ] = silk_RSHIFT_ROUND( silk_SMLABB( silk_SMULBB( x1[ 2*i ], g1 ), x2[ 2*i ], g2 ), 14 );
            g1 += delta;
            g2 -= delta;
        }
    }

    /* Steady state for whatever part of the frame the fade did not cover */
    if (to_mono) {
        /* Average of both channels; widened to 32 bits so the sum cannot overflow */
        for ( ; i < len; i++ ) {
            out[ i ] = silk_RSHIFT( (opus_int32)x1[ 2*i ] + (opus_int32)x2[ 2*i ], 1 );
        }
    } else {
        /* Primary channel passed through unchanged */
        for ( ; i < len; i++ ) {
            out[ i ] = x1[ 2*i ];
        }
    }
}
/**************************/
/* Encode frame with Silk */
......@@ -268,18 +230,13 @@ opus_int silk_Encode(
/* Resample and write to buffer */
if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {
int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;
if ( encControl->toMono > 0) {
stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, encControl->toMono, id );
} else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1 ) {
stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 0, id );
} else {
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n ] = samplesIn[ 2 * n ];
}
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n ] = samplesIn[ 2 * n ];
}
/* Making sure to start both resamplers from the same state when switching from mono to stereo */
if(psEnc->nPrevChannelsInternal == 1 && id==0)
if(psEnc->nPrevChannelsInternal == 1 && id==0) {
silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));
}
ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
......@@ -287,14 +244,8 @@ opus_int silk_Encode(
nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;
nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
if ( encControl->toMono > 0) {
stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, encControl->toMono, id );
} else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1) {
stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 0, id );
} else {
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n ] = samplesIn[ 2 * n + 1 ];
}
for( n = 0; n < nSamplesFromInput; n++ ) {
buf[ n ] = samplesIn[ 2 * n + 1 ];
}
ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,
&psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );
......@@ -394,9 +345,9 @@ opus_int silk_Encode(
if( encControl->nChannelsInternal == 2 ) {
silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8,
MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
if (!prefillFlag) {
if( !prefillFlag ) {
silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] );
}
......@@ -437,8 +388,9 @@ opus_int silk_Encode(
flags = silk_LSHIFT( flags, 1 );
flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag;
}
if (!prefillFlag)
if( !prefillFlag ) {
ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal );
}
/* Return zero bytes if all channels DTXed */
if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) {
......
......@@ -467,17 +467,19 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag );
}
*lagIndex = (opus_int16)( lag_new - min_lag );
*contourIndex = (opus_int8)CBimax;
} else {
} else { /* Fs_kHz == 8 */
/* Save Lags and correlation */
silk_assert( CCmax >= 0.0f );
*LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz );
}
*lagIndex = (opus_int16)( lag - min_lag );
*lagIndex = (opus_int16)( lag - min_lag_8kHz );
*contourIndex = (opus_int8)CBimax;
}
silk_assert( *lagIndex >= 0 );
......
......@@ -54,6 +54,7 @@ void silk_stereo_LR_to_MS(
opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
opus_int32 total_rate_bps, /* I Total bitrate */
opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
opus_int toMono, /* I Last frame before a stereo->mono transition */
opus_int fs_kHz, /* I Sample rate (kHz) */
opus_int frame_length /* I Number of samples */
);
......
......@@ -558,15 +558,17 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag );
}
*lagIndex = (opus_int16)( lag_new - min_lag);
*contourIndex = (opus_int8)CBimax;
} else {
} else { /* Fs_kHz == 8 */
/* Save Lags and correlation */
CCmax = silk_max( CCmax, 0 );
*LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */
for( k = 0; k < nb_subfr; k++ ) {
pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );
pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz );
}
*lagIndex = (opus_int16)( lag - min_lag_8kHz );
*contourIndex = (opus_int8)CBimax;
......
......@@ -41,6 +41,7 @@ void silk_stereo_LR_to_MS(
opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */
opus_int32 total_rate_bps, /* I Total bitrate */
opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */
opus_int toMono, /* I Last frame before a stereo->mono transition */
opus_int fs_kHz, /* I Sample rate (kHz) */
opus_int frame_length /* I Number of samples */
)
......@@ -96,7 +97,7 @@ void silk_stereo_LR_to_MS(
/* Determine bitrate distribution between mid and side, and possibly reduce stereo width */
total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */
if (total_rate_bps < 1 ) {
if( total_rate_bps < 1 ) {
total_rate_bps = 1;
}
min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 );
......@@ -122,7 +123,13 @@ void silk_stereo_LR_to_MS(
/* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */
*mid_only_flag = 0;
if( state->width_prev_Q14 == 0 &&
if( toMono ) {
/* Last frame before stereo->mono transition; collapse stereo width */
width_Q14 = 0;
pred_Q13[ 0 ] = 0;
pred_Q13[ 1 ] = 0;
silk_stereo_quant_pred( pred_Q13, ix );
} else if( state->width_prev_Q14 == 0 &&
( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) )
{
/* Code as panned-mono; previous frame already had zero width */
......
......@@ -486,21 +486,13 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
}
#endif
if (st->silk_mode.toMono==1 && st->stream_channels==2)
{
/* In case the encoder changes its mind on stereo->mono transition */
st->silk_mode.toMono = -1;
} else if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)
if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)
{
/* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */
st->silk_mode.toMono=1;
st->stream_channels = 2;
} else if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==1)
{
st->silk_mode.toMono=2;
st->silk_mode.toMono = 1;
st->stream_channels = 2;
} else {
st->silk_mode.toMono=0;
st->silk_mode.toMono = 0;
}
#ifdef FUZZING
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment