diff --git a/silk/control.h b/silk/control.h index c2f906b8522b4c90a6b57ffffb5e03f516b72428..47afecf62fbb80e4cc4682b1278590ec7ee6c94a 100644 --- a/silk/control.h +++ b/silk/control.h @@ -89,6 +89,9 @@ typedef struct { /* I: Causes a smooth downmix to mono */ opus_int toMono; + /* I: Opus encoder is allowing us to switch bandwidth */ + opus_int opusCanSwitch; + /* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */ opus_int32 internalSampleRate; @@ -100,6 +103,10 @@ typedef struct { /* O: Stereo width */ opus_int stereoWidth_Q14; + + /* O: Tells the Opus encoder we're ready to switch */ + opus_int switchReady; + } silk_EncControlStruct; /**************************************************************************/ diff --git a/silk/control_audio_bandwidth.c b/silk/control_audio_bandwidth.c index 54bf3d4e6ee97bdab7289ed6fb5a9411a905f853..e551e9d67abda7f70404e4a99291fa0c5efc3aec 100644 --- a/silk/control_audio_bandwidth.c +++ b/silk/control_audio_bandwidth.c @@ -34,7 +34,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* Control internal sampling rate */ opus_int silk_control_audio_bandwidth( - silk_encoder_state *psEncC /* I/O Pointer to Silk encoder state */ + silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ + silk_EncControlStruct *encControl /* I: Control structure */ ) { opus_int fs_kHz; @@ -58,7 +59,7 @@ opus_int silk_control_audio_bandwidth( /* Stop transition phase */ psEncC->sLP.mode = 0; } - if( psEncC->allow_bandwidth_switch ) { + if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) { /* Check if we should switch down */ if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz ) { @@ -70,15 +71,19 @@ opus_int silk_control_audio_bandwidth( /* Reset transition filter state */ silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) ); } - if( psEncC->sLP.transition_frame_no <= 0 ) { + if (encControl->opusCanSwitch) { /* Stop transition phase */ psEncC->sLP.mode = 0; /* Switch to a lower sample frequency */ fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8; } else { - /* Direction: down (at double speed) */ - psEncC->sLP.mode = -2; + if( psEncC->sLP.transition_frame_no <= 0 ) { + encControl->switchReady = 1; + } else { + /* Direction: down (at double speed) */ + psEncC->sLP.mode = -2; + } } } else @@ -86,7 +91,7 @@ opus_int silk_control_audio_bandwidth( if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz ) { /* Switch up */ - if( psEncC->sLP.mode == 0 ) { + if (encControl->opusCanSwitch) { /* Switch to a higher sample frequency */ fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16; @@ -95,9 +100,17 @@ opus_int silk_control_audio_bandwidth( /* Reset transition filter state */ silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) ); + + /* Direction: up */ + psEncC->sLP.mode = 1; + } else { + if( psEncC->sLP.mode == 0 ) { + encControl->switchReady = 1; + } else { + /* Direction: up */ + psEncC->sLP.mode = 1; + } } - /* Direction: up */ - psEncC->sLP.mode = 1; } } } diff --git a/silk/control_codec.c b/silk/control_codec.c index a942336ca41b94dba195a27fe3d06e687a0ebe63..418be4d99e9960192e74130b6736a1ecebad26f9 100644 --- a/silk/control_codec.c +++ b/silk/control_codec.c @@ -104,7 +104,7 @@ opus_int silk_control_encoder( /********************************************/ /* Determine internal sampling rate */ /********************************************/ - fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn ); + fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl ); if (force_fs_kHz) fs_kHz = force_fs_kHz; /********************************************/ diff --git a/silk/enc_API.c b/silk/enc_API.c index 149befa682d5b35add2443e3a092a8c51a913cf4..1320de469e535c5b9767567853d1c8e0140cd10d 100644 --- a/silk/enc_API.c +++ b/silk/enc_API.c @@ -149,6 +149,8 @@ opus_int silk_Encode( return ret; } + encControl->switchReady = 0; + if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) { /* Mono -> Stereo transition: init state of second channel and stereo state */ ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] ); diff --git a/silk/main.h b/silk/main.h index 1f2f01a78c9464a953b30bdc5ada469568fcb472..f5238136c61c1e5b1a01b3f8003e1bc32c4747ca 100644 --- a/silk/main.h +++ b/silk/main.h @@ -138,7 +138,8 @@ opus_int check_control_input( /* Control internal sampling rate */ opus_int silk_control_audio_bandwidth( - silk_encoder_state *psEncC /* I/O Pointer to Silk encoder state */ + silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */ + silk_EncControlStruct *encControl /* I: Control structure */ ); /* Control SNR of redidual quantizer */ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 03d936777c8a8add08f54a972f35bf9b2ebc67a5..4944ace4253dfdb15a967dde47fd835dcdd82cca 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -79,6 +79,7 @@ struct OpusEncoder { int prev_channels; int prev_framesize; int bandwidth; + int silk_bw_switch; /* Sampling rate (at the API level) */ int first; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; @@ -466,6 +467,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, int delay_compensation; int frame_rate; opus_int32 max_rate; + int curr_bandwidth; VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -612,6 +614,13 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, } } } + if (st->silk_bw_switch) + { + redundancy = 1; + celt_to_silk = 1; + st->silk_bw_switch = 0; + } + if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) { silk_EncControlStruct dummy; @@ -685,6 +694,10 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, if (max_data_bytes < 8000*frame_size / (st->Fs * 8)) st->mode = MODE_CELT_ONLY; + /* CELT mode doesn't support mediumband, use wideband instead */ + if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + /* Can't support higher than wideband for >20 ms frames */ if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)) { @@ -736,18 +749,17 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, RESTORE_STACK; return ret; } - /* CELT mode doesn't support mediumband, use wideband instead */ - if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + + curr_bandwidth = st->bandwidth; /* Chooses the appropriate mode for speech *NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */ - if (st->mode == MODE_SILK_ONLY && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND) + if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND) st->mode = MODE_HYBRID; - if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND) + if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND) st->mode = MODE_SILK_ONLY; - /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, st->bandwidth); */ + /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */ bytes_target = IMIN(max_data_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1; data += 1; @@ -789,7 +801,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, st->silk_mode.bitRate = st->bitrate_bps - 8*st->Fs/frame_size; if( st->mode == MODE_HYBRID ) { st->silk_mode.bitRate /= st->stream_channels; - if( st->bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { + if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) { if( st->Fs == 100 * frame_size ) { /* 24 kHz, 10 ms */ st->silk_mode.bitRate = ( ( st->silk_mode.bitRate + 2000 + st->use_vbr * 1000 ) * 2 ) / 3; @@ -816,12 +828,12 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; st->silk_mode.nChannelsAPI = st->channels; st->silk_mode.nChannelsInternal = st->stream_channels; - if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) { + if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) { st->silk_mode.desiredInternalSampleRate = 8000; - } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { + } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) { st->silk_mode.desiredInternalSampleRate = 12000; } else { - silk_assert( st->mode == MODE_HYBRID || st->bandwidth == OPUS_BANDWIDTH_WIDEBAND ); + silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND ); st->silk_mode.desiredInternalSampleRate = 16000; } if( st->mode == MODE_HYBRID ) { @@ -891,29 +903,37 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, } if (nBytes==0) { - data[-1] = gen_toc(st->mode, st->Fs/frame_size, st->bandwidth, st->stream_channels); + data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); RESTORE_STACK; return 1; } /* Extract SILK internal bandwidth for signaling in first byte */ if( st->mode == MODE_SILK_ONLY ) { if( st->silk_mode.internalSampleRate == 8000 ) { - st->bandwidth = OPUS_BANDWIDTH_NARROWBAND; + curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND; } else if( st->silk_mode.internalSampleRate == 12000 ) { - st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; } else if( st->silk_mode.internalSampleRate == 16000 ) { - st->bandwidth = OPUS_BANDWIDTH_WIDEBAND; + curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND; } } else { silk_assert( st->silk_mode.internalSampleRate == 16000 ); } + + st->silk_mode.opusCanSwitch = st->silk_mode.switchReady; + if (st->silk_mode.opusCanSwitch) + { + redundancy = 1; + celt_to_silk = 0; + st->silk_bw_switch = 1; + } } /* CELT processing */ { int endband=21; - switch(st->bandwidth) + switch(curr_bandwidth) { case OPUS_BANDWIDTH_NARROWBAND: endband = 13; @@ -1029,6 +1049,9 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, redundancy = 0; } + if (!redundancy) + st->silk_bw_switch = 0; + if (st->mode != MODE_CELT_ONLY)start_band=17; if (st->mode == MODE_SILK_ONLY) @@ -1101,7 +1124,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size, /* Signalling the mode in the first byte */ data--; - data[0] = gen_toc(st->mode, st->Fs/frame_size, st->bandwidth, st->stream_channels); + data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); st->rangeFinal = enc.rng ^ redundant_rng;