From 4d07b1357e1993d7fb69d15f6e5205d7633629f5 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Mon, 6 Jan 2014 17:43:20 -0500 Subject: [PATCH] Reduces the decoder stack use by removing the pcm_silk buffer in fixed-point We only keep when concealing less than 10ms with SILK. --- celt/arch.h | 4 +++ celt/celt.h | 3 ++- celt/celt_decoder.c | 62 ++++++++++++++++++++++++++++++++------------- src/opus_decoder.c | 46 ++++++++++++++++++++------------- 4 files changed, 79 insertions(+), 36 deletions(-) diff --git a/celt/arch.h b/celt/arch.h index 25cc0b779..8c79a66e1 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -108,6 +108,10 @@ typedef opus_val32 celt_ener; #define ABS16(x) ((x) < 0 ? (-(x)) : (x)) #define ABS32(x) ((x) < 0 ? (-(x)) : (x)) +static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { + return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; +} + #ifdef FIXED_DEBUG #include "fixed_debug.h" #else diff --git a/celt/celt.h b/celt/celt.h index dcc6c7eb3..b1967516d 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -134,7 +134,8 @@ int celt_decoder_get_size(int channels); int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels); -int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec); +int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum); #define celt_encoder_ctl opus_custom_encoder_ctl #define celt_decoder_ctl opus_custom_decoder_ctl diff --git a/celt/celt_decoder.c b/celt/celt_decoder.c index 5d0951264..0f92b746a 100644 --- a/celt/celt_decoder.c +++ b/celt/celt_decoder.c @@ -191,7 +191,7 @@ static OPUS_INLINE opus_val16 SIG2WORD16(celt_sig x) static #endif void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, - celt_sig *mem) + celt_sig *mem, int accum) { int c; int Nd; @@ -199,7 +199,10 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c opus_val16 coef0; VARDECL(celt_sig, scratch); SAVE_STACK; - +#ifndef FIXED_POINT + (void)accum; + celt_assert(accum==0); +#endif ALLOC(scratch, N, celt_sig); coef0 = coef[0]; Nd = N/downsample; @@ -238,11 +241,24 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c apply_downsampling=1; } else { /* Shortcut for the standard (non-custom modes) case */ - for (j=0;j<N;j++) +#ifdef FIXED_POINT + if (accum) { - celt_sig tmp = x[j] + m + VERY_SMALL; - m = MULT16_32_Q15(coef0, tmp); - y[j*C] = SCALEOUT(SIG2WORD16(tmp)); + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp); + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp)))); + } + } else +#endif + { + for (j=0;j<N;j++) + { + celt_sig tmp = x[j] + m + VERY_SMALL; + m = MULT16_32_Q15(coef0, tmp); + y[j*C] = SCALEOUT(SIG2WORD16(tmp)); + } } } mem[c] = m; @@ -250,8 +266,17 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c if (apply_downsampling) { /* Perform down-sampling */ - for (j=0;j<Nd;j++) - y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); +#ifdef FIXED_POINT + if (accum) + { + for (j=0;j<Nd;j++) + y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample])))); + } else +#endif + { + for (j=0;j<Nd;j++) + y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample])); + } } } while (++c<C); RESTORE_STACK; @@ -378,7 +403,8 @@ static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, pitch of 480 Hz. */ #define PLC_PITCH_LAG_MIN (100) -static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, int N, int LM) +static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_RESTRICT pcm, + int N, int LM, int accum) { int c; int i; @@ -680,15 +706,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R } while (++c<C); } - deemphasis(out_syn, pcm, N, C, downsample, - mode->preemph, st->preemph_memD); + deemphasis(out_syn, pcm, N, C, downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = loss_count+1; RESTORE_STACK; } -int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec) +int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, + int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum) { int c, i, N; int spread_decision; @@ -803,7 +829,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (data == NULL || len<=1) { - celt_decode_lost(st, pcm, N, LM); + celt_decode_lost(st, pcm, N, LM, accum); RESTORE_STACK; return frame_size/st->downsample; } @@ -1030,7 +1056,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat st->rng = dec->rng; /* We reuse freq[] as scratch space for the de-emphasis */ - deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD); + deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum); st->loss_count = 0; RESTORE_STACK; if (ec_tell(dec) > 8*len) @@ -1046,7 +1072,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat #ifdef FIXED_POINT int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } #ifndef DISABLE_FLOAT_API @@ -1063,7 +1089,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char N = frame_size; ALLOC(out, C*N, opus_int16); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j<C*ret;j++) pcm[j]=out[j]*(1.f/32768.f); @@ -1077,7 +1103,7 @@ int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size) { - return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL); + return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0); } int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size) @@ -1093,7 +1119,7 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data N = frame_size; ALLOC(out, C*N, celt_sig); - ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL); + ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0); if (ret>0) for (j=0;j<C*ret;j++) diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 919ba521b..397446f2c 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -77,12 +77,6 @@ struct OpusDecoder { opus_uint32 rangeFinal; }; -#ifdef FIXED_POINT -static OPUS_INLINE opus_int16 SAT16(opus_int32 x) { - return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x; -} -#endif - int opus_decoder_get_size(int channels) { @@ -215,7 +209,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, VARDECL(opus_val16, pcm_transition_silk); int pcm_transition_celt_size; VARDECL(opus_val16, pcm_transition_celt); - opus_val16 *pcm_transition; + opus_val16 *pcm_transition=NULL; int redundant_audio_size; VARDECL(opus_val16, redundant_audio); @@ -230,6 +224,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int F2_5, F5, F10, F20; const opus_val16 *window; opus_uint32 redundant_rng = 0; + int celt_accum; ALLOC_STACK; silk_dec = (char*)st+st->silk_dec_offset; @@ -295,6 +290,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } } + /* In fixed-point, we can tell CELT to do the accumulation on top of the + SILK PCM buffer. This saves some stack space. */ +#ifdef FIXED_POINT + celt_accum = (mode != MODE_CELT_ONLY) && (frame_size >= F10); +#else + celt_accum = 0; +#endif + pcm_transition_silk_size = ALLOC_NONE; pcm_transition_celt_size = ALLOC_NONE; if (data!=NULL && st->prev_mode > 0 && ( @@ -325,14 +328,20 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } /* Don't allocate any memory when in CELT-only mode */ - pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; + pcm_silk_size = (mode != MODE_CELT_ONLY && !celt_accum) ? IMAX(F10, frame_size)*st->channels : ALLOC_NONE; ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) { int lost_flag, decoded_samples; - opus_int16 *pcm_ptr = pcm_silk; + opus_int16 *pcm_ptr; +#ifdef FIXED_POINT + if (celt_accum) + pcm_ptr = pcm; + else +#endif + pcm_ptr = pcm_silk; if (st->prev_mode==MODE_CELT_ONLY) silk_InitDecoder( silk_dec ); @@ -462,7 +471,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, - redundant_audio, F5, NULL); + redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); } @@ -477,25 +486,28 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); /* Decode CELT */ celt_ret = celt_decode_with_ec(celt_dec, decode_fec ? NULL : data, - len, pcm, celt_frame_size, &dec); + len, pcm, celt_frame_size, &dec, celt_accum); } else { unsigned char silence[2] = {0xFF, 0xFF}; - for (i=0;i<frame_size*st->channels;i++) - pcm[i] = 0; + if (!celt_accum) + { + for (i=0;i<frame_size*st->channels;i++) + pcm[i] = 0; + } /* For hybrid -> SILK transitions, we let the CELT MDCT do a fade-out by decoding a silence frame */ if (st->prev_mode == MODE_HYBRID && !(redundancy && celt_to_silk && st->prev_redundancy) ) { celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL); + celt_decode_with_ec(celt_dec, silence, 2, pcm, F2_5, NULL, celt_accum); } } - if (mode != MODE_CELT_ONLY) + if (mode != MODE_CELT_ONLY && !celt_accum) { #ifdef FIXED_POINT for (i=0;i<frame_size*st->channels;i++) - pcm[i] = SAT16(pcm[i] + pcm_silk[i]); + pcm[i] = SAT16(ADD32(pcm[i], pcm_silk[i])); #else for (i=0;i<frame_size*st->channels;i++) pcm[i] = pcm[i] + (opus_val16)((1.f/32768.f)*pcm_silk[i]); @@ -514,7 +526,7 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, celt_decoder_ctl(celt_dec, OPUS_RESET_STATE); celt_decoder_ctl(celt_dec, CELT_SET_START_BAND(0)); - celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL); + celt_decode_with_ec(celt_dec, data+len, redundancy_bytes, redundant_audio, F5, NULL, 0); celt_decoder_ctl(celt_dec, OPUS_GET_FINAL_RANGE(&redundant_rng)); smooth_fade(pcm+st->channels*(frame_size-F2_5), redundant_audio+st->channels*F2_5, pcm+st->channels*(frame_size-F2_5), F2_5, st->channels, window, st->Fs); -- GitLab