From cc83f6b87597595f5c1ad57e17a53b8636586867 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Mon, 5 Nov 2012 10:25:20 -0500 Subject: [PATCH] Reduces decoder stack usage Delays stack allocations as much as possible, and makes some of these allocations conditional. --- celt/celt.c | 15 ++++++++++----- src/opus_decoder.c | 39 ++++++++++++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/celt/celt.c b/celt/celt.c index 69b7a6935..7c78947b8 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -2838,9 +2838,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat return frame_size/st->downsample; } - ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ - ALLOC(bandE, nbEBands*C, celt_ener); + c=0; do for (i=0;i<M*eBands[st->start];i++) X[c*N+i] = 0; @@ -2923,13 +2922,12 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat if (tell+4 <= total_bits) spread_decision = ec_dec_icdf(dec, spread_icdf, 5); - ALLOC(pulses, nbEBands, int); ALLOC(cap, nbEBands, int); - ALLOC(offsets, nbEBands, int); - ALLOC(fine_priority, nbEBands, int); init_caps(mode,cap,LM,C); + ALLOC(offsets, nbEBands, int); + dynalloc_logp = 6; total_bits<<=BITRES; tell = ec_tell_frac(dec); @@ -2968,6 +2966,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1; anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? 
(1<<BITRES) : 0; bits -= anti_collapse_rsv; + + ALLOC(pulses, nbEBands, int); + ALLOC(fine_priority, nbEBands, int); + codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, dec, 0, 0); @@ -2992,6 +2994,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat anti_collapse(mode, X, collapse_masks, LM, C, N, st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng); + ALLOC(bandE, nbEBands*C, celt_ener); + log2Amp(mode, st->start, st->end, bandE, oldBandE, C); if (silence) @@ -3002,6 +3006,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat oldBandE[i] = -QCONST16(28.f,DB_SHIFT); } } + ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */ /* Synthesis */ denormalise_bands(mode, X, freq, bandE, effEnd, C, M); diff --git a/src/opus_decoder.c b/src/opus_decoder.c index be6ae401b..18262322a 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -200,8 +200,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, int i, silk_ret=0, celt_ret=0; ec_dec dec; opus_int32 silk_frame_size; + int pcm_silk_size; VARDECL(opus_int16, pcm_silk); - VARDECL(opus_val16, pcm_transition); + int pcm_transition_silk_size; + VARDECL(opus_val16, pcm_transition_silk); + int pcm_transition_celt_size; + VARDECL(opus_val16, pcm_transition_celt); + opus_val16 *pcm_transition; + int redundant_audio_size; VARDECL(opus_val16, redundant_audio); int audiosize; @@ -274,16 +280,26 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, RESTORE_STACK; return frame_size; } - ALLOC(pcm_transition, F5*st->channels, opus_val16); + pcm_transition_silk_size = 0; + pcm_transition_celt_size = 0; if (data!=NULL && st->prev_mode > 0 && ( (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy) || (mode != MODE_CELT_ONLY && st->prev_mode == 
MODE_CELT_ONLY) ) ) { transition = 1; + /* Decide where to allocate the stack memory for pcm_transition */ if (mode == MODE_CELT_ONLY) - opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); + pcm_transition_celt_size = F5*st->channels; + else + pcm_transition_silk_size = F5*st->channels; + } + ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16); + if (transition && mode == MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_celt; + opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); } if (audiosize > frame_size) { @@ -294,8 +310,9 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, frame_size = audiosize; } - ALLOC(pcm_silk, IMAX(F10, frame_size)*st->channels, opus_int16); - ALLOC(redundant_audio, F5*st->channels, opus_val16); + /* Don't allocate any memory when in CELT-only mode */ + pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : 0; + ALLOC(pcm_silk, pcm_silk_size, opus_int16); /* SILK processing */ if (mode != MODE_CELT_ONLY) @@ -409,10 +426,22 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data, } if (redundancy) + { transition = 0; + pcm_transition_silk_size=0; + } + + ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16); if (transition && mode != MODE_CELT_ONLY) + { + pcm_transition = pcm_transition_silk; opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0); + } + + /* Only allocate memory for redundancy if/when needed */ + redundant_audio_size = redundancy ? F5*st->channels : 0; + ALLOC(redundant_audio, redundant_audio_size, opus_val16); /* 5 ms redundant frame for CELT->SILK*/ if (redundancy && celt_to_silk) -- GitLab