From cc83f6b87597595f5c1ad57e17a53b8636586867 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jmvalin@jmvalin.ca>
Date: Mon, 5 Nov 2012 10:25:20 -0500
Subject: [PATCH] Reduces decoder stack usage

Delays stack allocations as much as possible, and makes some of these
allocations conditional.
---
 celt/celt.c        | 15 ++++++++++-----
 src/opus_decoder.c | 39 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/celt/celt.c b/celt/celt.c
index 69b7a6935..7c78947b8 100644
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -2838,9 +2838,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
       return frame_size/st->downsample;
    }
 
-   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
    ALLOC(X, C*N, celt_norm);   /**< Interleaved normalised MDCTs */
-   ALLOC(bandE, nbEBands*C, celt_ener);
+
    c=0; do
       for (i=0;i<M*eBands[st->start];i++)
          X[c*N+i] = 0;
@@ -2923,13 +2922,12 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    if (tell+4 <= total_bits)
       spread_decision = ec_dec_icdf(dec, spread_icdf, 5);
 
-   ALLOC(pulses, nbEBands, int);
    ALLOC(cap, nbEBands, int);
-   ALLOC(offsets, nbEBands, int);
-   ALLOC(fine_priority, nbEBands, int);
 
    init_caps(mode,cap,LM,C);
 
+   ALLOC(offsets, nbEBands, int);
+
    dynalloc_logp = 6;
    total_bits<<=BITRES;
    tell = ec_tell_frac(dec);
@@ -2968,6 +2966,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
    bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
    anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
    bits -= anti_collapse_rsv;
+
+   ALLOC(pulses, nbEBands, int);
+   ALLOC(fine_priority, nbEBands, int);
+
    codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
          alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
          fine_quant, fine_priority, C, LM, dec, 0, 0);
@@ -2992,6 +2994,8 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
       anti_collapse(mode, X, collapse_masks, LM, C, N,
             st->start, st->end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
 
+   ALLOC(bandE, nbEBands*C, celt_ener);
+
    log2Amp(mode, st->start, st->end, bandE, oldBandE, C);
 
    if (silence)
@@ -3002,6 +3006,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
          oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
       }
    }
+   ALLOC(freq, IMAX(CC,C)*N, celt_sig); /**< Interleaved signal MDCTs */
    /* Synthesis */
    denormalise_bands(mode, X, freq, bandE, effEnd, C, M);
 
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index be6ae401b..18262322a 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -200,8 +200,14 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    int i, silk_ret=0, celt_ret=0;
    ec_dec dec;
    opus_int32 silk_frame_size;
+   int pcm_silk_size;
    VARDECL(opus_int16, pcm_silk);
-   VARDECL(opus_val16, pcm_transition);
+   int pcm_transition_silk_size;
+   VARDECL(opus_val16, pcm_transition_silk);
+   int pcm_transition_celt_size;
+   VARDECL(opus_val16, pcm_transition_celt);
+   opus_val16 *pcm_transition;
+   int redundant_audio_size;
    VARDECL(opus_val16, redundant_audio);
 
    int audiosize;
@@ -274,16 +280,26 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
       RESTORE_STACK;
       return frame_size;
    }
-   ALLOC(pcm_transition, F5*st->channels, opus_val16);
 
+   pcm_transition_silk_size = 0;
+   pcm_transition_celt_size = 0;
    if (data!=NULL && st->prev_mode > 0 && (
        (mode == MODE_CELT_ONLY && st->prev_mode != MODE_CELT_ONLY && !st->prev_redundancy)
     || (mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY) )
       )
    {
       transition = 1;
+      /* Decide where to allocate the stack memory for pcm_transition */
       if (mode == MODE_CELT_ONLY)
-         opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+         pcm_transition_celt_size = F5*st->channels;
+      else
+         pcm_transition_silk_size = F5*st->channels;
+   }
+   ALLOC(pcm_transition_celt, pcm_transition_celt_size, opus_val16);
+   if (transition && mode == MODE_CELT_ONLY)
+   {
+      pcm_transition = pcm_transition_celt;
+      opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
    }
    if (audiosize > frame_size)
    {
@@ -294,8 +310,9 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
       frame_size = audiosize;
    }
 
-   ALLOC(pcm_silk, IMAX(F10, frame_size)*st->channels, opus_int16);
-   ALLOC(redundant_audio, F5*st->channels, opus_val16);
+   /* Don't allocate pcm_silk memory when in CELT-only mode */
+   pcm_silk_size = (mode != MODE_CELT_ONLY) ? IMAX(F10, frame_size)*st->channels : 0;
+   ALLOC(pcm_silk, pcm_silk_size, opus_int16);
 
    /* SILK processing */
    if (mode != MODE_CELT_ONLY)
@@ -409,10 +426,22 @@ static int opus_decode_frame(OpusDecoder *st, const unsigned char *data,
    }
 
    if (redundancy)
+   {
       transition = 0;
+      pcm_transition_silk_size=0;
+   }
+
+   ALLOC(pcm_transition_silk, pcm_transition_silk_size, opus_val16);
 
    if (transition && mode != MODE_CELT_ONLY)
+   {
+      pcm_transition = pcm_transition_silk;
       opus_decode_frame(st, NULL, 0, pcm_transition, IMIN(F5, audiosize), 0);
+   }
+
+   /* Only allocate memory for redundancy if/when needed */
+   redundant_audio_size = redundancy ? F5*st->channels : 0;
+   ALLOC(redundant_audio, redundant_audio_size, opus_val16);
 
    /* 5 ms redundant frame for CELT->SILK*/
    if (redundancy && celt_to_silk)
-- 
GitLab