From dce69d2b9b4f33ecb482023f0fe1230ef5b53235 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jmvalin@jmvalin.ca>
Date: Mon, 6 Jan 2014 21:59:48 -0500
Subject: [PATCH] Store decoded SILK pulses as 16-bit vector

This saves 640 bytes of peak stack usage by narrowing the decoded pulse
buffers from opus_int to opus_int16.
---
 silk/code_signs.c    | 4 ++--
 silk/dec_API.c       | 2 +-
 silk/decode_core.c   | 2 +-
 silk/decode_frame.c  | 4 ++--
 silk/decode_pulses.c | 6 +++---
 silk/main.h          | 8 ++++----
 silk/shell_coder.c   | 8 ++++----
 7 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/silk/code_signs.c b/silk/code_signs.c
index 0419ea262..dfd1dca9a 100644
--- a/silk/code_signs.c
+++ b/silk/code_signs.c
@@ -74,7 +74,7 @@ void silk_encode_signs(
 /* Decodes signs of excitation */
 void silk_decode_signs(
     ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int16                  pulses[],                           /* I/O  pulse signal                                */
     opus_int                    length,                             /* I    length of input                             */
     const opus_int              signalType,                         /* I    Signal type                                 */
     const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
@@ -83,7 +83,7 @@ void silk_decode_signs(
 {
     opus_int         i, j, p;
     opus_uint8       icdf[ 2 ];
-    opus_int         *q_ptr;
+    opus_int16       *q_ptr;
     const opus_uint8 *icdf_ptr;
 
     icdf[ 1 ] = 0;
diff --git a/silk/dec_API.c b/silk/dec_API.c
index 4cbcf7151..660f93db7 100644
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -196,7 +196,7 @@ opus_int silk_Decode(                                   /* O    Returns error co
             for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
                 for( n = 0; n < decControl->nChannelsInternal; n++ ) {
                     if( channel_state[ n ].LBRR_flags[ i ] ) {
-                        opus_int pulses[ MAX_FRAME_LENGTH ];
+                        opus_int16 pulses[ MAX_FRAME_LENGTH ];
                         opus_int condCoding;
 
                         if( decControl->nChannelsInternal == 2 && n == 0 ) {
diff --git a/silk/decode_core.c b/silk/decode_core.c
index a820bf11d..af68b75da 100644
--- a/silk/decode_core.c
+++ b/silk/decode_core.c
@@ -39,7 +39,7 @@ void silk_decode_core(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
     opus_int16                  xq[],                           /* O    Decoded speech                              */
-    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+    const opus_int16            pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
 )
 {
     opus_int   i, k, lag = 0, start_idx, sLTP_buf_idx, NLSF_interpolation_flag, signalType;
diff --git a/silk/decode_frame.c b/silk/decode_frame.c
index abc00a3d5..1d98267f9 100644
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -47,13 +47,13 @@ opus_int silk_decode_frame(
 {
     VARDECL( silk_decoder_control, psDecCtrl );
     opus_int         L, mv_len, ret = 0;
-    VARDECL( opus_int, pulses );
+    VARDECL( opus_int16, pulses );
     SAVE_STACK;
 
     L = psDec->frame_length;
     ALLOC( psDecCtrl, 1, silk_decoder_control );
     ALLOC( pulses, (L + SHELL_CODEC_FRAME_LENGTH - 1) &
-                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int );
+                   ~(SHELL_CODEC_FRAME_LENGTH - 1), opus_int16 );
     psDecCtrl->LTP_scale_Q14 = 0;
 
     /* Safety checks */
diff --git a/silk/decode_pulses.c b/silk/decode_pulses.c
index e8a87c2ab..1e14bc37b 100644
--- a/silk/decode_pulses.c
+++ b/silk/decode_pulses.c
@@ -36,7 +36,7 @@ POSSIBILITY OF SUCH DAMAGE.
 /*********************************************/
 void silk_decode_pulses(
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    opus_int16                  pulses[],                       /* O    Excitation signal                           */
     const opus_int              signalType,                     /* I    Sigtype                                     */
     const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
     const opus_int              frame_length                    /* I    Frame length                                */
@@ -44,7 +44,7 @@ void silk_decode_pulses(
 {
     opus_int   i, j, k, iter, abs_q, nLS, RateLevelIndex;
     opus_int   sum_pulses[ MAX_NB_SHELL_BLOCKS ], nLshifts[ MAX_NB_SHELL_BLOCKS ];
-    opus_int   *pulses_ptr;
+    opus_int16 *pulses_ptr;
     const opus_uint8 *cdf_ptr;
 
     /*********************/
@@ -84,7 +84,7 @@ void silk_decode_pulses(
         if( sum_pulses[ i ] > 0 ) {
             silk_shell_decoder( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], psRangeDec, sum_pulses[ i ] );
         } else {
-            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( opus_int ) );
+            silk_memset( &pulses[ silk_SMULBB( i, SHELL_CODEC_FRAME_LENGTH ) ], 0, SHELL_CODEC_FRAME_LENGTH * sizeof( pulses[0] ) );
         }
     }
 
diff --git a/silk/main.h b/silk/main.h
index 2bdf89784..77524f5b5 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -116,7 +116,7 @@ void silk_encode_signs(
 /* Decodes signs of excitation */
 void silk_decode_signs(
     ec_dec                      *psRangeDec,                        /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                           /* I/O  pulse signal                                */
+    opus_int16                  pulses[],                           /* I/O  pulse signal                                */
     opus_int                    length,                             /* I    length of input                             */
     const opus_int              signalType,                         /* I    Signal type                                 */
     const opus_int              quantOffsetType,                    /* I    Quantization offset type                    */
@@ -161,7 +161,7 @@ void silk_shell_encoder(
 
 /* Shell decoder, operates on one shell code frame of 16 pulses */
 void silk_shell_decoder(
-    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    opus_int16                  *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
     const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
 );
@@ -397,13 +397,13 @@ void silk_decode_core(
     silk_decoder_state          *psDec,                         /* I/O  Decoder state                               */
     silk_decoder_control        *psDecCtrl,                     /* I    Decoder control                             */
     opus_int16                  xq[],                           /* O    Decoded speech                              */
-    const opus_int              pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
+    const opus_int16            pulses[ MAX_FRAME_LENGTH ]      /* I    Pulse signal                                */
 );
 
 /* Decode quantization indices of excitation (Shell coding) */
 void silk_decode_pulses(
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
-    opus_int                    pulses[],                       /* O    Excitation signal                           */
+    opus_int16                  pulses[],                       /* O    Excitation signal                           */
     const opus_int              signalType,                     /* I    Sigtype                                     */
     const opus_int              quantOffsetType,                /* I    quantOffsetType                             */
     const opus_int              frame_length                    /* I    Frame length                                */
diff --git a/silk/shell_coder.c b/silk/shell_coder.c
index 796f57d6c..4af341474 100644
--- a/silk/shell_coder.c
+++ b/silk/shell_coder.c
@@ -58,8 +58,8 @@ static OPUS_INLINE void encode_split(
 }
 
 static OPUS_INLINE void decode_split(
-    opus_int                    *p_child1,      /* O    pulse amplitude of first child subframe     */
-    opus_int                    *p_child2,      /* O    pulse amplitude of second child subframe    */
+    opus_int16                  *p_child1,      /* O    pulse amplitude of first child subframe     */
+    opus_int16                  *p_child2,      /* O    pulse amplitude of second child subframe    */
     ec_dec                      *psRangeDec,    /* I/O  Compressor data structure                   */
     const opus_int              p,              /* I    pulse amplitude of current subframe         */
     const opus_uint8            *shell_table    /* I    table of shell cdfs                         */
@@ -117,12 +117,12 @@ void silk_shell_encoder(
 
 /* Shell decoder, operates on one shell code frame of 16 pulses */
 void silk_shell_decoder(
-    opus_int                    *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
+    opus_int16                  *pulses0,                       /* O    data: nonnegative pulse amplitudes          */
     ec_dec                      *psRangeDec,                    /* I/O  Compressor data structure                   */
     const opus_int              pulses4                         /* I    number of pulses per pulse-subframe         */
 )
 {
-    opus_int pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
+    opus_int16 pulses3[ 2 ], pulses2[ 4 ], pulses1[ 8 ];
 
     /* this function operates on one shell code frame of 16 pulses */
     silk_assert( SHELL_CODEC_FRAME_LENGTH == 16 );
-- 
GitLab