Commit c2583620 authored by Jean-Marc Valin's avatar Jean-Marc Valin
Browse files

Delaying allocation of the SILK temporary output buffer to reduce peak stack

parent 5f807c17
......@@ -31,6 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "API.h"
#include "main.h"
#include "stack_alloc.h"
#include "os_support.h"
/************************/
/* Decoder Super Struct */
......@@ -90,7 +91,8 @@ opus_int silk_Decode( /* O Returns error co
opus_int i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
opus_int32 nSamplesOutDec, LBRR_symbol;
opus_int16 *samplesOut1_tmp[ 2 ];
VARDECL( opus_int16, samplesOut1_tmp_storage );
VARDECL( opus_int16, samplesOut1_tmp_storage1 );
VARDECL( opus_int16, samplesOut1_tmp_storage2 );
VARDECL( opus_int16, samplesOut2_tmp );
opus_int32 MS_pred_Q13[ 2 ] = { 0 };
opus_int16 *resample_out_ptr;
......@@ -98,6 +100,7 @@ opus_int silk_Decode( /* O Returns error co
silk_decoder_state *channel_state = psDec->channel_state;
opus_int has_side;
opus_int stereo_to_mono;
int delay_stack_alloc;
SAVE_STACK;
silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
......@@ -251,13 +254,22 @@ opus_int silk_Decode( /* O Returns error co
psDec->channel_state[ 1 ].first_frame_after_reset = 1;
}
ALLOC( samplesOut1_tmp_storage,
decControl->nChannelsInternal*(
channel_state[ 0 ].frame_length + 2 ),
/* Check if the temp buffer fits into the output PCM buffer. If it fits,
we can delay allocating the temp buffer until after the SILK peak stack
usage. We need to use a < and not a <= because of the two extra samples. */
delay_stack_alloc = decControl->internalSampleRate*decControl->nChannelsInternal
< decControl->API_sampleRate*decControl->nChannelsAPI;
ALLOC( samplesOut1_tmp_storage1, delay_stack_alloc ? ALLOC_NONE
: decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 ),
opus_int16 );
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
+ channel_state[ 0 ].frame_length + 2;
if ( delay_stack_alloc )
{
samplesOut1_tmp[ 0 ] = samplesOut;
samplesOut1_tmp[ 1 ] = samplesOut + channel_state[ 0 ].frame_length + 2;
} else {
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage1;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage1 + channel_state[ 0 ].frame_length + 2;
}
if( lostFlag == FLAG_DECODE_NORMAL ) {
has_side = !decode_only_middle;
......@@ -312,6 +324,15 @@ opus_int silk_Decode( /* O Returns error co
resample_out_ptr = samplesOut;
}
ALLOC( samplesOut1_tmp_storage2, delay_stack_alloc
? decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2 )
: ALLOC_NONE,
opus_int16 );
if ( delay_stack_alloc ) {
OPUS_COPY(samplesOut1_tmp_storage2, samplesOut, decControl->nChannelsInternal*(channel_state[ 0 ].frame_length + 2));
samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage2;
samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage2 + channel_state[ 0 ].frame_length + 2;
}
for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
/* Resample decoded signal to API_sampleRate */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment