NSQ_del_dec.c 37.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, (subject to the limitations in the disclaimer below)
are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Skype Limited, nor the names of specific
contributors, may be used to endorse or promote products derived from
this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED
BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS ''AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

28
29
30
31
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

32
#include "main.h"
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

/* State of one candidate path ("delayed decision state") of the delayed-decision quantizer. */
/* silk_NSQ_del_dec() keeps an array of these, one per parallel candidate, and finally       */
/* commits the one with the lowest RD_Q10.                                                   */
/* NOTE: the state-replacement memcpy in silk_noise_shape_quantizer_del_dec() addresses this */
/* struct as an array of opus_int32 and skips the first i words, so the member order (with   */
/* sLPC_Q14 first) and the all-opus_int32 layout must be preserved.                          */
typedef struct {
    /* Short-term prediction state; the first NSQ_LPC_BUF_LENGTH entries are seeded from NSQ->sLPC_Q14 */
    opus_int32 sLPC_Q14[ MAX_FRAME_LENGTH / MAX_NB_SUBFR + NSQ_LPC_BUF_LENGTH ];
    /* Per-sample history, indexed by the circular sample index; compared between states to detect */
    /* expired paths (presumably the dither seed at each sample - the write is outside this view)   */
    opus_int32 RandState[ DECISION_DELAY ];
    /* Per-sample quantized values in Q10; rounded down to Q0 to produce the output pulses */
    opus_int32 Q_Q10[     DECISION_DELAY ];
    /* Per-sample quantized signal in Q10; multiplied by the subframe gain to produce xq */
    opus_int32 Xq_Q10[    DECISION_DELAY ];
    /* Not referenced in the visible part of this file; presumably per-sample prediction history - confirm */
    opus_int32 Pred_Q16[  DECISION_DELAY ];
    /* Per-sample shaping signal; copied into NSQ->sLTP_shp_Q10 when the path is committed */
    opus_int32 Shape_Q10[ DECISION_DELAY ];
    /* State of the noise shaping filter sections, seeded from NSQ->sAR2_Q14 */
    opus_int32 sAR2_Q14[ MAX_SHAPE_LPC_ORDER ];
    /* Low-frequency shaping state, seeded from NSQ->sLF_AR_shp_Q12 */
    opus_int32 LF_AR_Q12;
    /* Dither generator state, advanced with silk_RAND() for every sample */
    opus_int32 Seed;
    /* Initial value of Seed; the winner's SeedInit is written back to psIndices->Seed */
    opus_int32 SeedInit;
    /* Accumulated rate-distortion cost of this path; lower is better */
    opus_int32 RD_Q10;
} NSQ_del_dec_struct;

/* Outcome of quantizing one sample with one candidate level. The quantizer fills two of   */
/* these per delayed-decision state: index 0 for the level with the lower rate-distortion  */
/* cost, index 1 for the other level.                                                      */
typedef struct {
    /* Candidate quantization level in Q10 (before the dither sign flip is undone) */
    opus_int32 Q_Q10;
    /* RD cost of the parent state plus the cost of this candidate */
    opus_int32 RD_Q10;
    /* Quantized output sample, xq_Q10 shifted up to Q14 */
    opus_int32 xq_Q14;
    /* Updated low-frequency shaping state: ( xq - n_AR ) shifted up to Q12 */
    opus_int32 LF_AR_Q12;
    /* Updated shaping signal: xq - n_AR - n_LF, in Q10 */
    opus_int32 sLTP_shp_Q10;
    /* Excitation plus long-term prediction, shifted up to Q16 */
    opus_int32 LPC_exc_Q16;
} NSQ_sample_struct;

57
static inline void silk_nsq_del_dec_scale_states(
58
59
60
61
62
63
64
65
66
67
68
69
    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
    const opus_int16     x[],                        /* I    Input in Q0                         */
    opus_int32           x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
    const opus_int16     sLTP[],                     /* I    Re-whitened LTP state in Q0         */
    opus_int32           sLTP_Q16[],                 /* O    LTP state matching scaled input     */
    opus_int             subfr,                      /* I    Subframe number                     */
    opus_int             nStatesDelayedDecision,     /* I    Number of del dec states            */
    opus_int             smpl_buf_idx,               /* I    Index to newest samples in buffers  */
    const opus_int       LTP_scale_Q14,              /* I    LTP state scaling                   */
    const opus_int32     Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
70
71
72
    const opus_int       pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
    const opus_int       signal_type,                /* I    Signal type                         */
    const opus_int       decisionDelay               /* I    Decision delay                      */
73
74
75
76
77
);

/******************************************/
/* Noise shape quantizer for one subframe */
/******************************************/
78
static inline void silk_noise_shape_quantizer_del_dec(
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
    silk_nsq_state      *NSQ,                   /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
    opus_int             signalType,             /* I    Signal type                         */
    const opus_int32     x_Q10[],                /* I                                        */
    opus_int8            pulses[],               /* O                                        */
    opus_int16           xq[],                   /* O                                        */
    opus_int32           sLTP_Q16[],             /* I/O  LTP filter state                    */
    opus_int32           delayedGain_Q16[],      /* I/O  Gain delay buffer                   */
    const opus_int16     a_Q12[],                /* I    Short term prediction coefs         */
    const opus_int16     b_Q14[],                /* I    Long term prediction coefs          */
    const opus_int16     AR_shp_Q13[],           /* I    Noise shaping coefs                 */
    opus_int             lag,                    /* I    Pitch lag                           */
    opus_int32           HarmShapeFIRPacked_Q14, /* I                                        */
    opus_int             Tilt_Q14,               /* I    Spectral tilt                       */
    opus_int32           LF_shp_Q14,             /* I                                        */
    opus_int32           Gain_Q16,               /* I                                        */
    opus_int             Lambda_Q10,             /* I                                        */
    opus_int             offset_Q10,             /* I                                        */
    opus_int             length,                 /* I    Input length                        */
    opus_int             subfr,                  /* I    Subframe number                     */
    opus_int             shapingLPCOrder,        /* I    Shaping LPC filter order            */
    opus_int             predictLPCOrder,        /* I    Prediction filter order             */
    opus_int             warping_Q16,            /* I                                        */
    opus_int             nStatesDelayedDecision, /* I    Number of states in decision tree   */
    opus_int             *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
    opus_int             decisionDelay           /* I                                        */
);

/* Noise shaping quantizer with delayed decision, for one frame.                              */
/*                                                                                            */
/* Keeps psEncC->nStatesDelayedDecision candidate states alive while quantizing the frame     */
/* subframe by subframe. Samples are only committed to pulses[] / NSQ->xq[] once they are     */
/* decisionDelay samples old; at the end of the frame (and before a mid-frame re-whitening)   */
/* the remaining decisionDelay samples are taken from the state with the lowest RD_Q10.       */
/*                                                                                            */
/* Side effects: updates the persistent filter states, lagPrev, xq and sLTP_shp_Q10 history   */
/* in *NSQ, and writes the winning state's initial dither seed to psIndices->Seed.            */
void silk_NSQ_del_dec(
    const silk_encoder_state        *psEncC,                                       /* I    Encoder State                       */
    silk_nsq_state                  *NSQ,                                          /* I/O  NSQ state                           */
    SideInfoIndices                 *psIndices,                                    /* I/O  Quantization Indices                */
    const opus_int16                x[],                                           /* I    Prefiltered input signal            */
    opus_int8                       pulses[],                                      /* O    Quantized pulse signal              */
    const opus_int16                PredCoef_Q12[ 2 * MAX_LPC_ORDER ],             /* I    Prediction coefs                    */
    const opus_int16                LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ],       /* I    LT prediction coefs                 */
    const opus_int16                AR2_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I                                        */
    const opus_int                  HarmShapeGain_Q14[ MAX_NB_SUBFR ],             /* I                                        */
    const opus_int                  Tilt_Q14[ MAX_NB_SUBFR ],                      /* I    Spectral tilt                       */
    const opus_int32                LF_shp_Q14[ MAX_NB_SUBFR ],                    /* I                                        */
    const opus_int32                Gains_Q16[ MAX_NB_SUBFR ],                     /* I                                        */
    const opus_int                  pitchL[ MAX_NB_SUBFR ],                        /* I                                        */
    const opus_int                  Lambda_Q10,                                    /* I                                        */
    const opus_int                  LTP_scale_Q14                                  /* I    LTP state scaling                   */
)
{
    opus_int     i, k, lag, start_idx, LSF_interpolation_flag, Winner_ind, subfr;
    opus_int     last_smple_idx, smpl_buf_idx, decisionDelay;
    const opus_int16 *A_Q12, *B_Q14, *AR_shp_Q13;
    opus_int16   *pxq;
    opus_int32   sLTP_Q16[ 2 * MAX_FRAME_LENGTH ];
    opus_int16   sLTP[     2 * MAX_FRAME_LENGTH ];
    opus_int32   HarmShapeFIRPacked_Q14;
    opus_int     offset_Q10;
    opus_int32   RDmin_Q10;
    opus_int32   x_sc_Q10[ MAX_SUB_FRAME_LENGTH ];
    opus_int32   delayedGain_Q16[  DECISION_DELAY ];
    NSQ_del_dec_struct psDelDec[ MAX_DEL_DEC_STATES ];
    NSQ_del_dec_struct *psDD;

    /* Set unvoiced lag to the previous one, overwrite later for voiced */
    lag = NSQ->lagPrev;

    silk_assert( NSQ->prev_inv_gain_Q16 != 0 );

    /* Initialize delayed decision states: every candidate starts from the same persistent */
    /* NSQ filter state and differs only in its dither seed                                */
    silk_memset( psDelDec, 0, psEncC->nStatesDelayedDecision * sizeof( NSQ_del_dec_struct ) );
    for( k = 0; k < psEncC->nStatesDelayedDecision; k++ ) {
        psDD                 = &psDelDec[ k ];
        psDD->Seed           = ( k + psIndices->Seed ) & 3;
        psDD->SeedInit       = psDD->Seed;
        psDD->RD_Q10         = 0;
        psDD->LF_AR_Q12      = NSQ->sLF_AR_shp_Q12;
        psDD->Shape_Q10[ 0 ] = NSQ->sLTP_shp_Q10[ psEncC->ltp_mem_length - 1 ];
        silk_memcpy( psDD->sLPC_Q14, NSQ->sLPC_Q14, NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
        silk_memcpy( psDD->sAR2_Q14, NSQ->sAR2_Q14, sizeof( NSQ->sAR2_Q14 ) );
    }

    offset_Q10   = silk_Quantization_Offsets_Q10[ psIndices->signalType >> 1 ][ psIndices->quantOffsetType ];
    smpl_buf_idx = 0; /* index of oldest samples */

    decisionDelay = silk_min_int( DECISION_DELAY, psEncC->subfr_length );

    /* For voiced frames limit the decision delay to lower than the pitch lag */
    if( psIndices->signalType == TYPE_VOICED ) {
        for( k = 0; k < psEncC->nb_subfr; k++ ) {
            decisionDelay = silk_min_int( decisionDelay, pitchL[ k ] - LTP_ORDER / 2 - 1 );
        }
    } else {
        if( lag > 0 ) {
            decisionDelay = silk_min_int( decisionDelay, lag - LTP_ORDER / 2 - 1 );
        }
    }

    if( psIndices->NLSFInterpCoef_Q2 == 4 ) {
        LSF_interpolation_flag = 0;
    } else {
        LSF_interpolation_flag = 1;
    }

    /* Setup pointers to start of sub frame */
    pxq                   = &NSQ->xq[ psEncC->ltp_mem_length ];
    NSQ->sLTP_shp_buf_idx = psEncC->ltp_mem_length;
    NSQ->sLTP_buf_idx     = psEncC->ltp_mem_length;
    subfr = 0;
    for( k = 0; k < psEncC->nb_subfr; k++ ) {
        /* Without interpolation (flag == 0) the OR with 1 always selects the second coefficient set; */
        /* with interpolation the set index is k >> 1                                                 */
        A_Q12      = &PredCoef_Q12[ ( ( k >> 1 ) | ( 1 - LSF_interpolation_flag ) ) * MAX_LPC_ORDER ];
        B_Q14      = &LTPCoef_Q14[ k * LTP_ORDER           ];
        AR_shp_Q13 = &AR2_Q13[     k * MAX_SHAPE_LPC_ORDER ];

        /* Noise shape parameters */
        silk_assert( HarmShapeGain_Q14[ k ] >= 0 );
        /* Pack two taps into one word: gain / 4 in the low half, gain / 2 in the high half */
        HarmShapeFIRPacked_Q14  =                          silk_RSHIFT( HarmShapeGain_Q14[ k ], 2 );
        HarmShapeFIRPacked_Q14 |= silk_LSHIFT( ( opus_int32 )silk_RSHIFT( HarmShapeGain_Q14[ k ], 1 ), 16 );

        NSQ->rewhite_flag = 0;
        if( psIndices->signalType == TYPE_VOICED ) {
            /* Voiced */
            lag = pitchL[ k ];

            /* Re-whitening: mask is 1 with interpolation (every even k) and 3 without (k == 0 only) */
            if( ( k & ( 3 - silk_LSHIFT( LSF_interpolation_flag, 1 ) ) ) == 0 ) {
                if( k == 2 ) {
                    /* RESET DELAYED DECISIONS */
                    /* Find winner */
                    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
                    Winner_ind = 0;
                    for( i = 1; i < psEncC->nStatesDelayedDecision; i++ ) {
                        if( psDelDec[ i ].RD_Q10 < RDmin_Q10 ) {
                            RDmin_Q10 = psDelDec[ i ].RD_Q10;
                            Winner_ind = i;
                        }
                    }
                    /* Penalize all other states so that they cannot win from here on */
                    for( i = 0; i < psEncC->nStatesDelayedDecision; i++ ) {
                        if( i != Winner_ind ) {
                            psDelDec[ i ].RD_Q10 += ( silk_int32_MAX >> 4 );
                            silk_assert( psDelDec[ i ].RD_Q10 >= 0 );
                        }
                    }

                    /* Copy final part of signals from winner state to output and long-term filter states */
                    /* pulses and pxq already point at subframe k, so the pending samples of the previous */
                    /* subframe sit at negative offsets and are scaled with that subframe's gain          */
                    psDD = &psDelDec[ Winner_ind ];
                    last_smple_idx = smpl_buf_idx + decisionDelay;
                    for( i = 0; i < decisionDelay; i++ ) {
                        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
                        pulses[   i - decisionDelay ] = ( opus_int8 )silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
                        pxq[ i - decisionDelay ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND(
                            silk_SMULWW( psDD->Xq_Q10[ last_smple_idx ], Gains_Q16[ 1 ] ), 10 ) );
                        NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q10[ last_smple_idx ];
                    }

                    subfr = 0;
                }

                /* Rewhiten with new A coefs */
                start_idx = psEncC->ltp_mem_length - lag - psEncC->predictLPCOrder - LTP_ORDER / 2;
                silk_assert( start_idx > 0 );

                silk_LPC_analysis_filter( &sLTP[ start_idx ], &NSQ->xq[ start_idx + k * psEncC->subfr_length ],
                    A_Q12, psEncC->ltp_mem_length - start_idx, psEncC->predictLPCOrder );

                NSQ->sLTP_buf_idx = psEncC->ltp_mem_length;
                NSQ->rewhite_flag = 1;
            }
        }

        silk_nsq_del_dec_scale_states( psEncC, NSQ, psDelDec, x, x_sc_Q10, sLTP, sLTP_Q16, k,
            psEncC->nStatesDelayedDecision, smpl_buf_idx, LTP_scale_Q14, Gains_Q16, pitchL, psIndices->signalType, decisionDelay );

        silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q16,
            delayedGain_Q16, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
            Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
            psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay );

        x      += psEncC->subfr_length;
        pulses += psEncC->subfr_length;
        pxq    += psEncC->subfr_length;
    }

    /* Find winner */
    RDmin_Q10 = psDelDec[ 0 ].RD_Q10;
    Winner_ind = 0;
    for( k = 1; k < psEncC->nStatesDelayedDecision; k++ ) {
        if( psDelDec[ k ].RD_Q10 < RDmin_Q10 ) {
            RDmin_Q10 = psDelDec[ k ].RD_Q10;
            Winner_ind = k;
        }
    }

    /* Copy final part of signals from winner state to output and long-term filter states */
    /* pulses and pxq now point one past the frame, hence the negative offsets            */
    psDD = &psDelDec[ Winner_ind ];
    psIndices->Seed = psDD->SeedInit;
    last_smple_idx = smpl_buf_idx + decisionDelay;
    for( i = 0; i < decisionDelay; i++ ) {
        last_smple_idx = ( last_smple_idx - 1 ) & DECISION_DELAY_MASK;
        pulses[   i - decisionDelay ] = ( opus_int8 )silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
        pxq[ i - decisionDelay ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND(
            silk_SMULWW( psDD->Xq_Q10[ last_smple_idx ], Gains_Q16[ psEncC->nb_subfr - 1 ] ), 10 ) );
        NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - decisionDelay + i ] = psDD->Shape_Q10[ last_smple_idx ];
    }
    /* Carry the winner's filter memories over to the next frame; the LPC history kept is the */
    /* NSQ_LPC_BUF_LENGTH entries that start subfr_length into the winner's buffer            */
    silk_memcpy( NSQ->sLPC_Q14, &psDD->sLPC_Q14[ psEncC->subfr_length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
    silk_memcpy( NSQ->sAR2_Q14, psDD->sAR2_Q14, sizeof( psDD->sAR2_Q14 ) );

    /* Update states */
    NSQ->sLF_AR_shp_Q12 = psDD->LF_AR_Q12;
    NSQ->lagPrev        = pitchL[ psEncC->nb_subfr - 1 ];

    /* Save quantized speech and noise shaping signals */
    /* Shift the buffers down by one frame, keeping ltp_mem_length samples of history (regions may overlap) */
    silk_memmove( NSQ->xq,           &NSQ->xq[           psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int16 ) );
    silk_memmove( NSQ->sLTP_shp_Q10, &NSQ->sLTP_shp_Q10[ psEncC->frame_length ], psEncC->ltp_mem_length * sizeof( opus_int32 ) );

#ifdef SAVE_ALL_INTERNAL_DATA
    DEBUG_STORE_DATA( xq.dat,       &pxq[ -psEncC->frame_length ],       psEncC->frame_length * sizeof( opus_int16 ) );
    DEBUG_STORE_DATA( q.dat,        &pulses[ -psEncC->frame_length ],    psEncC->frame_length * sizeof( opus_int8 ) );
#endif
}

/******************************************/
/* Noise shape quantizer for one subframe */
/******************************************/
299
static inline void silk_noise_shape_quantizer_del_dec(
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
    silk_nsq_state  *NSQ,                   /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],             /* I/O  Delayed decision states             */
    opus_int             signalType,             /* I    Signal type                         */
    const opus_int32     x_Q10[],                /* I                                        */
    opus_int8            pulses[],               /* O                                        */
    opus_int16           xq[],                   /* O                                        */
    opus_int32           sLTP_Q16[],             /* I/O  LTP filter state                    */
    opus_int32           delayedGain_Q16[],      /* I/O  Gain delay buffer                   */
    const opus_int16     a_Q12[],                /* I    Short term prediction coefs         */
    const opus_int16     b_Q14[],                /* I    Long term prediction coefs          */
    const opus_int16     AR_shp_Q13[],           /* I    Noise shaping coefs                 */
    opus_int             lag,                    /* I    Pitch lag                           */
    opus_int32           HarmShapeFIRPacked_Q14, /* I                                        */
    opus_int             Tilt_Q14,               /* I    Spectral tilt                       */
    opus_int32           LF_shp_Q14,             /* I                                        */
    opus_int32           Gain_Q16,               /* I                                        */
    opus_int             Lambda_Q10,             /* I                                        */
    opus_int             offset_Q10,             /* I                                        */
    opus_int             length,                 /* I    Input length                        */
    opus_int             subfr,                  /* I    Subframe number                     */
    opus_int             shapingLPCOrder,        /* I    Shaping LPC filter order            */
    opus_int             predictLPCOrder,        /* I    Prediction filter order             */
    opus_int             warping_Q16,            /* I                                        */
    opus_int             nStatesDelayedDecision, /* I    Number of states in decision tree   */
    opus_int             *smpl_buf_idx,          /* I    Index to newest samples in buffers  */
    opus_int             decisionDelay           /* I                                        */
)
{
    opus_int     i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
    opus_int32   Winner_rand_state;
    opus_int32   LTP_pred_Q14, LPC_pred_Q10, n_AR_Q10, n_LTP_Q14, LTP_Q10;
    opus_int32   n_LF_Q10, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
    opus_int32   q1_Q10, q2_Q10, dither, exc_Q10, LPC_exc_Q10, xq_Q10;
    opus_int32   tmp1, tmp2, sLF_AR_shp_Q10;
    opus_int32   *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
    NSQ_sample_struct  psSampleState[ MAX_DEL_DEC_STATES ][ 2 ];
    NSQ_del_dec_struct *psDD;
    NSQ_sample_struct  *psSS;

    shp_lag_ptr  = &NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
    pred_lag_ptr = &sLTP_Q16[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];

    for( i = 0; i < length; i++ ) {
        /* Perform common calculations used in all states */

        /* Long-term prediction */
        if( signalType == TYPE_VOICED ) {
            /* Unrolled loop */
348
349
350
351
352
            LTP_pred_Q14 = silk_SMULWB(               pred_lag_ptr[  0 ], b_Q14[ 0 ] );
            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -1 ], b_Q14[ 1 ] );
            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -2 ], b_Q14[ 2 ] );
            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -3 ], b_Q14[ 3 ] );
            LTP_pred_Q14 = silk_SMLAWB( LTP_pred_Q14, pred_lag_ptr[ -4 ], b_Q14[ 4 ] );
353
354
355
356
357
358
359
360
            pred_lag_ptr++;
        } else {
            LTP_pred_Q14 = 0;
        }

        /* Long-term shaping */
        if( lag > 0 ) {
            /* Symmetric, packed FIR coefficients */
361
362
363
            n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
            n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ],                     HarmShapeFIRPacked_Q14 );
            n_LTP_Q14 = silk_LSHIFT( n_LTP_Q14, 6 );
364
365
            shp_lag_ptr++;

366
            LTP_Q10 = silk_RSHIFT( silk_SUB32( LTP_pred_Q14, n_LTP_Q14 ), 4 );
367
368
369
370
371
372
373
374
375
376
377
378
        } else {
            LTP_Q10 = 0;
        }

        for( k = 0; k < nStatesDelayedDecision; k++ ) {
            /* Delayed decision state */
            psDD = &psDelDec[ k ];

            /* Sample state */
            psSS = psSampleState[ k ];

            /* Generate dither */
379
            psDD->Seed = silk_RAND( psDD->Seed );
380
381

            /* dither = rand_seed < 0 ? 0xFFFFFFFF : 0; */
382
            dither = silk_RSHIFT( psDD->Seed, 31 );
383
384
385
386

            /* Pointer used in short term prediction and shaping */
            psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
            /* Short-term prediction */
387
388
389
            silk_assert( predictLPCOrder >= 10 );            /* check that unrolling works */
            silk_assert( ( predictLPCOrder  & 1 ) == 0 );    /* check that order is even */
            silk_assert( ( (opus_int64)a_Q12 & 3 ) == 0 );    /* check that array starts at 4-byte aligned address */
390
            /* Partially unrolled */
391
392
393
394
395
396
397
398
399
400
            LPC_pred_Q10 = silk_SMULWB(               psLPC_Q14[  0 ], a_Q12[ 0 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
            LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
401
            for( j = 10; j < predictLPCOrder; j ++ ) {
402
                LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -j ], a_Q12[ j ] );
403
404
405
            }

            /* Noise shape feedback */
406
            silk_assert( ( shapingLPCOrder & 1 ) == 0 );   /* check that order is even */
407
            /* Output of lowpass section */
408
            tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
409
            /* Output of allpass section */
410
            tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
411
            psDD->sAR2_Q14[ 0 ] = tmp2;
412
            n_AR_Q10 = silk_SMULWB( tmp2, AR_shp_Q13[ 0 ] );
413
414
415
            /* Loop over allpass sections */
            for( j = 2; j < shapingLPCOrder; j += 2 ) {
                /* Output of allpass section */
416
                tmp2 = silk_SMLAWB( psDD->sAR2_Q14[ j - 1 ], psDD->sAR2_Q14[ j + 0 ] - tmp1, warping_Q16 );
417
                psDD->sAR2_Q14[ j - 1 ] = tmp1;
418
                n_AR_Q10 = silk_SMLAWB( n_AR_Q10, tmp1, AR_shp_Q13[ j - 1 ] );
419
                /* Output of allpass section */
420
                tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ j + 0 ], psDD->sAR2_Q14[ j + 1 ] - tmp2, warping_Q16 );
421
                psDD->sAR2_Q14[ j + 0 ] = tmp2;
422
                n_AR_Q10 = silk_SMLAWB( n_AR_Q10, tmp2, AR_shp_Q13[ j ] );
423
424
            }
            psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
425
            n_AR_Q10 = silk_SMLAWB( n_AR_Q10, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
426

427
428
            n_AR_Q10 = silk_RSHIFT( n_AR_Q10, 1 );           /* Q11 -> Q10 */
            n_AR_Q10 = silk_SMLAWB( n_AR_Q10, psDD->LF_AR_Q12, Tilt_Q14 );
429

430
431
            n_LF_Q10 = silk_LSHIFT( silk_SMULWB( psDD->Shape_Q10[ *smpl_buf_idx ], LF_shp_Q14 ), 2 );
            n_LF_Q10 = silk_SMLAWT( n_LF_Q10, psDD->LF_AR_Q12, LF_shp_Q14 );
432
433
434

            /* Input minus prediction plus noise feedback                       */
            /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP  */
435
436
437
438
            tmp1  = silk_ADD32( LTP_Q10, LPC_pred_Q10 );                         /* add Q10 stuff */
            tmp1  = silk_SUB32( tmp1, n_AR_Q10 );                                /* subtract Q10 stuff */
            tmp1  = silk_SUB32( tmp1, n_LF_Q10 );                                /* subtract Q10 stuff */
            r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 );                              /* residual error Q10 */
439
440
441

            /* Flip sign depending on dither */
            r_Q10 = r_Q10 ^ dither;
442
            r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
443
444

            /* Find two quantization level candidates and measure their rate-distortion */
445
446
            q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
            q1_Q10 = silk_RSHIFT( q1_Q10, 10 );
447
            if( q1_Q10 > 0 ) {
448
449
450
451
452
                q1_Q10  = silk_SUB32( silk_LSHIFT( q1_Q10, 10 ), QUANT_LEVEL_ADJUST_Q10 );
                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
453
454
            } else if( q1_Q10 == 0 ) {
                q1_Q10  = offset_Q10;
455
456
457
                q2_Q10  = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
                rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
                rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
458
459
            } else if( q1_Q10 == -1 ) {
                q2_Q10  = offset_Q10;
460
461
462
                q1_Q10  = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
                rd2_Q10 = silk_SMULBB(  q2_Q10, Lambda_Q10 );
463
            } else {            /* Q1_Q10 < -1 */
464
465
466
467
468
                q1_Q10  = silk_ADD32( silk_LSHIFT( q1_Q10, 10 ), QUANT_LEVEL_ADJUST_Q10 );
                q1_Q10  = silk_ADD32( q1_Q10, offset_Q10 );
                q2_Q10  = silk_ADD32( q1_Q10, 1024 );
                rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
                rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
469
            }
470
471
472
473
            rr_Q10  = silk_SUB32( r_Q10, q1_Q10 );
            rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
            rr_Q10  = silk_SUB32( r_Q10, q2_Q10 );
            rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
474
475

            if( rd1_Q10 < rd2_Q10 ) {
476
477
                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
478
479
480
                psSS[ 0 ].Q_Q10  = q1_Q10;
                psSS[ 1 ].Q_Q10  = q2_Q10;
            } else {
481
482
                psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
                psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
483
484
485
486
487
488
489
490
491
492
                psSS[ 0 ].Q_Q10  = q2_Q10;
                psSS[ 1 ].Q_Q10  = q1_Q10;
            }

            /* Update states for best quantization */

            /* Quantized excitation */
            exc_Q10 = psSS[ 0 ].Q_Q10 ^ dither;

            /* Add predictions */
493
494
            LPC_exc_Q10 = exc_Q10 + silk_RSHIFT_ROUND( LTP_pred_Q14, 4 );
            xq_Q10      = silk_ADD32( LPC_exc_Q10, LPC_pred_Q10 );
495
496

            /* Update states */
497
498
499
500
501
            sLF_AR_shp_Q10         = silk_SUB32(  xq_Q10, n_AR_Q10 );
            psSS[ 0 ].sLTP_shp_Q10 = silk_SUB32(  sLF_AR_shp_Q10, n_LF_Q10 );
            psSS[ 0 ].LF_AR_Q12    = silk_LSHIFT( sLF_AR_shp_Q10, 2 );
            psSS[ 0 ].xq_Q14       = silk_LSHIFT( xq_Q10,         4 );
            psSS[ 0 ].LPC_exc_Q16  = silk_LSHIFT( LPC_exc_Q10,    6 );
502
503
504
505
506
507
508

            /* Update states for second best quantization */

            /* Quantized excitation */
            exc_Q10 = psSS[ 1 ].Q_Q10 ^ dither;

            /* Add predictions */
509
510
            LPC_exc_Q10 = exc_Q10 + silk_RSHIFT_ROUND( LTP_pred_Q14, 4 );
            xq_Q10      = silk_ADD32( LPC_exc_Q10, LPC_pred_Q10 );
511
512

            /* Update states */
513
514
515
516
517
            sLF_AR_shp_Q10         = silk_SUB32(  xq_Q10, n_AR_Q10 );
            psSS[ 1 ].sLTP_shp_Q10 = silk_SUB32(  sLF_AR_shp_Q10, n_LF_Q10 );
            psSS[ 1 ].LF_AR_Q12    = silk_LSHIFT( sLF_AR_shp_Q10, 2 );
            psSS[ 1 ].xq_Q14       = silk_LSHIFT( xq_Q10,         4 );
            psSS[ 1 ].LPC_exc_Q16  = silk_LSHIFT( LPC_exc_Q10,    6 );
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
        }

        *smpl_buf_idx  = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK;                   /* Index to newest samples              */
        last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK;       /* Index to decisionDelay old samples   */

        /* Find winner */
        RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
        Winner_ind = 0;
        for( k = 1; k < nStatesDelayedDecision; k++ ) {
            if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
                RDmin_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
                Winner_ind = k;
            }
        }

        /* Increase RD values of expired states */
        Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
        for( k = 0; k < nStatesDelayedDecision; k++ ) {
            if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
537
538
539
                psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, ( silk_int32_MAX >> 4 ) );
                psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, ( silk_int32_MAX >> 4 ) );
                silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
            }
        }

        /* Find worst in first set and best in second set */
        RDmax_Q10  = psSampleState[ 0 ][ 0 ].RD_Q10;
        RDmin_Q10  = psSampleState[ 0 ][ 1 ].RD_Q10;
        RDmax_ind = 0;
        RDmin_ind = 0;
        for( k = 1; k < nStatesDelayedDecision; k++ ) {
            /* find worst in first set */
            if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
                RDmax_Q10  = psSampleState[ k ][ 0 ].RD_Q10;
                RDmax_ind = k;
            }
            /* find best in second set */
            if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
                RDmin_Q10  = psSampleState[ k ][ 1 ].RD_Q10;
                RDmin_ind = k;
            }
        }

        /* Replace a state if best from second set outperforms worst in first set */
        if( RDmin_Q10 < RDmax_Q10 ) {
563
            silk_memcpy( ((opus_int32 *)&psDelDec[ RDmax_ind ]) + i,
564
                        ((opus_int32 *)&psDelDec[ RDmin_ind ]) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
565
            silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
566
567
568
569
570
        }

        /* Write samples from winner to output and long-term filter states */
        psDD = &psDelDec[ Winner_ind ];
        if( subfr > 0 || i >= decisionDelay ) {
571
572
573
            pulses[  i - decisionDelay ] = ( opus_int8 )silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
            xq[ i - decisionDelay ] = ( opus_int16 )silk_SAT16( silk_RSHIFT_ROUND(
                silk_SMULWW( psDD->Xq_Q10[ last_smple_idx ], delayedGain_Q16[ last_smple_idx ] ), 10 ) );
574
575
576
577
578
579
580
581
582
583
584
585
            NSQ->sLTP_shp_Q10[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q10[ last_smple_idx ];
            sLTP_Q16[          NSQ->sLTP_buf_idx     - decisionDelay ] = psDD->Pred_Q16[  last_smple_idx ];
        }
        NSQ->sLTP_shp_buf_idx++;
        NSQ->sLTP_buf_idx++;

        /* Update states */
        for( k = 0; k < nStatesDelayedDecision; k++ ) {
            psDD                                     = &psDelDec[ k ];
            psSS                                     = &psSampleState[ k ][ 0 ];
            psDD->LF_AR_Q12                          = psSS->LF_AR_Q12;
            psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
586
            psDD->Xq_Q10[    *smpl_buf_idx ]         = silk_RSHIFT( psSS->xq_Q14, 4 );
587
588
589
            psDD->Q_Q10[     *smpl_buf_idx ]         = psSS->Q_Q10;
            psDD->Pred_Q16[  *smpl_buf_idx ]         = psSS->LPC_exc_Q16;
            psDD->Shape_Q10[ *smpl_buf_idx ]         = psSS->sLTP_shp_Q10;
590
            psDD->Seed                               = silk_ADD32( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
591
592
593
594
595
596
597
598
            psDD->RandState[ *smpl_buf_idx ]         = psDD->Seed;
            psDD->RD_Q10                             = psSS->RD_Q10;
        }
        delayedGain_Q16[     *smpl_buf_idx ]         = Gain_Q16;
    }
    /* Update LPC states */
    for( k = 0; k < nStatesDelayedDecision; k++ ) {
        psDD = &psDelDec[ k ];
599
        silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
600
601
602
    }
}

603
static inline void silk_nsq_del_dec_scale_states(
604
605
606
607
608
609
610
611
612
613
614
615
    const silk_encoder_state *psEncC,               /* I    Encoder State                       */
    silk_nsq_state      *NSQ,                       /* I/O  NSQ state                           */
    NSQ_del_dec_struct  psDelDec[],                 /* I/O  Delayed decision states             */
    const opus_int16     x[],                        /* I    Input in Q0                         */
    opus_int32           x_sc_Q10[],                 /* O    Input scaled with 1/Gain in Q10     */
    const opus_int16     sLTP[],                     /* I    Re-whitened LTP state in Q0         */
    opus_int32           sLTP_Q16[],                 /* O    LTP state matching scaled input     */
    opus_int             subfr,                      /* I    Subframe number                     */
    opus_int             nStatesDelayedDecision,     /* I    Number of del dec states            */
    opus_int             smpl_buf_idx,               /* I    Index to newest samples in buffers  */
    const opus_int       LTP_scale_Q14,              /* I    LTP state scaling                   */
    const opus_int32     Gains_Q16[ MAX_NB_SUBFR ],  /* I                                        */
616
617
618
    const opus_int       pitchL[ MAX_NB_SUBFR ],     /* I    Pitch lag                           */
    const opus_int       signal_type,                /* I    Signal type                         */
    const opus_int       decisionDelay               /* I    Decision delay                      */
619
620
621
622
623
624
)
{
    opus_int            i, k, lag;
    opus_int32          inv_gain_Q16, gain_adj_Q16, inv_gain_Q32;
    NSQ_del_dec_struct *psDD;

625
626
    inv_gain_Q16 = silk_INVERSE32_varQ( silk_max( Gains_Q16[ subfr ], 1 ), 32 );
    inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX );
627
628
629
630
    lag          = pitchL[ subfr ];

    /* After rewhitening the LTP state is un-scaled, so scale with inv_gain_Q16 */
    if( NSQ->rewhite_flag ) {
631
        inv_gain_Q32 = silk_LSHIFT( inv_gain_Q16, 16 );
632
633
        if( subfr == 0 ) {
            /* Do LTP downscaling */
634
            inv_gain_Q32 = silk_LSHIFT( silk_SMULWB( inv_gain_Q32, LTP_scale_Q14 ), 2 );
635
636
        }
        for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx; i++ ) {
637
638
            silk_assert( i < MAX_FRAME_LENGTH );
            sLTP_Q16[ i ] = silk_SMULWB( inv_gain_Q32, sLTP[ i ] );
639
640
641
642
643
644
645
646
647
        }
    }

    /* Adjust for changing gain */
    if( inv_gain_Q16 != NSQ->prev_inv_gain_Q16 ) {
        gain_adj_Q16 = silk_DIV32_varQ( inv_gain_Q16, NSQ->prev_inv_gain_Q16, 16 );

        /* Scale long-term shaping state */
        for( i = NSQ->sLTP_shp_buf_idx - psEncC->ltp_mem_length; i < NSQ->sLTP_shp_buf_idx; i++ ) {
648
            NSQ->sLTP_shp_Q10[ i ] = silk_SMULWW( gain_adj_Q16, NSQ->sLTP_shp_Q10[ i ] );
649
650
651
        }

        /* Scale long-term prediction state */
652
653
        if( signal_type == TYPE_VOICED && NSQ->rewhite_flag == 0 ) {
            for( i = NSQ->sLTP_buf_idx - lag - LTP_ORDER / 2; i < NSQ->sLTP_buf_idx - decisionDelay; i++ ) {
654
                sLTP_Q16[ i ] = silk_SMULWW( gain_adj_Q16, sLTP_Q16[ i ] );
655
656
657
658
659
660
661
            }
        }

        for( k = 0; k < nStatesDelayedDecision; k++ ) {
            psDD = &psDelDec[ k ];

            /* Scale scalar states */
662
            psDD->LF_AR_Q12 = silk_SMULWW( gain_adj_Q16, psDD->LF_AR_Q12 );
663
664
665

            /* Scale short-term prediction and shaping states */
            for( i = 0; i < NSQ_LPC_BUF_LENGTH; i++ ) {
666
                psDD->sLPC_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sLPC_Q14[ i ] );
667
668
            }
            for( i = 0; i < MAX_SHAPE_LPC_ORDER; i++ ) {
669
                psDD->sAR2_Q14[ i ] = silk_SMULWW( gain_adj_Q16, psDD->sAR2_Q14[ i ] );
670
671
            }
            for( i = 0; i < DECISION_DELAY; i++ ) {
672
673
                psDD->Pred_Q16[  i ] = silk_SMULWW( gain_adj_Q16, psDD->Pred_Q16[  i ] );
                psDD->Shape_Q10[ i ] = silk_SMULWW( gain_adj_Q16, psDD->Shape_Q10[ i ] );
674
675
676
677
678
679
            }
        }
    }

    /* Scale input */
    for( i = 0; i < psEncC->subfr_length; i++ ) {
680
        x_sc_Q10[ i ] = silk_RSHIFT( silk_SMULBB( x[ i ], ( opus_int16 )inv_gain_Q16 ), 6 );
681
682
683
    }

    /* save inv_gain */
684
    silk_assert( inv_gain_Q16 != 0 );
685
686
    NSQ->prev_inv_gain_Q16 = inv_gain_Q16;
}