/* Copyright (c) 2010-2011 Xiph.Org Foundation, Skype Limited
   Written by Jean-Marc Valin and Koen Vos */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdarg.h>
#include "celt.h"
#include "entenc.h"
#include "modes.h"
#include "API.h"
#include "stack_alloc.h"
#include "float_cast.h"
#include "opus.h"
#include "arch.h"
#include "pitch.h"
#include "opus_private.h"
#include "os_support.h"
#include "cpu_support.h"
#include "analysis.h"
#include "mathops.h"
#include "tuning_parameters.h"
#ifdef FIXED_POINT
#include "fixed/structs_FIX.h"
#else
#include "float/structs_FLP.h"
#endif

/* Per-channel length unit of OpusEncoder::delay_buffer, which is declared
   with MAX_ENCODER_BUFFER*2 entries. */
#define MAX_ENCODER_BUFFER 480

#ifndef DISABLE_FLOAT_API
#define PSEUDO_SNR_THRESHOLD 316.23f    /* 10^(25/10) */
#endif

60
61
62
63
64
65
/* State carried from one call of compute_stereo_width() to the next. */
typedef struct {
   opus_val32 XX;             /* smoothed energy of the even-indexed (first) channel samples */
   opus_val32 XY;             /* smoothed cross-product of the two channels */
   opus_val32 YY;             /* smoothed energy of the odd-indexed (second) channel samples */
   opus_val16 smoothed_width; /* slowly smoothed width estimate */
   opus_val16 max_follower;   /* decaying peak follower of smoothed_width */
} StereoWidthState;

66
67
68
69
struct OpusEncoder {
    int          celt_enc_offset;
    int          silk_enc_offset;
    silk_EncControlStruct silk_mode;
70
    int          application;
71
    int          channels;
72
    int          delay_compensation;
73
    int          force_channels;
74
75
    int          signal_type;
    int          user_bandwidth;
76
    int          max_bandwidth;
77
    int          user_forced_mode;
78
    int          voice_ratio;
79
    opus_int32   Fs;
80
81
    int          use_vbr;
    int          vbr_constraint;
82
    int          variable_duration;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
83
84
    opus_int32   bitrate_bps;
    opus_int32   user_bitrate_bps;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
85
    int          lsb_depth;
86
    int          encoder_buffer;
87
    int          lfe;
88
    int          arch;
89
    int          use_dtx;                 /* general DTX for both SILK and CELT */
Ralph Giles's avatar
Ralph Giles committed
90
91
92
#ifndef DISABLE_FLOAT_API
    TonalityAnalysisState analysis;
#endif
93
94
95

#define OPUS_ENCODER_RESET_START stream_channels
    int          stream_channels;
Jean-Marc Valin's avatar
Jean-Marc Valin committed
96
    opus_int16   hybrid_stereo_width_Q14;
97
    opus_int32   variable_HP_smth2_Q15;
98
    opus_val16   prev_HB_gain;
99
    opus_val32   hp_mem[4];
100
101
    int          mode;
    int          prev_mode;
102
    int          prev_channels;
103
    int          prev_framesize;
104
    int          bandwidth;
105
106
    /* Bandwidth determined automatically from the rate (before any other adjustment) */
    int          auto_bandwidth;
107
    int          silk_bw_switch;
108
    /* Sampling rate (at the API level) */
109
    int          first;
110
    opus_val16 * energy_masking;
111
    StereoWidthState width_mem;
112
    opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
113
#ifndef DISABLE_FLOAT_API
114
    int          detected_bandwidth;
115
    int          nb_no_activity_frames;
116
    opus_val32   peak_signal_energy;
117
#endif
118
    int          nonfinal_frame; /* current frame is not the final in a packet */
119
    opus_uint32  rangeFinal;
120
121
};

122
/* Transition tables for the voice and music. First column is the
   middle (memoryless) threshold. The second column is the hysteresis
   (difference with the middle) */
125
/* Each table below holds four (threshold, hysteresis) pairs, one pair per
   bandwidth transition, in bits per second. */
static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        13500, 1000, /* WB<->SWB */
        14000, 2000, /* SWB<->FB */
};
static const opus_int32 mono_music_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        11000, 1000, /* WB<->SWB */
        12000, 2000, /* SWB<->FB */
};
static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        13500, 1000, /* WB<->SWB */
        14000, 2000, /* SWB<->FB */
};
static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
         9000,  700, /* NB<->MB */
         9000,  700, /* MB<->WB */
        11000, 1000, /* WB<->SWB */
        12000, 2000, /* SWB<->FB */
};
/* Threshold bit-rates for switching between mono and stereo */
static const opus_int32 stereo_voice_threshold = 19000;
static const opus_int32 stereo_music_threshold = 17000;
152
153
154
155

/* Threshold bit-rate for switching between SILK/hybrid and CELT-only.
   Rows are indexed by channel layout (mono, stereo), columns by signal
   class (voice, music). */
static const opus_int32 mode_thresholds[2][2] = {
      /* voice */ /* music */
      {  64000,      10000}, /* mono */
      {  44000,      10000}, /* stereo */
};
159

160
161
162
163
164
165
166
167
/* Rate thresholds used by decide_fec(), one (threshold, hysteresis) pair
   per bandwidth. The pair for a bandwidth is found at index
   2*(bandwidth - OPUS_BANDWIDTH_NARROWBAND). */
static const opus_int32 fec_thresholds[] = {
        12000, 1000, /* NB */
        14000, 1000, /* MB */
        16000, 1000, /* WB */
        20000, 1000, /* SWB */
        22000, 1000, /* FB */
};

168
169
/* Returns the number of bytes needed for a complete encoder state: the
   aligned OpusEncoder structure followed by the aligned SILK state and the
   CELT state for the given channel count.
   Returns 0 when channels is not 1 or 2, or when the SILK size query fails. */
int opus_encoder_get_size(int channels)
{
    int silkEncSizeBytes, celtEncSizeBytes;
    int ret;
    if (channels<1 || channels > 2)
        return 0;
    ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
    if (ret)
        return 0;
    silkEncSizeBytes = align(silkEncSizeBytes);
    celtEncSizeBytes = celt_encoder_get_size(channels);
    return align(sizeof(OpusEncoder))+silkEncSizeBytes+celtEncSizeBytes;
}

182
/* Initializes an encoder state in caller-provided memory.
   st          - buffer of at least opus_encoder_get_size(channels) bytes; the
                 whole buffer is cleared before use.
   Fs          - sampling rate: 8000, 12000, 16000, 24000 or 48000.
   channels    - 1 or 2.
   application - OPUS_APPLICATION_VOIP, OPUS_APPLICATION_AUDIO or
                 OPUS_APPLICATION_RESTRICTED_LOWDELAY.
   Returns OPUS_OK on success, OPUS_BAD_ARG for an unsupported argument or a
   failed SILK size query, and OPUS_INTERNAL_ERROR when the SILK or CELT
   sub-encoder fails to initialize. */
int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int application)
{
    void *silk_enc;
    CELTEncoder *celt_enc;
    int err;
    int ret, silkEncSizeBytes;

    if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
        (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
        && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
        return OPUS_BAD_ARG;

    OPUS_CLEAR((char*)st, opus_encoder_get_size(channels));
    /* Create SILK encoder */
    ret = silk_Get_Encoder_Size( &silkEncSizeBytes );
    if (ret)
        return OPUS_BAD_ARG;
    silkEncSizeBytes = align(silkEncSizeBytes);
    /* The SILK state follows the (aligned) OpusEncoder struct and the CELT
       state follows the (aligned) SILK state, all in the same buffer. */
    st->silk_enc_offset = align(sizeof(OpusEncoder));
    st->celt_enc_offset = st->silk_enc_offset+silkEncSizeBytes;
    silk_enc = (char*)st+st->silk_enc_offset;
    celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);

    st->stream_channels = st->channels = channels;

    st->Fs = Fs;

    st->arch = opus_select_arch();

    ret = silk_InitEncoder( silk_enc, st->arch, &st->silk_mode );
    if(ret)return OPUS_INTERNAL_ERROR;

    /* default SILK parameters */
    st->silk_mode.nChannelsAPI              = channels;
    st->silk_mode.nChannelsInternal         = channels;
    st->silk_mode.API_sampleRate            = st->Fs;
    st->silk_mode.maxInternalSampleRate     = 16000;
    st->silk_mode.minInternalSampleRate     = 8000;
    st->silk_mode.desiredInternalSampleRate = 16000;
    st->silk_mode.payloadSize_ms            = 20;
    st->silk_mode.bitRate                   = 25000;
    st->silk_mode.packetLossPercentage      = 0;
    st->silk_mode.complexity                = 9;
    st->silk_mode.useInBandFEC              = 0;
    st->silk_mode.useDTX                    = 0;
    st->silk_mode.useCBR                    = 0;
    st->silk_mode.reducedDependency         = 0;

    /* Create and initialize CELT encoder */
    err = celt_encoder_init(celt_enc, Fs, channels, st->arch);
    if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR;

    celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0));
    /* CELT starts with the same complexity as the SILK default above */
    celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity));

    st->use_vbr = 1;
    /* Makes constrained VBR the default (safer for real-time use) */
    st->vbr_constraint = 1;
    st->user_bitrate_bps = OPUS_AUTO;
    st->bitrate_bps = 3000+Fs*channels;
    st->application = application;
    st->signal_type = OPUS_AUTO;
    st->user_bandwidth = OPUS_AUTO;
    st->max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
    st->force_channels = OPUS_AUTO;
    st->user_forced_mode = OPUS_AUTO;
    st->voice_ratio = -1;
    /* Fs/100 samples = 10 ms */
    st->encoder_buffer = st->Fs/100;
    st->lsb_depth = 24;
    st->variable_duration = OPUS_FRAMESIZE_ARG;

    /* Delay compensation of 4 ms (2.5 ms for SILK's extra look-ahead
       + 1.5 ms for SILK resamplers and stereo prediction) */
    st->delay_compensation = st->Fs/250;

    st->hybrid_stereo_width_Q14 = 1 << 14;
    st->prev_HB_gain = Q15ONE;
    st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
    st->first = 1;
    st->mode = MODE_HYBRID;
    st->bandwidth = OPUS_BANDWIDTH_FULLBAND;

#ifndef DISABLE_FLOAT_API
    tonality_analysis_init(&st->analysis, st->Fs);
    st->analysis.application = st->application;
#endif

    return OPUS_OK;
}

273
static unsigned char gen_toc(int mode, int framerate, int bandwidth, int channels)
274
275
276
277
278
279
280
281
282
283
284
{
   int period;
   unsigned char toc;
   period = 0;
   while (framerate < 400)
   {
       framerate <<= 1;
       period++;
   }
   if (mode == MODE_SILK_ONLY)
   {
285
       toc = (bandwidth-OPUS_BANDWIDTH_NARROWBAND)<<5;
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
       toc |= (period-2)<<3;
   } else if (mode == MODE_CELT_ONLY)
   {
       int tmp = bandwidth-OPUS_BANDWIDTH_MEDIUMBAND;
       if (tmp < 0)
           tmp = 0;
       toc = 0x80;
       toc |= tmp << 5;
       toc |= period<<3;
   } else /* Hybrid */
   {
       toc = 0x60;
       toc |= (bandwidth-OPUS_BANDWIDTH_SUPERWIDEBAND)<<4;
       toc |= (period-2)<<3;
   }
   toc |= (channels==2)<<2;
   return toc;
}
304
305

#ifndef FIXED_POINT
306
static void silk_biquad_float(
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
    const opus_val16      *in,            /* I:    Input signal                   */
    const opus_int32      *B_Q28,         /* I:    MA coefficients [3]            */
    const opus_int32      *A_Q28,         /* I:    AR coefficients [2]            */
    opus_val32            *S,             /* I/O:  State vector [2]               */
    opus_val16            *out,           /* O:    Output signal                  */
    const opus_int32      len,            /* I:    Signal length (must be even)   */
    int stride
)
{
    /* DIRECT FORM II TRANSPOSED (uses 2 element state vector) */
    opus_int   k;
    opus_val32 vout;
    opus_val32 inval;
    opus_val32 A[2], B[3];

322
323
324
325
326
    A[0] = (opus_val32)(A_Q28[0] * (1.f/((opus_int32)1<<28)));
    A[1] = (opus_val32)(A_Q28[1] * (1.f/((opus_int32)1<<28)));
    B[0] = (opus_val32)(B_Q28[0] * (1.f/((opus_int32)1<<28)));
    B[1] = (opus_val32)(B_Q28[1] * (1.f/((opus_int32)1<<28)));
    B[2] = (opus_val32)(B_Q28[2] * (1.f/((opus_int32)1<<28)));
327
328
329
330
331
332
333
334
335
336

    /* Negate A_Q28 values and split in two parts */

    for( k = 0; k < len; k++ ) {
        /* S[ 0 ], S[ 1 ]: Q12 */
        inval = in[ k*stride ];
        vout = S[ 0 ] + B[0]*inval;

        S[ 0 ] = S[1] - vout*A[0] + B[1]*inval;

337
        S[ 1 ] = - vout*A[1] + B[2]*inval + VERY_SMALL;
338
339
340
341
342
343
344

        /* Scale back to Q0 and saturate */
        out[ k*stride ] = vout;
    }
}
#endif

345
/* Second-order high-pass filter with a cutoff derived from cutoff_Hz.
   in/out   - interleaved input and output, len samples per channel.
   hp_mem   - filter state, two values per channel (second channel at hp_mem+2).
   channels - 1 or 2.
   Fs       - sampling rate in Hz.
   arch     - only used by the fixed-point stereo path. */
static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs, int arch)
{
   opus_int32 B_Q28[ 3 ], A_Q28[ 2 ];
   opus_int32 Fc_Q19, r_Q28, r_Q22;
   (void)arch;

   silk_assert( cutoff_Hz <= silk_int32_MAX / SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ) );
   Fc_Q19 = silk_DIV32_16( silk_SMULBB( SILK_FIX_CONST( 1.5 * 3.14159 / 1000, 19 ), cutoff_Hz ), Fs/1000 );
   silk_assert( Fc_Q19 > 0 && Fc_Q19 < 32768 );

   r_Q28 = SILK_FIX_CONST( 1.0, 28 ) - silk_MUL( SILK_FIX_CONST( 0.92, 9 ), Fc_Q19 );

   /* b = r * [ 1; -2; 1 ]; */
   /* a = [ 1; -2 * r * ( 1 - 0.5 * Fc^2 ); r^2 ]; */
   B_Q28[ 0 ] = r_Q28;
   B_Q28[ 1 ] = silk_LSHIFT( -r_Q28, 1 );
   B_Q28[ 2 ] = r_Q28;

   /* -r * ( 2 - Fc * Fc ); */
   r_Q22  = silk_RSHIFT( r_Q28, 6 );
   A_Q28[ 0 ] = silk_SMULWW( r_Q22, silk_SMULWW( Fc_Q19, Fc_Q19 ) - SILK_FIX_CONST( 2.0,  22 ) );
   A_Q28[ 1 ] = silk_SMULWW( r_Q22, r_Q22 );

#ifdef FIXED_POINT
   if( channels == 1 ) {
      silk_biquad_alt_stride1( in, B_Q28, A_Q28, hp_mem, out, len );
   } else {
      silk_biquad_alt_stride2( in, B_Q28, A_Q28, hp_mem, out, len, arch );
   }
#else
   /* Filter each channel separately, stepping by the channel count */
   silk_biquad_float( in, B_Q28, A_Q28, hp_mem, out, len, channels );
   if( channels == 2 ) {
       silk_biquad_float( in+1, B_Q28, A_Q28, hp_mem+2, out+1, len, channels );
   }
#endif
}

382
383
384
385
386
387
#ifdef FIXED_POINT
static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
{
   int c, i;
   int shift;

388
389
   /* Approximates -round(log2(6.3*cutoff_Hz/Fs)) */
   shift=celt_ilog2(Fs/(cutoff_Hz*4));
390
391
392
393
   for (c=0;c<channels;c++)
   {
      for (i=0;i<len;i++)
      {
394
         opus_val32 x, y;
395
         x = SHL32(EXTEND32(in[channels*i+c]), 14);
396
         y = x-hp_mem[2*c];
397
         hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
398
         out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 14), 32767));
399
400
401
402
403
      }
   }
}

#else
404
405
static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
{
406
407
   int i;
   float coef, coef2;
408
   coef = 6.3f*cutoff_Hz/Fs;
409
410
   coef2 = 1-coef;
   if (channels==2)
411
   {
412
      float m0, m2;
413
414
415
416
      m0 = hp_mem[0];
      m2 = hp_mem[2];
      for (i=0;i<len;i++)
      {
417
         opus_val32 x0, x1, out0, out1;
418
419
         x0 = in[2*i+0];
         x1 = in[2*i+1];
420
421
         out0 = x0-m0;
         out1 = x1-m2;
422
423
         m0 = coef*x0 + VERY_SMALL + coef2*m0;
         m2 = coef*x1 + VERY_SMALL + coef2*m2;
424
425
         out[2*i+0] = out0;
         out[2*i+1] = out1;
426
427
428
429
      }
      hp_mem[0] = m0;
      hp_mem[2] = m2;
   } else {
430
      float m0;
431
      m0 = hp_mem[0];
432
433
      for (i=0;i<len;i++)
      {
434
         opus_val32 x, y;
435
         x = in[i];
436
         y = x-m0;
437
         m0 = coef*x + VERY_SMALL + coef2*m0;
438
         out[i] = y;
439
      }
440
      hp_mem[0] = m0;
441
442
   }
}
443
#endif
444

445
/* Reduces the stereo width of an interleaved two-channel signal by removing
   a scaled half-difference of the channels from one and adding it to the
   other. The scale cross-fades from (Q15ONE-g1) to (Q15ONE-g2) over the
   first `overlap` samples, weighted by the squared window, and stays at
   (Q15ONE-g2) for the rest of the frame.
   in        - source of the channel difference.
   out       - modified in place: its current contents are read and adjusted,
               so it must already hold the signal (NOTE(review): callers
               presumably pass the same buffer as in - confirm).
   overlap48 - overlap length expressed at 48 kHz; divided by 48000/Fs here.
   window    - window sampled at 48 kHz, read with a step of 48000/Fs. */
static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
    int i;
    int overlap;
    int inc;
    inc = 48000/Fs;
    overlap=overlap48/inc;
    g1 = Q15ONE-g1;
    g2 = Q15ONE-g2;
    for (i=0;i<overlap;i++)
    {
       opus_val32 diff;
       opus_val16 g, w;
       w = MULT16_16_Q15(window[i*inc], window[i*inc]);
       g = SHR32(MAC16_16(MULT16_16(w,g2),
             Q15ONE-w, g1), 15);
       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
       diff = MULT16_16_Q15(g, diff);
       out[i*channels] = out[i*channels] - diff;
       out[i*channels+1] = out[i*channels+1] + diff;
    }
    for (;i<frame_size;i++)
    {
       opus_val32 diff;
       diff = EXTRACT16(HALF32((opus_val32)in[i*channels] - (opus_val32)in[i*channels+1]));
       diff = MULT16_16_Q15(g2, diff);
       out[i*channels] = out[i*channels] - diff;
       out[i*channels+1] = out[i*channels+1] + diff;
    }
}

477
478
479
480
481
482
/* Applies a gain to an interleaved signal. Over the first `overlap` samples
   the gain cross-fades from g1 to g2, weighted by the squared window; the
   remainder of the frame is scaled by g2.
   overlap48 - overlap length expressed at 48 kHz; divided by 48000/Fs here.
   window    - window sampled at 48 kHz, read with a step of 48000/Fs.
   channels  - 1 uses the mono path; any other value is treated as two
               interleaved channels in the overlap region. */
static void gain_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
        int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
    int i;
    int inc;
    int overlap;
    int c;
    inc = 48000/Fs;
    overlap=overlap48/inc;
    if (channels==1)
    {
       for (i=0;i<overlap;i++)
       {
          opus_val16 g, w;
          w = MULT16_16_Q15(window[i*inc], window[i*inc]);
          g = SHR32(MAC16_16(MULT16_16(w,g2),
                Q15ONE-w, g1), 15);
          out[i] = MULT16_16_Q15(g, in[i]);
       }
    } else {
       for (i=0;i<overlap;i++)
       {
          opus_val16 g, w;
          w = MULT16_16_Q15(window[i*inc], window[i*inc]);
          g = SHR32(MAC16_16(MULT16_16(w,g2),
                Q15ONE-w, g1), 15);
          out[i*2] = MULT16_16_Q15(g, in[i*2]);
          out[i*2+1] = MULT16_16_Q15(g, in[i*2+1]);
       }
    }
    /* Constant gain after the cross-fade, one channel at a time */
    c=0;do {
       for (i=overlap;i<frame_size;i++)
       {
          out[i*channels+c] = MULT16_16_Q15(g2, in[i*channels+c]);
       }
    }
    while (++c<channels);
}

516
/* Allocates and initializes an encoder.
   Fs, channels, application - same constraints as opus_encoder_init().
   error - optional; when non-NULL it receives OPUS_BAD_ARG for an invalid
           argument, OPUS_ALLOC_FAIL when allocation fails, or otherwise the
           result of opus_encoder_init().
   Returns the new encoder, or NULL on any failure (the allocation is freed
   when initialization fails). */
OpusEncoder *opus_encoder_create(opus_int32 Fs, int channels, int application, int *error)
{
   int ret;
   OpusEncoder *st;
   if((Fs!=48000&&Fs!=24000&&Fs!=16000&&Fs!=12000&&Fs!=8000)||(channels!=1&&channels!=2)||
       (application != OPUS_APPLICATION_VOIP && application != OPUS_APPLICATION_AUDIO
       && application != OPUS_APPLICATION_RESTRICTED_LOWDELAY))
   {
      if (error)
         *error = OPUS_BAD_ARG;
      return NULL;
   }
   st = (OpusEncoder *)opus_alloc(opus_encoder_get_size(channels));
   if (st == NULL)
   {
      if (error)
         *error = OPUS_ALLOC_FAIL;
      return NULL;
   }
   ret = opus_encoder_init(st, Fs, channels, application);
   if (error)
      *error = ret;
   if (ret != OPUS_OK)
   {
      opus_free(st);
      st = NULL;
   }
   return st;
}
545
546
547
548
549
550
551
552
553
554
555
556

/* Translates the user bitrate setting into a bitrate in bits per second.
   frame_size     - frame length in samples; 0 is replaced by Fs/400 (2.5 ms).
   max_data_bytes - packet size limit, used only for OPUS_BITRATE_MAX.
   OPUS_AUTO yields 60*Fs/frame_size + Fs*channels, OPUS_BITRATE_MAX yields
   the rate that fills max_data_bytes every frame, and any other setting is
   returned unchanged. */
static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int max_data_bytes)
{
  if(!frame_size)frame_size=st->Fs/400;
  if (st->user_bitrate_bps==OPUS_AUTO)
    return 60*st->Fs/frame_size + st->Fs*st->channels;
  else if (st->user_bitrate_bps==OPUS_BITRATE_MAX)
    /* Multiply by the frame rate rather than by Fs: max_data_bytes*8*Fs
       exceeds the signed 32-bit range once max_data_bytes is above about
       5592 at 48 kHz. 6*Fs/frame_size is an exact integer for every frame
       duration accepted by frame_size_select() (2.5 to 120 ms), so the
       result is unchanged for those sizes. */
    return max_data_bytes*8*(6*st->Fs/frame_size)/6;
  else
    return st->user_bitrate_bps;
}

557
#ifndef DISABLE_FLOAT_API
558
559
560
561
562
#ifdef FIXED_POINT
#define PCM2VAL(x) FLOAT2INT16(x)
#else
#define PCM2VAL(x) SCALEIN(x)
#endif
563
564

void downmix_float(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
565
566
567
{
   const float *x;
   int j;
568

569
570
   x = (const float *)_x;
   for (j=0;j<subframe;j++)
571
      y[j] = PCM2VAL(x[(j+offset)*C+c1]);
572
573
574
   if (c2>-1)
   {
      for (j=0;j<subframe;j++)
575
         y[j] += PCM2VAL(x[(j+offset)*C+c2]);
576
577
578
579
580
581
   } else if (c2==-2)
   {
      int c;
      for (c=1;c<C;c++)
      {
         for (j=0;j<subframe;j++)
582
            y[j] += PCM2VAL(x[(j+offset)*C+c]);
583
584
585
586
587
      }
   }
}
#endif

588
/* Sums selected channels of interleaved 16-bit PCM into y.
   _x       - interleaved opus_int16 samples with C channels per frame.
   y        - output, subframe values.
   subframe - number of frames to produce.
   offset   - index of the first input frame.
   c1       - channel copied into y first.
   c2       - if >= 0, that channel is added to y; if -2, every channel from
              1 to C-1 is added (which downmixes all channels only when
              c1 is 0); any other negative value adds nothing. */
void downmix_int(const void *_x, opus_val32 *y, int subframe, int offset, int c1, int c2, int C)
{
   const opus_int16 *x;
   int j;

   x = (const opus_int16 *)_x;
   for (j=0;j<subframe;j++)
      y[j] = x[(j+offset)*C+c1];
   if (c2>-1)
   {
      for (j=0;j<subframe;j++)
         y[j] += x[(j+offset)*C+c2];
   } else if (c2==-2)
   {
      int c;
      for (c=1;c<C;c++)
      {
         for (j=0;j<subframe;j++)
            y[j] += x[(j+offset)*C+c];
      }
   }
}

611
612
613
614
615
616
617
/* Chooses the number of samples to encode from the caller's buffer.
   frame_size        - samples available per channel.
   variable_duration - OPUS_FRAMESIZE_ARG to use frame_size as given, or one
                       of OPUS_FRAMESIZE_2_5_MS .. OPUS_FRAMESIZE_120_MS to
                       force that duration.
   Fs                - sampling rate in Hz.
   Returns the selected size in samples, or -1 when frame_size is shorter
   than 2.5 ms, variable_duration is not recognized, the requested duration
   does not fit in frame_size, or the result is not one of the supported
   durations (2.5, 5, 10, 20, 40, 60, 80, 100 or 120 ms). */
opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs)
{
   int new_size;
   if (frame_size<Fs/400)
      return -1;
   if (variable_duration == OPUS_FRAMESIZE_ARG)
      new_size = frame_size;
   else if (variable_duration >= OPUS_FRAMESIZE_2_5_MS && variable_duration <= OPUS_FRAMESIZE_120_MS)
   {
      /* Up to 40 ms the durations double from 2.5 ms; beyond that they
         grow in steps of 20 ms (Fs/50 samples). */
      if (variable_duration <= OPUS_FRAMESIZE_40_MS)
         new_size = (Fs/400)<<(variable_duration-OPUS_FRAMESIZE_2_5_MS);
      else
         new_size = (variable_duration-OPUS_FRAMESIZE_2_5_MS-2)*Fs/50;
   }
   else
      return -1;
   if (new_size>frame_size)
      return -1;
   if (400*new_size!=Fs   && 200*new_size!=Fs   && 100*new_size!=Fs   &&
        50*new_size!=Fs   &&  25*new_size!=Fs   &&  50*new_size!=3*Fs &&
        50*new_size!=4*Fs &&  50*new_size!=5*Fs &&  50*new_size!=6*Fs)
      return -1;
   return new_size;
}

636
637
638
639
640
641
642
643
644
645
/* Estimates the stereo width of an interleaved two-channel frame.
   pcm        - interleaved samples (first channel at even indices).
   frame_size - samples per channel.
   Fs         - sampling rate in Hz.
   mem        - running state, updated on every call.
   The per-frame channel energies and cross-product are smoothed into mem,
   a width is derived from the inter-channel correlation and the level
   difference, smoothed again, and tracked with a decaying peak follower.
   Returns 20 times the peak follower, capped at Q15ONE. */
opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem)
{
   opus_val32 xx, xy, yy;
   opus_val16 sqrt_xx, sqrt_yy;
   opus_val16 qrrt_xx, qrrt_yy;
   int frame_rate;
   int i;
   opus_val16 short_alpha;

   frame_rate = Fs/frame_size;
   short_alpha = Q15ONE - MULT16_16(25, Q15ONE)/IMAX(50,frame_rate);
   xx=xy=yy=0;
   /* Unroll by 4. The frame size is always a multiple of 4 *except* for
      2.5 ms frames at 12 kHz. Since this setting is very rare, we just
      discard the last two samples. */
   for (i=0;i<frame_size-3;i+=4)
   {
      opus_val32 pxx=0;
      opus_val32 pxy=0;
      opus_val32 pyy=0;
      opus_val16 x, y;
      x = pcm[2*i];
      y = pcm[2*i+1];
      pxx = SHR32(MULT16_16(x,x),2);
      pxy = SHR32(MULT16_16(x,y),2);
      pyy = SHR32(MULT16_16(y,y),2);
      x = pcm[2*i+2];
      y = pcm[2*i+3];
      pxx += SHR32(MULT16_16(x,x),2);
      pxy += SHR32(MULT16_16(x,y),2);
      pyy += SHR32(MULT16_16(y,y),2);
      x = pcm[2*i+4];
      y = pcm[2*i+5];
      pxx += SHR32(MULT16_16(x,x),2);
      pxy += SHR32(MULT16_16(x,y),2);
      pyy += SHR32(MULT16_16(y,y),2);
      x = pcm[2*i+6];
      y = pcm[2*i+7];
      pxx += SHR32(MULT16_16(x,x),2);
      pxy += SHR32(MULT16_16(x,y),2);
      pyy += SHR32(MULT16_16(y,y),2);

      xx += SHR32(pxx, 10);
      xy += SHR32(pxy, 10);
      yy += SHR32(pyy, 10);
   }
#ifndef FIXED_POINT
   /* Ignore the frame if the energies are huge or not a number */
   if (!(xx < 1e9f) || celt_isnan(xx) || !(yy < 1e9f) || celt_isnan(yy))
   {
      xy = xx = yy = 0;
   }
#endif
   mem->XX += MULT16_32_Q15(short_alpha, xx-mem->XX);
   mem->XY += MULT16_32_Q15(short_alpha, xy-mem->XY);
   mem->YY += MULT16_32_Q15(short_alpha, yy-mem->YY);
   mem->XX = MAX32(0, mem->XX);
   mem->XY = MAX32(0, mem->XY);
   mem->YY = MAX32(0, mem->YY);
   /* The width is only updated when the smoothed energy is above a floor */
   if (MAX32(mem->XX, mem->YY)>QCONST16(8e-4f, 18))
   {
      opus_val16 corr;
      opus_val16 ldiff;
      opus_val16 width;
      sqrt_xx = celt_sqrt(mem->XX);
      sqrt_yy = celt_sqrt(mem->YY);
      qrrt_xx = celt_sqrt(sqrt_xx);
      qrrt_yy = celt_sqrt(sqrt_yy);
      /* Inter-channel correlation */
      mem->XY = MIN32(mem->XY, sqrt_xx*sqrt_yy);
      corr = SHR32(frac_div32(mem->XY,EPSILON+MULT16_16(sqrt_xx,sqrt_yy)),16);
      /* Approximate loudness difference */
      ldiff = MULT16_16(Q15ONE, ABS16(qrrt_xx-qrrt_yy))/(EPSILON+qrrt_xx+qrrt_yy);
      width = MULT16_16_Q15(celt_sqrt(QCONST32(1.f,30)-MULT16_16(corr,corr)), ldiff);
      /* Smoothing over one second */
      mem->smoothed_width += (width-mem->smoothed_width)/frame_rate;
      /* Peak follower */
      mem->max_follower = MAX16(mem->max_follower-QCONST16(.02f,15)/frame_rate, mem->smoothed_width);
   }
   /*printf("%f %f %f %f %f ", corr/(float)Q15ONE, ldiff/(float)Q15ONE, width/(float)Q15ONE, mem->smoothed_width/(float)Q15ONE, mem->max_follower/(float)Q15ONE);*/
   return EXTRACT16(MIN32(Q15ONE, MULT16_16(20, mem->max_follower)));
}

718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
/* Decides whether in-band FEC is used for the current frame.
   useInBandFEC    - non-zero when FEC has been enabled.
   PacketLoss_perc - expected packet loss in percent.
   last_fec        - previous decision: 1 lowers the threshold by the
                     hysteresis, 0 raises it, other values leave it as is.
   mode            - FEC is never used in MODE_CELT_ONLY.
   bandwidth       - in/out; may be lowered to make room for FEC when the
                     loss is above 5%. Restored when FEC cannot be enabled
                     at any bandwidth.
   rate            - available rate compared against the threshold.
   Returns 1 when FEC should be used, 0 otherwise. */
static int decide_fec(int useInBandFEC, int PacketLoss_perc, int last_fec, int mode, int *bandwidth, opus_int32 rate)
{
   int saved_bandwidth;

   if (!useInBandFEC || PacketLoss_perc == 0 || mode == MODE_CELT_ONLY)
      return 0;

   saved_bandwidth = *bandwidth;
   while (1)
   {
      /* (threshold, hysteresis) pair for the bandwidth under test */
      const opus_int32 *pair = &fec_thresholds[2*(*bandwidth - OPUS_BANDWIDTH_NARROWBAND)];
      opus_int32 threshold_bps = pair[0];
      opus_int32 hyst = pair[1];

      if (last_fec == 1)
         threshold_bps -= hyst;
      else if (last_fec == 0)
         threshold_bps += hyst;
      /* Scale the threshold by (125 - min(loss, 25)) percent */
      threshold_bps = silk_SMULWB( silk_MUL( threshold_bps,
            125 - silk_min( PacketLoss_perc, 25 ) ), SILK_FIX_CONST( 0.01, 16 ) );

      /* With loss <= 5% only the current bandwidth is considered; with
         more loss the bandwidth is lowered until FEC fits. */
      if (rate > threshold_bps)
         return 1;
      if (PacketLoss_perc <= 5)
         return 0;
      if (*bandwidth <= OPUS_BANDWIDTH_NARROWBAND)
         break;
      (*bandwidth)--;
   }
   /* No bandwidth leaves room for FEC: keep the original bandwidth. */
   *bandwidth = saved_bandwidth;
   return 0;
}

751
static int compute_silk_rate_for_hybrid(int rate, int bandwidth, int frame20ms, int vbr, int fec, int channels) {
752
753
754
755
756
757
   int entry;
   int i;
   int N;
   int silk_rate;
   static int rate_table[][5] = {
  /*  |total| |-------- SILK------------|
758
759
              |-- No FEC -| |--- FEC ---|
               10ms   20ms   10ms   20ms */
760
      {    0,     0,     0,     0,     0},
761
762
763
764
765
766
      {12000, 10000, 10000, 11000, 11000},
      {16000, 13500, 13500, 15000, 15000},
      {20000, 16000, 16000, 18000, 18000},
      {24000, 18000, 18000, 21000, 21000},
      {32000, 22000, 22000, 28000, 28000},
      {64000, 38000, 38000, 50000, 50000}
767
   };
768
769
   /* Do the allocation per-channel. */
   rate /= channels;
770
   entry = 1 + frame20ms + 2*fec;
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
   N = sizeof(rate_table)/sizeof(rate_table[0]);
   for (i=1;i<N;i++)
   {
      if (rate_table[i][0] > rate) break;
   }
   if (i == N)
   {
      silk_rate = rate_table[i-1][entry];
      /* For now, just give 50% of the extra bits to SILK. */
      silk_rate += (rate-rate_table[i-1][0])/2;
   } else {
      opus_int32 lo, hi, x0, x1;
      lo = rate_table[i-1][entry];
      hi = rate_table[i][entry];
      x0 = rate_table[i-1][0];
      x1 = rate_table[i][0];
      silk_rate = (lo*(x1-rate) + hi*(rate-x0))/(x1-x0);
   }
   if (!vbr)
   {
791
792
      /* Tiny boost to SILK for CBR. We should probably tune this better. */
      silk_rate += 100;
793
   }
794
795
   if (bandwidth==OPUS_BANDWIDTH_SUPERWIDEBAND)
      silk_rate += 300;
796
   silk_rate *= channels;
797
   /* Small adjustment for stereo (calibrated for 32 kb/s, haven't tried other bitrates). */
798
   if (channels == 2 && rate >= 12000)
799
      silk_rate -= 1000;
800
801
802
   return silk_rate;
}

803
804
805
/* Returns the equivalent bitrate corresponding to 20 ms frames,
   complexity 10 VBR operation.
   bitrate    - actual bitrate in bits per second.
   channels   - channel count, used for the short-frame overhead.
   frame_rate - frames per second; overhead is charged above 50.
   vbr        - zero means CBR, which is penalized.
   mode       - MODE_SILK_ONLY, MODE_HYBRID, MODE_CELT_ONLY, or any other
                value when the mode is not known yet.
   complexity - encoder complexity setting.
   loss       - expected packet loss in percent. */
static opus_int32 compute_equiv_rate(opus_int32 bitrate, int channels,
      int frame_rate, int vbr, int mode, int complexity, int loss)
{
   opus_int32 equiv;
   equiv = bitrate;
   /* Take into account overhead from smaller frames. */
   if (frame_rate > 50)
      equiv -= (40*channels+20)*(frame_rate - 50);
   /* CBR is about a 8% penalty for both SILK and CELT. */
   if (!vbr)
      equiv -= equiv/12;
   /* Complexity makes about 10% difference (from 0 to 10) in general. */
   equiv = equiv * (90+complexity)/100;
   if (mode == MODE_SILK_ONLY || mode == MODE_HYBRID)
   {
      /* SILK complexity 0-1 uses the non-delayed-decision NSQ, which
         costs about 20%. */
      if (complexity<2)
         equiv = equiv*4/5;
      equiv -= equiv*loss/(6*loss + 10);
   } else if (mode == MODE_CELT_ONLY) {
      /* CELT complexity 0-4 doesn't have the pitch filter, which costs
         about 10%. */
      if (complexity<5)
         equiv = equiv*9/10;
   } else {
      /* Mode not known yet */
      /* Half the SILK loss*/
      equiv -= equiv*loss/(12*loss + 20);
   }
   return equiv;
}

838
839
#ifndef DISABLE_FLOAT_API

840
int is_digital_silence(const opus_val16* pcm, int frame_size, int channels, int lsb_depth)
841
842
843
{
   int silence = 0;
   opus_val32 sample_max = 0;
844
845
846
#ifdef MLP_TRAINING
   return 0;
#endif
847
   sample_max = celt_maxabs16(pcm, frame_size*channels);
848
849
850

#ifdef FIXED_POINT
   silence = (sample_max == 0);
851
   (void)lsb_depth;
852
853
854
855
856
857
858
#else
   silence = (sample_max <= (opus_val16) 1 / (1 << lsb_depth));
#endif

   return silence;
}

859
#ifdef FIXED_POINT
860
static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch)
861
862
863
864
865
866
{
   int i;
   opus_val32 sample_max;
   int max_shift;
   int shift;
   opus_val32 energy = 0;
867
   int len = frame_size*channels;
868
   (void)arch;
869
   /* Max amplitude in the signal */
870
   sample_max = celt_maxabs16(pcm, len);
871
872

   /* Compute the right shift required in the MAC to avoid an overflow */
873
   max_shift = celt_ilog2(len);
874
875
876
   shift = IMAX(0, (celt_ilog2(sample_max) << 1) + max_shift - 28);

   /* Compute the energy */
877
   for (i=0; i<len; i++)
878
879
880
      energy += SHR32(MULT16_16(pcm[i], pcm[i]), shift);

   /* Normalize energy by the frame size and left-shift back to the original position */
881
   energy /= len;
882
883
884
885
   energy = SHL32(energy, shift);

   return energy;
}
886
#else
887
/* Floating-point frame energy: the inner product of the frame with itself
   divided by the number of samples (frame_size*channels). arch is forwarded
   to celt_inner_prod(). */
static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size, int channels, int arch)
{
   int nb_samples = frame_size*channels;

   return celt_inner_prod(pcm, pcm, nb_samples, arch)/nb_samples;
}
#endif
893

894
/* Decides if DTX should be turned on (=1) or off (=0) */
895
896
897
static int decide_dtx_mode(opus_int activity,            /* indicates if this frame contains speech/music */
                           int *nb_no_activity_frames    /* number of consecutive frames with no activity */
                           )
898

899
900
{
   if (!activity)
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
   {
      /* The number of consecutive DTX frames should be within the allowed bounds */
      (*nb_no_activity_frames)++;
      if (*nb_no_activity_frames > NB_SPEECH_FRAMES_BEFORE_DTX)
      {
         if (*nb_no_activity_frames <= (NB_SPEECH_FRAMES_BEFORE_DTX + MAX_CONSECUTIVE_DTX))
            /* Valid frame for DTX! */
            return 1;
         else
            (*nb_no_activity_frames) = NB_SPEECH_FRAMES_BEFORE_DTX;
      }
   } else
      (*nb_no_activity_frames) = 0;

   return 0;
}

918
919
#endif

920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
/* Encodes nb_frames consecutive frames of frame_size samples per channel and
   merges them into a single packet written to data.

   Each frame is encoded on its own with opus_encode_native() into a slice of
   a temporary buffer and appended to a repacketizer; the combined packet is
   then produced by opus_repacketizer_out_range_impl(). For the duration of
   the call the forced mode, bandwidth and channel count are pinned to the
   encoder's current values for the purpose of repacketization. When to_celt
   is set, only the last frame is forced to MODE_CELT_ONLY.

   st             encoder state; several fields are temporarily overridden
   pcm            interleaved input, nb_frames*frame_size*st->channels samples
   nb_frames      number of frames to encode and merge
   frame_size     samples per channel in each frame
   data           output buffer for the merged packet
   out_data_bytes capacity of data in bytes
   to_celt        nonzero to switch to CELT-only on the final frame
   lsb_depth      forwarded to opus_encode_native()
   float_api      forwarded to opus_encode_native()

   Returns the value reported by opus_repacketizer_out_range_impl() on
   success, or OPUS_INTERNAL_ERROR if encoding or repacketization fails.
   The locally forced settings are undone on every exit path, including the
   error paths. */
static opus_int32 encode_multiframe_packet(OpusEncoder *st,
                                           const opus_val16 *pcm,
                                           int nb_frames,
                                           int frame_size,
                                           unsigned char *data,
                                           opus_int32 out_data_bytes,
                                           int to_celt,
                                           int lsb_depth,
                                           int float_api)
{
   int i;
   int ret = 0;
   VARDECL(unsigned char, tmp_data);
   int bak_mode, bak_bandwidth, bak_channels, bak_to_mono;
   VARDECL(OpusRepacketizer, rp);
   int max_header_bytes;
   opus_int32 bytes_per_frame;
   opus_int32 cbr_bytes;
   opus_int32 repacketize_len;
   int tmp_len;
   ALLOC_STACK;

   /* Worst cases:
    * 2 frames: Code 2 with different compressed sizes
    * >2 frames: Code 3 VBR */
   max_header_bytes = nb_frames == 2 ? 3 : (2+(nb_frames-1)*2);

   /* Size of the final packet: the whole output buffer for VBR or maximum
      bitrate, otherwise the CBR size implied by the bitrate (never more than
      the output buffer) */
   if (st->use_vbr || st->user_bitrate_bps==OPUS_BITRATE_MAX)
      repacketize_len = out_data_bytes;
   else {
      cbr_bytes = 3*st->bitrate_bps/(3*8*st->Fs/(frame_size*nb_frames));
      repacketize_len = IMIN(cbr_bytes, out_data_bytes);
   }
   /* Per-frame budget once the worst-case header is set aside, capped at 1276 bytes */
   bytes_per_frame = IMIN(1276, 1+(repacketize_len-max_header_bytes)/nb_frames);

   ALLOC(tmp_data, nb_frames*bytes_per_frame, unsigned char);
   ALLOC(rp, 1, OpusRepacketizer);
   opus_repacketizer_init(rp);

   /* Save the user's settings; they are overridden below and restored before returning */
   bak_mode = st->user_forced_mode;
   bak_bandwidth = st->user_bandwidth;
   bak_channels = st->force_channels;

   st->user_forced_mode = st->mode;
   st->user_bandwidth = st->bandwidth;
   st->force_channels = st->stream_channels;

   bak_to_mono = st->silk_mode.toMono;
   if (bak_to_mono)
      st->force_channels = 1;
   else
      st->prev_channels = st->stream_channels;

   for (i=0;i<nb_frames;i++)
   {
      st->silk_mode.toMono = 0;
      st->nonfinal_frame = i<(nb_frames-1);

      /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */
      if (to_celt && i==nb_frames-1)
         st->user_forced_mode = MODE_CELT_ONLY;

      tmp_len = opus_encode_native(st, pcm+i*(st->channels*frame_size), frame_size,
         tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, NULL, 0, 0, 0, 0,
         NULL, float_api);

      if (tmp_len<0)
      {
         ret = OPUS_INTERNAL_ERROR;
         break;
      }

      ret = opus_repacketizer_cat(rp, tmp_data+i*bytes_per_frame, tmp_len);

      if (ret<0)
      {
         ret = OPUS_INTERNAL_ERROR;
         break;
      }
   }

   if (ret<0)
   {
      /* The loop was abandoned early: clear the non-final marker so it ends
         up in the same state as after a complete run (where the last
         iteration leaves it at 0) */
      st->nonfinal_frame = 0;
   } else {
      ret = opus_repacketizer_out_range_impl(rp, 0, nb_frames, data, repacketize_len, 0, !st->use_vbr);
      if (ret<0)
         ret = OPUS_INTERNAL_ERROR;
   }

   /* Discard configs that were forced locally for the purpose of repacketization.
      This runs on failure as well, so an encoding error does not overwrite
      the user's forced mode, bandwidth or channel settings. */
   st->user_forced_mode = bak_mode;
   st->user_bandwidth = bak_bandwidth;
   st->force_channels = bak_channels;
   st->silk_mode.toMono = bak_to_mono;

   RESTORE_STACK;
   return ret;
}

1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
/* Computes how many bytes to spend on the redundancy frame.

   max_data_bytes  upper bound on the packet size in bytes
   bitrate_bps     bitrate in bits per second
   frame_rate      frames per second (used as a divisor)
   channels        number of channels

   Returns the redundancy size in bytes, limited by the packet-size cap and by
   257, or 0 when the result would not exceed 4 + 8*channels bytes. */
static int compute_redundancy_bytes(opus_int32 max_data_bytes, opus_int32 bitrate_bps, int frame_rate, int channels)
{
   int overhead_bits;
   opus_int32 rate;
   opus_int32 budget_bits;
   int wanted_bytes;
   int cap_bytes;

   overhead_bits = (40*channels+20);

   /* Equivalent rate for 5 ms frames. */
   rate = bitrate_bps + overhead_bits*(200 - frame_rate);
   /* For VBR, further increase the bitrate if we can afford it. It's pretty short
      and we'll avoid artefacts. */
   rate = 3*rate/2;
   wanted_bytes = rate/1600;

   /* Compute the max rate we can use given CBR or VBR with cap. */
   budget_bits = max_data_bytes*8 - 2*overhead_bits;
   cap_bytes = (budget_bits*240/(240+48000/frame_rate) + overhead_bits)/8;
   wanted_bytes = IMIN(wanted_bytes, cap_bytes);

   /* If we can't get enough bits for redundancy to be worth it, rely on the decoder PLC. */
   if (wanted_bytes <= 4 + 8*channels)
      return 0;

   return IMIN(257, wanted_bytes);
}

1047
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
1048
                unsigned char *data, opus_int32 out_data_bytes, int lsb_depth,
1049
1050
                const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2,
                int analysis_channels, downmix_func downmix, int float_api)
1051
{
1052
1053
    void *silk_enc;
    CELTEncoder *celt_enc;
1054
    int i;
1055
    int ret=0;
1056
    opus_int32 nBytes;
1057
    ec_enc enc;
1058
    int bytes_target;
1059
    int prefill=0;
1060
    int start_band = 0;
1061
    int redundancy = 0;
1062
    int redundancy_bytes = 0; /* Number of bytes to use for redundancy frame */
1063
    int celt_to_silk = 0;
1064
    VARDECL(opus_val16, pcm_buf);
1065
1066
    int nb_compr_bytes;
    int to_celt = 0;
1067
    opus_uint32 redundant_rng = 0;
1068
    int cutoff_Hz, hp_freq_smth1;
1069
    int voice_est; /* Probability of voice in Q7 */
1070
    opus_int32 equiv_rate;
1071
    int delay_compensation;
1072
    int frame_rate;
1073
    opus_int32 max_rate; /* Max bitrate we're allowed to use */
1074
    int curr_bandwidth;
1075
    opus_val16 HB_gain;
1076
    opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
1077
    int total_buffer;
1078
    opus_val16 stereo_width;
1079
    const CELTMode *celt_mode;
1080
#ifndef DISABLE_FLOAT_API
1081
    AnalysisInfo analysis_info;
1082
1083
    int analysis_read_pos_bak=-1;
    int analysis_read_subframe_bak=-1;
1084
    int is_silence = 0;
1085
#endif
1086
1087
    opus_int activity = VAD_NO_DECISION;

Jean-Marc Valin's avatar