From 1a2e7650f9b18519c7a02281507958b9246a9800 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Sun, 6 Nov 2011 23:27:16 -0500 Subject: [PATCH] Adds an analysis function to control VBR Conflicts: src/opus_encoder.c --- celt/celt.c | 141 ++++++++++++++++++++++-------- celt/celt.h | 4 + src/analysis.c | 213 +++++++++++++++++++++++++++++++++++++++++++++ src/opus_encoder.c | 48 ++++++---- 4 files changed, 352 insertions(+), 54 deletions(-) create mode 100644 src/analysis.c diff --git a/celt/celt.c b/celt/celt.c index d20b02536..97d9edc0c 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -177,6 +177,8 @@ struct OpusCustomEncoder { int prefilter_tapset_old; #endif int consec_transient; + int frame_tonality; + int tonality_slope; opus_val32 preemph_memE[2]; opus_val32 preemph_memD[2]; @@ -699,6 +701,9 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, return tf_select; } +extern int boost_band[2]; +extern float boost_amount[2]; + static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) { int curr, i; @@ -790,7 +795,7 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C) } static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, - const opus_val16 *bandLogE, int end, int LM, int C, int N0) + const opus_val16 *bandLogE, int end, int LM, int C, int N0, float tonality_slope) { int i; opus_val32 diff=0; @@ -831,6 +836,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, result of a bug in the loop above */ diff /= 2*C*(end-1); /*printf("%f\n", diff);*/ +#if 1 if (diff > QCONST16(2.f, DB_SHIFT)) trim_index--; if (diff > QCONST16(8.f, DB_SHIFT)) @@ -839,11 +845,23 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim_index++; if (diff < -QCONST16(10.f, DB_SHIFT)) trim_index++; - +#endif +#if 0 + if (tonality_slope > .15) + trim_index--; + if (tonality_slope > .3) + trim_index--; + if (tonality_slope < -.15) + 
trim_index++; + if (tonality_slope < -.3) + trim_index++; +#endif + //printf("%f\n", tonality_slope); if (trim_index<0) trim_index = 0; if (trim_index>10) trim_index = 10; + //printf("%f %d\n", tonality_slope, trim_index); #ifdef FUZZING trim_index = rand()%11; #endif @@ -1291,6 +1309,14 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f st->spread_decision = spreading_decision(st->mode, X, &st->tonal_average, st->spread_decision, &st->hf_average, &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + /*if (st->frame_tonality > .7*32768) + st->spread_decision = SPREAD_NONE; + else if (st->frame_tonality > .3*32768) + st->spread_decision = SPREAD_LIGHT; + else if (st->frame_tonality > .1*32768) + st->spread_decision = SPREAD_NORMAL; + else + st->spread_decision = SPREAD_AGGRESSIVE;*/ } ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); } @@ -1336,6 +1362,18 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f #endif } } + if (0) + { + if (boost_amount[0]>.2) + offsets[boost_band[0]]+=2; + if (boost_amount[0]>.4) + offsets[boost_band[0]]+=2; + if (boost_amount[1]>.2) + offsets[boost_band[1]]+=2; + if (boost_amount[1]>.4) + offsets[boost_band[1]]+=2; + //printf("%f %f\n", boost_amount[0], boost_amount[1]); + } dynalloc_logp = 6; total_bits<<=BITRES; total_boost = 0; @@ -1374,18 +1412,48 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f if (tell+(6<<BITRES) <= total_bits - total_boost) { alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE, - st->end, LM, C, N); + st->end, LM, C, N, st->tonality_slope/16384.); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } + if (C==2) + { + int effectiveRate; + + /* Always use MS for 2.5 ms frames until we can do a better analysis */ + if (LM!=0) + dual_stereo = stereo_analysis(st->mode, X, LM, N); + + /* Account for coarse energy */ + effectiveRate = (8*effectiveBytes - 80)>>LM; + + /* effectiveRate in 
kb/s */ + effectiveRate = 2*effectiveRate/5; + if (effectiveRate<35) + intensity = 8; + else if (effectiveRate<50) + intensity = 12; + else if (effectiveRate<68) + intensity = 16; + else if (effectiveRate<84) + intensity = 18; + else if (effectiveRate<102) + intensity = 19; + else if (effectiveRate<130) + intensity = 20; + else + intensity = 100; + intensity = IMIN(st->end,IMAX(st->start, intensity)); + } + /* Variable bitrate */ if (vbr_rate>0) { opus_val16 alpha; opus_int32 delta; /* The target rate in 8th bits per frame */ - opus_int32 target; + opus_int32 target, new_target; opus_int32 min_allowed; int lm_diff = st->mode->maxLM - LM; @@ -1397,14 +1465,30 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f target += (st->vbr_offset>>lm_diff); #ifdef FIXED_POINT - target = SHL32(MULT16_32_Q15(target, SUB16(tf_estimate, QCONST16(0.05, 14))),1); + new_target = SHL32(MULT16_32_Q15(target, SUB16(tf_estimate, QCONST16(0.05, 14))),1); #else - target *= tf_estimate-.05; + new_target = target*(tf_estimate-.05); #endif + if (1) { + int tonal_target; + float tonal; + int coded_bins; + int coded_bands; + tonal = st->frame_tonality/32768.; + tonal -= .06; + coded_bands = st->lastCodedBands ? st->lastCodedBands : st->mode->nbEBands; + //coded_bands = IMIN(coded_bands, st->mode->nbEBands-1); + coded_bins = st->mode->eBands[coded_bands]<<LM; + if (C==2) + coded_bins += st->mode->eBands[IMIN(intensity, coded_bands)]<<LM; + tonal_target = target + (coded_bins<<BITRES)*1.55*tonal; + new_target = IMAX(tonal_target,new_target); + } + /* The current offset is removed from the target and the space used so far is added*/ - target=target+tell; - + target=new_target+tell; + //printf("%d\n", target); /* In VBR mode the frame size must not be reduced so much that it would result in the encoder running out of bits. 
The margin of 2 bytes ensures that none of the bust-prevention logic @@ -1464,35 +1548,6 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f /* This moves the raw bits to take into account the new compressed size */ ec_enc_shrink(enc, nbCompressedBytes); } - if (C==2) - { - int effectiveRate; - - /* Always use MS for 2.5 ms frames until we can do a better analysis */ - if (LM!=0) - dual_stereo = stereo_analysis(st->mode, X, LM, N); - - /* Account for coarse energy */ - effectiveRate = (8*effectiveBytes - 80)>>LM; - - /* effectiveRate in kb/s */ - effectiveRate = 2*effectiveRate/5; - if (effectiveRate<35) - intensity = 8; - else if (effectiveRate<50) - intensity = 12; - else if (effectiveRate<68) - intensity = 16; - else if (effectiveRate<84) - intensity = 18; - else if (effectiveRate<102) - intensity = 19; - else if (effectiveRate<130) - intensity = 20; - else - intensity = 100; - intensity = IMIN(st->end,IMAX(st->start, intensity)); - } /* Bit allocation */ ALLOC(fine_quant, st->mode->nbEBands, int); @@ -1862,6 +1917,18 @@ int opus_custom_encoder_ctl(CELTEncoder * restrict st, int request, ...) 
st->signalling = value; } break; + case CELT_SET_TONALITY_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->frame_tonality = value; + } + break; + case CELT_SET_TONALITY_SLOPE_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->tonality_slope = value; + } + break; case CELT_GET_MODE_REQUEST: { const CELTMode ** value = va_arg(ap, const CELTMode**); diff --git a/celt/celt.h b/celt/celt.h index da4464ced..553670c90 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -86,6 +86,10 @@ extern "C" { #define CELT_SET_SIGNALLING_REQUEST 10016 #define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) +#define CELT_SET_TONALITY_REQUEST 10018 +#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x) +#define CELT_SET_TONALITY_SLOPE_REQUEST 10020 +#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) /* Encoder stuff */ diff --git a/src/analysis.c b/src/analysis.c new file mode 100644 index 000000000..21a4a106c --- /dev/null +++ b/src/analysis.c @@ -0,0 +1,213 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "kiss_fft.h"
+#include "celt.h"
+#include "modes.h"
+#include "arch.h"
+#include "quant_bands.h"
+#include <stdio.h>
+
+#define NB_FRAMES 8   /* number of past calls kept in TonalityAnalysisState.E */
+
+#define NB_TBANDS 17  /* number of analysis bands */
+static const int tbands[NB_TBANDS+1] = {   /* band b covers FFT bins tbands[b] .. tbands[b+1]-1 */
+      4, 6, 8, 10, 12, 14, 16, 20, 24, 32, 40, 48, 56, 68, 80, 96, 120, 156
+};
+
+typedef struct {
+   float angle[240];                     /* per-bin phase (in cycles) of the last analysed segment */
+   float d_angle[240];                   /* per-bin phase step between the last two segments */
+   float d2_angle[240];                  /* per-bin 4th power of the last wrapped phase-step change */
+   float prev_band_tonality[NB_TBANDS];  /* band tonalities from the previous call */
+   float prev_tonality;                  /* value returned by the previous call */
+   float E[NB_FRAMES][NB_TBANDS];        /* band energies of the last NB_FRAMES calls */
+   int E_count;                          /* row of E written by the next call */
+} TonalityAnalysisState;
+
+int boost_band[2];      /* the two bands picked as most tonal ([0] is the stronger) */
+float boost_amount[2];  /* how far each picked band stands above the frame tonality */
+
+/* Estimates the tonality of the 720 samples per channel starting at x.
+   Two 480-sample segments (offset by 240) are windowed and packed as the
+   real and imaginary parts of a single FFT; a bin counts as tonal when its
+   phase advances steadily from segment to segment, including the segments
+   remembered in *tonal from the previous call.
+   tonal    : analysis state, updated on every call
+   celt_enc : encoder queried (CELT_GET_MODE) for the FFT configuration
+   x        : input samples; when C!=1 they are read as interleaved pairs
+              that get summed
+   tslope   : receives the sum of band_tonality[b]*(b-8), divided by 64
+   Returns the mean tonality of bands 7 and up, held to no less than 0.8
+   times the previous return value. Also updates the file-scope
+   boost_band[]/boost_amount[], which are not reset between calls. */
+float tonality_analysis(TonalityAnalysisState *tonal, CELTEncoder *celt_enc, const opus_val16 *x, int C, float *tslope)
+{
+   int i, b;
+   const CELTMode *mode;
+   const kiss_fft_state *kfft;
+   kiss_fft_cpx in[480], out[480];
+   int N = 480, N2=240;
+   float * restrict A = tonal->angle;
+   float * restrict dA = tonal->d_angle;
+   float * restrict d2A = tonal->d2_angle;
+   float tonality[240];
+   float band_tonality[NB_TBANDS];
+   float frame_tonality;
+   const float pi4 = M_PI*M_PI*M_PI*M_PI;
+   float slope=0;
+   celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
+
+   kfft = mode->mdct.kfft[0];
+   /* Real part: samples 0..479, imaginary part: samples 240..719, both
+      weighted by the same symmetric raised-cosine window. */
+   if (C==1)
+   {
+      for (i=0;i<N2;i++)
+      {
+         float w = .5-.5*cos(M_PI*(i+1)/N2);
+         in[i].r = MULT16_16(w, x[i]);
+         in[i].i = MULT16_16(w, x[N-N2+i]);
+         in[N-i-1].r = MULT16_16(w, x[N-i-1]);
+         in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
+      }
+   } else {
+      for (i=0;i<N2;i++)
+      {
+         float w = .5-.5*cos(M_PI*(i+1)/N2);
+         in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
+         in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
+         in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
+         in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
+      }
+   }
+   opus_fft(kfft, in, out);
+
+   /* X1/X2 below separate the spectra of the two packed segments. */
+   for (i=1;i<N2;i++)
+   {
+      float X1r, X2r, X1i, X2i;
+      float angle, d_angle, d2_angle;
+      float angle2, d_angle2, d2_angle2;
+      float mod1, mod2, avg_mod;
+      X1r = out[i].r+out[N-i].r;
+      X1i = out[i].i-out[N-i].i;
+      X2r = out[i].i+out[N-i].i;
+      X2i = out[N-i].r-out[i].r;
+      angle = (.5/M_PI)*atan2(X1i, X1r);
+      d_angle = angle - A[i];
+      d2_angle = d_angle - dA[i];
+
+      angle2 = (.5/M_PI)*atan2(X2i, X2r);
+      d_angle2 = angle2 - angle;
+      d2_angle2 = d_angle2 - d_angle;
+
+      mod1 = d2_angle - floor(.5+d2_angle);
+      mod1 *= mod1;
+      mod1 *= mod1;
+      mod2 = d2_angle2 - floor(.5+d2_angle2);
+      mod2 *= mod2;
+      mod2 *= mod2;
+
+      avg_mod = .25*(d2A[i]+2*mod1+mod2);
+      tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015;
+
+      A[i] = angle2;
+      dA[i] = d_angle2;
+      d2A[i] = mod2;
+   }
+
+   frame_tonality = 0;
+   for (b=0;b<NB_TBANDS;b++)
+   {
+      float E=0, tE=0;
+      float L1, L2;
+      float stationarity;
+      for (i=tbands[b];i<tbands[b+1];i++)
+      {
+         float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
+                    + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+         E += binE;
+         tE += binE*tonality[i];
+      }
+      tonal->E[tonal->E_count][b] = E;
+      L1=L2=0;
+      for (i=0;i<NB_FRAMES;i++)
+      {
+         L1 += sqrt(tonal->E[i][b]);
+         L2 += tonal->E[i][b];
+      }
+
+      stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2));
+      stationarity *= stationarity;
+      stationarity *= stationarity;
+      band_tonality[b] = MAX16(tE/(1e-15+E), stationarity*tonal->prev_band_tonality[b]);
+      if (b>=7)
+         frame_tonality += band_tonality[b];
+      slope += band_tonality[b]*(b-8);
+      if (band_tonality[b] > boost_amount[1] && b>=7 && b < NB_TBANDS-1)
+      {
+         if (band_tonality[b] > boost_amount[0])
+         {
+            boost_amount[1] = boost_amount[0];
+            boost_band[1] = boost_band[0];
+            boost_amount[0] = band_tonality[b];
+            boost_band[0] = b;
+         } else {
+            boost_amount[1] = band_tonality[b];
+            boost_band[1] = b;
+         }
+      }
+      tonal->prev_band_tonality[b] = band_tonality[b];
+   }
+   frame_tonality /= NB_TBANDS-7;
+   frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
+   tonal->prev_tonality = frame_tonality;
+   boost_amount[0] -= frame_tonality+.2;
+   boost_amount[1] -= frame_tonality+.2;
+   /* A band only keeps its boost if it stands out from both neighbours.
+      boost_band[] stays 0 until a band has been selected, so bounds-check
+      it before reading band_tonality[] on either side. */
+   for (i=0;i<2;i++)
+   {
+      int pb = boost_band[i];
+      if (pb<1 || pb>=NB_TBANDS-1
+            || band_tonality[pb] < band_tonality[pb+1]+.15
+            || band_tonality[pb] < band_tonality[pb-1]+.15)
+         boost_amount[i]=0;
+   }
+
+   slope /= 8*8;
+   *tslope = slope;
+
+   tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
+   return frame_tonality;
+}
diff --git a/src/opus_encoder.c b/src/opus_encoder.c index a7622f905..af5460583 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -40,6 +40,7 @@ #include "arch.h" #include "opus_private.h" #include "os_support.h" +#include "analysis.c" #include "tuning_parameters.h" #ifdef FIXED_POINT @@ -101,7 +102,7 @@ static const opus_int32 mono_music_bandwidth_thresholds[8] = { 14000, 1000, /* MB not allowed
*/ 18000, 2000, /* MB<->WB */ 24000, 2000, /* WB<->SWB */ - 33000, 2000, /* SWB<->FB */ + 31000, 2000, /* SWB<->FB */ }; static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { 11000, 1000, /* NB<->MB */ @@ -472,6 +473,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s opus_int32 max_rate; int curr_bandwidth; opus_int32 max_data_bytes; + int extra_buffer, total_buffer; VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -497,7 +499,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s delay_compensation = 0; else delay_compensation = st->delay_compensation; - + if (1) + { + total_buffer = IMAX(240, delay_compensation); + } + extra_buffer = total_buffer-delay_compensation; st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); frame_rate = st->Fs/frame_size; @@ -823,9 +829,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s ec_enc_init(&enc, data, max_data_bytes-1); - ALLOC(pcm_buf, (delay_compensation+frame_size)*st->channels, opus_val16); - for (i=0;i<delay_compensation*st->channels;i++) - pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-delay_compensation)*st->channels+i]; + ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); + for (i=0;i<total_buffer*st->channels;i++) + pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; if (st->mode == MODE_CELT_ONLY) hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); @@ -840,12 +846,20 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s if (st->application == OPUS_APPLICATION_VOIP) { - hp_cutoff(pcm, cutoff_Hz, &pcm_buf[delay_compensation*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); + hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } else { for (i=0;i<frame_size*st->channels;i++) - pcm_buf[delay_compensation*st->channels 
+ i] = pcm[i]; + pcm_buf[total_buffer*st->channels + i] = pcm[i]; } + static TonalityAnalysisState tonal; + float tonality; + float tonality_slope; + tonality_analysis(&tonal, celt_enc, pcm_buf, st->channels, &tonality_slope); + tonality = tonality_analysis(&tonal, celt_enc, pcm_buf+(st->Fs/100)*st->channels, st->channels, &tonality_slope); + celt_encoder_ctl(celt_enc, CELT_SET_TONALITY(32768*tonality)); + celt_encoder_ctl(celt_enc, CELT_SET_TONALITY_SLOPE(16384*tonality_slope)); + /* SILK processing */ if (st->mode != MODE_CELT_ONLY) { @@ -951,10 +965,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s } #ifdef FIXED_POINT - pcm_silk = pcm_buf+delay_compensation*st->channels; + pcm_silk = pcm_buf+total_buffer*st->channels; #else for (i=0;i<frame_size*st->channels;i++) - pcm_silk[i] = FLOAT2INT16(pcm_buf[delay_compensation*st->channels + i]); + pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); #endif ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); if( ret ) { @@ -1055,13 +1069,13 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) { for (i=0;i<st->channels*st->Fs/400;i++) - tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-st->delay_compensation-st->Fs/400)*st->channels + i]; + tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; } - for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+delay_compensation));i++) + for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++) st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; for (;i<st->encoder_buffer*st->channels;i++) - st->delay_buffer[i] = pcm_buf[(frame_size+delay_compensation-st->encoder_buffer)*st->channels+i]; + st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; if (st->mode != 
MODE_HYBRID || st->stream_channels==1) @@ -1082,7 +1096,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s g1 *= (1./16384); g2 *= (1./16384); #endif - stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, + stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; } @@ -1134,7 +1148,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s int err; celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); + err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); if (err < 0) { RESTORE_STACK; @@ -1160,7 +1174,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s /* If false, we already busted the budget and we'll end up with a "PLC packet" */ if (ec_tell(&enc) <= 8*nb_compr_bytes) { - ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); + ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc); if (ret < 0) { RESTORE_STACK; @@ -1183,9 +1197,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ - celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); + celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL); - err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, 
redundancy_bytes, NULL); + err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); if (err < 0) { RESTORE_STACK; -- GitLab