diff --git a/Makefile.am b/Makefile.am index b090ca09ff255443f71cf00c2f6366a533f9d3c6..edbcc42eb2e35fe7aa3e29ddc848f1b764d90b32 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,9 +20,10 @@ if FIXED_POINT SILK_SOURCES += $(SILK_SOURCES_FIXED) else SILK_SOURCES += $(SILK_SOURCES_FLOAT) -OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) endif +OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) + if CPU_ARM CELT_SOURCES += $(CELT_SOURCES_ARM) endif diff --git a/celt/arch.h b/celt/arch.h index 78e2635f006f39bddf94631a71e5d5cc72970e4f..f9c98567aa848944eae08fc25423c9d65db58c23 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -185,6 +185,7 @@ typedef float celt_ener; #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b)) #define MULT16_16_Q11_32(a,b) ((a)*(b)) +#define MULT16_16_Q11(a,b) ((a)*(b)) #define MULT16_16_Q13(a,b) ((a)*(b)) #define MULT16_16_Q14(a,b) ((a)*(b)) #define MULT16_16_Q15(a,b) ((a)*(b)) diff --git a/celt/celt.h b/celt/celt.h index 0911c72f721066a71d48f35a06d7b5bd9fb42fa0..cdb76c8b920c7477aed61e0f1b5068b7040b350e 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -52,11 +52,11 @@ extern "C" { typedef struct { int valid; - opus_val16 tonality; - opus_val16 tonality_slope; - opus_val16 noisiness; - opus_val16 activity; - opus_val16 music_prob; + float tonality; + float tonality_slope; + float noisiness; + float activity; + float music_prob; int bandwidth; }AnalysisInfo; @@ -109,10 +109,7 @@ typedef struct { #define OPUS_SET_LFE_REQUEST 10024 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) -#define OPUS_SET_ENERGY_SAVE_REQUEST 10026 -#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x) - -#define OPUS_SET_ENERGY_MASK_REQUEST 10028 +#define OPUS_SET_ENERGY_MASK_REQUEST 10026 #define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) /* Encoder stuff */ @@ -193,6 +190,9 @@ extern const signed char tf_select_table[4][8]; int resampling_factor(opus_int32 rate); +void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, 
celt_sig * OPUS_RESTRICT inp, + int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip); + void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap); diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 31bea1bbaa8b9a9c172c7c12ba22bf824a0412fd..241fb3721e832b3d3c7c32bdbf3b109eca0f0011 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -111,7 +111,6 @@ struct OpusCustomEncoder { opus_val32 overlap_max; opus_val16 stereo_saving; int intensity; - opus_val16 *energy_save; opus_val16 *energy_mask; opus_val16 spec_avg; @@ -452,7 +451,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS } -static void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, +void preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp, int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip) { int i; @@ -744,7 +743,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, const opus_val16 *bandLogE, int end, int LM, int C, int N0, AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, - int intensity) + int intensity, opus_val16 surround_trim) { int i; opus_val32 diff=0; @@ -818,11 +817,12 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, if (diff < -QCONST16(10.f, DB_SHIFT)) trim_index++; trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); + trim -= SHR16(surround_trim, DB_SHIFT-8); trim -= 2*SHR16(tf_estimate, 14-8); -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (analysis->valid) { - trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05f))); + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 
QCONST16(2.f, 8)*(analysis->tonality_slope+.05f))); } #endif @@ -877,7 +877,7 @@ static int stereo_analysis(const CELTMode *m, const celt_norm *X, static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, - int effectiveBytes, opus_int32 *tot_boost_, int lfe) + int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) { int i, c; opus_int32 tot_boost=0; @@ -940,6 +940,8 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 follower[i] = MAX16(0, bandLogE[i]-follower[i]); } } + for (i=start;i<end;i++) + follower[i] = MAX16(follower[i], surround_dynalloc[i]); /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ if ((!vbr || constrained_vbr)&&!isTransient) { @@ -1140,7 +1142,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 target = base_target; /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (analysis->valid && analysis->activity<.4) target -= (opus_int32)((coded_bins<<BITRES)*(.4f-analysis->activity)); #endif @@ -1165,7 +1167,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 QCONST16(0.02f,14) : QCONST16(0.04f,14); target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API /* Apply tonality boost */ if (analysis->valid && !lfe) { @@ -1291,6 +1293,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int transient_got_disabled=0; opus_val16 surround_masking=0; opus_val16 temporal_vbr=0; + opus_val16 surround_trim = 0; + VARDECL(opus_val16, surround_dynalloc); ALLOC_STACK; mode = 
st->mode; @@ -1526,37 +1530,83 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); - if (st->energy_save) - { - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; -#ifdef FIXED_POINT - /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */ - offset -= QCONST16(3.0f, DB_SHIFT); -#endif - for(i=0;i<C*nbEBands;i++) - st->energy_save[i]=bandLogE[i]-offset; - st->energy_save=NULL; - } + + ALLOC(surround_dynalloc, C*nbEBands, opus_val16); + for(i=0;i<st->end;i++) + surround_dynalloc[i] = 0; /* This computes how much masking takes place between surround channels */ - if (st->energy_mask&&!st->lfe) + if (st->start==0&&st->energy_mask&&!st->lfe) { + int mask_end; + int midband; + int count_dynalloc; opus_val32 mask_avg=0; - opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; + opus_val32 diff=0; + int count=0; + mask_end = IMAX(2,st->lastCodedBands); /* need at least 2 bands: count and (mask_end-1) are used as divisors below */ for (c=0;c<C;c++) { - opus_val16 followE, followMask; - followE = followMask = -QCONST16(14.f, DB_SHIFT); - for(i=0;i<st->end;i++) + for(i=0;i<mask_end;i++) { - /* We use a simple follower to approximate the masking spreading function.
*/ - followE = MAX16(followE-QCONST16(1.f, DB_SHIFT), bandLogE[nbEBands*c+i]-offset); - followMask = MAX16(followMask-QCONST16(1.f, DB_SHIFT), st->energy_mask[nbEBands*c+i]); - mask_avg += followE-followMask; + opus_val16 mask; + mask = MAX16(MIN16(st->energy_mask[nbEBands*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_avg += MULT16_16(mask, eBands[i+1]-eBands[i]); + count += eBands[i+1]-eBands[i]; + diff += MULT16_16(mask, 1+2*i-mask_end); } } - surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.7f, DB_SHIFT); - surround_masking = MIN16(MAX16(surround_masking, -QCONST16(2.f, DB_SHIFT)), QCONST16(.2f, DB_SHIFT)); - surround_masking -= HALF16(HALF16(surround_masking)); + mask_avg = DIV32_16(mask_avg,count); + mask_avg += QCONST16(.2f, DB_SHIFT); + diff = diff*6/(C*(mask_end-1)*(mask_end+1)*mask_end); + /* Again, being conservative */ + diff = HALF32(diff); + diff = MAX32(MIN32(diff, QCONST32(.031f, DB_SHIFT)), -QCONST32(.031f, DB_SHIFT)); + /* Find the band that's in the middle of the coded spectrum */ + for (midband=0;eBands[midband+1] < eBands[mask_end]/2;midband++); + count_dynalloc=0; + for(i=0;i<mask_end;i++) + { + opus_val32 lin; + opus_val16 unmask; + lin = mask_avg + diff*(i-midband); + if (C==2) + unmask = MAX16(st->energy_mask[i], st->energy_mask[nbEBands+i]); + else + unmask = st->energy_mask[i]; + unmask = MIN16(unmask, QCONST16(.0f, DB_SHIFT)); + unmask -= lin; + if (unmask > QCONST16(.25f, DB_SHIFT)) + { + surround_dynalloc[i] = unmask - QCONST16(.25f, DB_SHIFT); + count_dynalloc++; + } + } + if (count_dynalloc>=3) + { + /* If we need dynalloc in many bands, it's probably because our + initial masking rate was too low. */ + mask_avg += QCONST16(.25f, DB_SHIFT); + if (mask_avg>0) + { + /* Something went really wrong in the original calculations, + disabling masking. 
*/ + mask_avg = 0; + diff = 0; + for(i=0;i<mask_end;i++) + surround_dynalloc[i] = 0; + } else { + for(i=0;i<mask_end;i++) + surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT)); + } + } + mask_avg += QCONST16(.2f, DB_SHIFT); + /* Convert to 1/64th units used for the trim */ + surround_trim = 64*diff; + /*printf("%d %d ", mask_avg, surround_trim);*/ + surround_masking = mask_avg; } /* Temporal VBR (but not for LFE) */ if (!st->lfe) @@ -1683,7 +1733,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, st->start, st->end, C, offsets, st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, - eBands, LM, effectiveBytes, &tot_boost, st->lfe); + eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); /* For LFE, everything interesting is in the first band */ if (st->lfe) offsets[0] = IMIN(8, effectiveBytes/3); @@ -1756,7 +1806,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, alloc_trim = 5; else alloc_trim = alloc_trim_analysis(mode, X, bandLogE, - st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity); + st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity, surround_trim); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -1859,7 +1909,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0; bits -= anti_collapse_rsv; signalBandwidth = st->end-1; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (st->analysis.valid) { int min_bandwidth; @@ -2261,12 +2311,6 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 
st->lfe = value; } break; - case OPUS_SET_ENERGY_SAVE_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_save=value; - } - break; case OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h index 0e77976e83bb4549090eadd1678e0b647df55237..657e67c8e7b1883fa36ba4faa20860f4d58af292 100644 --- a/celt/fixed_generic.h +++ b/celt/fixed_generic.h @@ -116,6 +116,7 @@ #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))) #define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11)) +#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11)) #define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13)) #define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14)) #define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15)) diff --git a/src/analysis.c b/src/analysis.c index a9d2073fed6e88df2f667c47fa7c1cbb62c58fe8..53247df93241488a01fe83b9e6c8fddce99b9403 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -184,12 +184,12 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int for (;i<DETECT_SIZE;i++) psum += tonal->pspeech[i]; psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence; - /*printf("%f %f\n", psum, info_out->music_prob);*/ + /*printf("%f %f %f\n", psum, info_out->music_prob, info_out->tonality);*/ info_out->music_prob = psum; } -void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix) +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix) { int i, b; const kiss_fft_state *kfft; @@ -234,7 +234,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con kfft = celt_mode->mdct.kfft[0]; if 
(tonal->count==0) tonal->mem_fill = 240; - downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, C); + downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE) { tonal->mem_fill += len; @@ -253,14 +253,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con for (i=0;i<N2;i++) { float w = analysis_window[i]; - in[i].r = MULT16_16(w, tonal->inmem[i]); - in[i].i = MULT16_16(w, tonal->inmem[N2+i]); - in[N-i-1].r = MULT16_16(w, tonal->inmem[N-i-1]); - in[N-i-1].i = MULT16_16(w, tonal->inmem[N+N2-i-1]); + in[i].r = w*tonal->inmem[i]; + in[i].i = w*tonal->inmem[N2+i]; + in[N-i-1].r = w*tonal->inmem[N-i-1]; + in[N-i-1].i = w*tonal->inmem[N+N2-i-1]; } OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); - downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, C); + downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; opus_fft(kfft, in, out); @@ -325,8 +325,12 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con float stationarity; for (i=tbands[b];i<tbands[b+1];i++) { - float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r - + out[i].i*out[i].i + out[N-i].i*out[N-i].i; + float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; +#ifdef FIXED_POINT + /* FIXME: It's probably best to change the BFCC filter initial state instead */ + binE *= 5.55e-17f; +#endif E += binE; tE += binE*tonality[i]; nE += binE*2.f*(.5f-noisiness[i]); @@ -334,7 +338,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con tonal->E[tonal->E_count][b] = E; frame_noisiness += nE/(1e-15f+E); - frame_loudness += celt_sqrt(E+1e-10f); + 
frame_loudness += sqrt(E+1e-10f); logE[b] = (float)log(E+1e-10f); tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01f); tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1f); @@ -343,21 +347,21 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con tonal->highE[b]+=.5f; tonal->lowE[b]-=.5f; } - relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]); + relativeE += (logE[b]-tonal->lowE[b])/(1e-15+tonal->highE[b]-tonal->lowE[b]); L1=L2=0; for (i=0;i<NB_FRAMES;i++) { - L1 += celt_sqrt(tonal->E[i][b]); + L1 += sqrt(tonal->E[i][b]); L2 += tonal->E[i][b]; } - stationarity = MIN16(0.99f,L1/celt_sqrt(EPSILON+NB_FRAMES*L2)); + stationarity = MIN16(0.99f,L1/sqrt(1e-15+NB_FRAMES*L2)); stationarity *= stationarity; stationarity *= stationarity; frame_stationarity += stationarity; /*band_tonality[b] = tE/(1e-15+E)*/; - band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); + band_tonality[b] = MAX16(tE/(1e-15+E), stationarity*tonal->prev_band_tonality[b]); #if 0 if (b>=NB_TONAL_SKIP_BANDS) { @@ -379,6 +383,9 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con bandwidth = 0; maxE = 0; noise_floor = 5.7e-4f/(1<<(IMAX(0,lsb_depth-8))); +#ifdef FIXED_POINT + noise_floor *= 1<<(15+SIG_SHIFT); +#endif noise_floor *= noise_floor; for (b=0;b<NB_TOT_BANDS;b++) { @@ -389,8 +396,8 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con band_end = extra_bands[b+1]; for (i=band_start;i<band_end;i++) { - float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r - + out[i].i*out[i].i + out[N-i].i*out[N-i].i; + float binE = out[i].r*(float)out[i].r + out[N-i].r*(float)out[N-i].r + + out[i].i*(float)out[i].i + out[N-i].i*(float)out[N-i].i; E += binE; } maxE = MAX32(maxE, E); @@ -469,14 +476,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con tonal->mem[i] = BFCC[i]; } for (i=0;i<9;i++) - features[11+i] = 
celt_sqrt(tonal->std[i]); + features[11+i] = sqrt(tonal->std[i]); features[20] = info->tonality; features[21] = info->activity; features[22] = frame_stationarity; features[23] = info->tonality_slope; features[24] = tonal->lowECount; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API mlp_process(&net, features, frame_probs); frame_probs[0] = .5f*(frame_probs[0]+1); /* Curve fitting between the MLP probability and the actual probability */ @@ -611,44 +618,30 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con RESTORE_STACK; } -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm, - const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) +void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, + int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info) { int offset; int pcm_len; - /* Avoid overflow/wrap-around of the analysis buffer */ - frame_size = IMIN((DETECT_SIZE-5)*Fs/100, frame_size); - - pcm_len = frame_size - analysis->analysis_offset; - offset = 0; - do { - tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, C, lsb_depth, downmix); - offset += 480; - pcm_len -= 480; - } while (pcm_len>0); - analysis->analysis_offset = frame_size; - - if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) + if (analysis_pcm != NULL) { - int LM = 3; - LM = optimize_framesize((const opus_val16*)pcm, frame_size, C, Fs, bitrate_bps, - analysis->prev_tonality, analysis->subframe_mem, delay_compensation, downmix); - while ((Fs/400<<LM)>frame_size) - LM--; - frame_size = (Fs/400<<LM); - } else { - frame_size = frame_size_select(frame_size, variable_duration, Fs); + /* Avoid 
overflow/wrap-around of the analysis buffer */ + analysis_frame_size = IMIN((DETECT_SIZE-5)*Fs/100, analysis_frame_size); + + pcm_len = analysis_frame_size - analysis->analysis_offset; + offset = analysis->analysis_offset; + do { + tonality_analysis(analysis, NULL, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix); + offset += 480; + pcm_len -= 480; + } while (pcm_len>0); + analysis->analysis_offset = analysis_frame_size; + + analysis->analysis_offset -= frame_size; } - if (frame_size<0) - return -1; - analysis->analysis_offset -= frame_size; - /* Only perform analysis up to 20-ms frames. Longer ones will be split if - they're in CELT-only mode. */ analysis_info->valid = 0; tonality_get_info(analysis, analysis_info, frame_size); - - return frame_size; } diff --git a/src/analysis.h b/src/analysis.h index 8cd788832ceea0dd1c0fd48a144281cd30ddb185..cf37792daa42cdfffd02178d73af7d659162f448 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -42,7 +42,7 @@ typedef struct { float angle[240]; float d_angle[240]; float d2_angle[240]; - float inmem[ANALYSIS_BUF_SIZE]; + opus_val32 inmem[ANALYSIS_BUF_SIZE]; int mem_fill; /* number of usable samples in the buffer */ float prev_band_tonality[NB_TBANDS]; float prev_tonality; @@ -79,12 +79,12 @@ typedef struct { } TonalityAnalysisState; void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, - const CELTMode *celt_mode, const void *x, int len, int offset, int C, int lsb_depth, downmix_func downmix); + const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix); void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len); -int run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *pcm, - const void *analysis_pcm, int frame_size, int variable_duration, int C, opus_int32 Fs, int bitrate_bps, - int delay_compensation, int lsb_depth, downmix_func downmix, AnalysisInfo 
*analysis_info); +void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm, + int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs, + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info); #endif diff --git a/src/mlp.c b/src/mlp.c index 90e94a5fcb4cedd9f8560c11a5e231a6a89ceeeb..73b1d315b86b3a9372ec56b1def7a77e52e77e19 100644 --- a/src/mlp.c +++ b/src/mlp.c @@ -35,7 +35,7 @@ #include "tansig_table.h" #define MAX_NEURONS 100 -#ifdef FIXED_POINT +#if 0 static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ { int i; @@ -43,9 +43,9 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ /*double x, y;*/ opus_val16 dy, yy; /* Q14 */ /*x = 1.9073e-06*_x;*/ - if (_x>=QCONST32(10,19)) + if (_x>=QCONST32(8,19)) return QCONST32(1.,14); - if (_x<=-QCONST32(10,19)) + if (_x<=-QCONST32(8,19)) return -QCONST32(1.,14); xx = EXTRACT16(SHR32(_x, 8)); /*i = lrint(25*x);*/ @@ -62,11 +62,11 @@ static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ } #else /*extern const float tansig_table[501];*/ -static inline opus_val16 tansig_approx(opus_val16 x) +static inline float tansig_approx(float x) { int i; - opus_val16 y, dy; - opus_val16 sign=1; + float y, dy; + float sign=1; if (x>=8) return 1; if (x<=-8) @@ -85,6 +85,7 @@ static inline opus_val16 tansig_approx(opus_val16 x) } #endif +#if 0 void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) { int j; @@ -108,4 +109,28 @@ void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); } } - +#else +void mlp_process(const MLP *m, const float *in, float *out) +{ + int j; + float hidden[MAX_NEURONS]; + const float *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + float sum = *W++; + for (k=0;k<m->topo[0];k++) + sum = sum + in[k]**W++; + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + float sum 
= *W++; + for (k=0;k<m->topo[1];k++) + sum = sum + hidden[k]**W++; + out[j] = tansig_approx(sum); + } +} +#endif diff --git a/src/mlp.h b/src/mlp.h index 68ff68d82ef2fe5907c1c4cb486fbc2dd2fb3595..86c8e0617d0461797e6e104c1b06c20fb62a4f64 100644 --- a/src/mlp.h +++ b/src/mlp.h @@ -33,9 +33,9 @@ typedef struct { int layers; const int *topo; - const opus_val16 *weights; + const float *weights; } MLP; -void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out); +void mlp_process(const MLP *m, const float *in, float *out); #endif /* _MLP_H_ */ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index ac40edac7c742f1bf5528b0c30d398f8002421e5..c151fd078b08e862a93a1ecf688d193e97d22806 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -95,10 +95,10 @@ struct OpusEncoder { int silk_bw_switch; /* Sampling rate (at the API level) */ int first; - int energy_masking; + opus_val16 * energy_masking; StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API TonalityAnalysisState analysis; int detected_bandwidth; int analysis_offset; @@ -201,7 +201,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat st->silk_mode.payloadSize_ms = 20; st->silk_mode.bitRate = 25000; st->silk_mode.packetLossPercentage = 0; - st->silk_mode.complexity = 10; + st->silk_mode.complexity = 9; st->silk_mode.useInBandFEC = 0; st->silk_mode.useDTX = 0; st->silk_mode.useCBR = 0; @@ -212,7 +212,7 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat if(err!=OPUS_OK)return OPUS_INTERNAL_ERROR; celt_encoder_ctl(celt_enc, CELT_SET_SIGNALLING(0)); - celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(10)); + celt_encoder_ctl(celt_enc, OPUS_SET_COMPLEXITY(st->silk_mode.complexity)); st->use_vbr = 1; /* Makes constrained VBR the default (safer for real-time use) */ @@ -551,7 +551,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m return 
st->user_bitrate_bps; } -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API /* Don't use more than 60 ms for the frame size analysis */ #define MAX_DYNAMIC_FRAMESIZE 24 /* Estimates how much the bitrate will be boosted based on the sub-frame energy */ @@ -685,32 +685,6 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ return best_state; } -void downmix_float(const void *_x, float *sub, int subframe, int offset, int C) -{ - const float *x; - int c, j; - x = (const float *)_x; - for (j=0;j<subframe;j++) - sub[j] = x[(j+offset)*C]; - for (c=1;c<C;c++) - for (j=0;j<subframe;j++) - sub[j] += x[(j+offset)*C+c]; -} - -void downmix_int(const void *_x, float *sub, int subframe, int offset, int C) -{ - const opus_int16 *x; - int c, j; - x = (const opus_int16 *)_x; - for (j=0;j<subframe;j++) - sub[j] = x[(j+offset)*C]; - for (c=1;c<C;c++) - for (j=0;j<subframe;j++) - sub[j] += x[(j+offset)*C+c]; - for (j=0;j<subframe;j++) - sub[j] *= (1.f/32768); -} - int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, downmix_func downmix) @@ -723,10 +697,10 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bestLM=0; int subframe; int pos; - VARDECL(opus_val16, sub); + VARDECL(opus_val32, sub); subframe = Fs/400; - ALLOC(sub, subframe, opus_val16); + ALLOC(sub, subframe, opus_val32); e[0]=mem[0]; e_1[0]=1.f/(EPSILON+mem[0]); if (buffering) @@ -754,7 +728,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int j; tmp=EPSILON; - downmix(x, sub, subframe, i*subframe, C); + downmix(x, sub, subframe, i*subframe, 0, -2, C); if (i==0) memx = sub[0]; for (j=0;j<subframe;j++) @@ -784,6 +758,76 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, #endif +#ifndef DISABLE_FLOAT_API +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +{ + const float *x; 
+ opus_val32 scale; + int j; + x = (const float *)_x; + for (j=0;j<subframe;j++) + sub[j] = SCALEIN(x[(j+offset)*C+c1]); + if (c2>-1) + { + for (j=0;j<subframe;j++) + sub[j] += SCALEIN(x[(j+offset)*C+c2]); + } else if (c2==-2) + { + int c; + for (c=1;c<C;c++) + { + for (j=0;j<subframe;j++) + sub[j] += SCALEIN(x[(j+offset)*C+c]); + } + } +#ifdef FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f; +#endif + if (c2==-2) /* c2==-2 means all C channels were summed: normalize by C */ + scale /= C; + else if (c2>-1) /* two channels (c1 and c2) were summed */ + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; +} +#endif + +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +{ + const opus_int16 *x; + opus_val32 scale; + int j; + x = (const opus_int16 *)_x; + for (j=0;j<subframe;j++) + sub[j] = x[(j+offset)*C+c1]; + if (c2>-1) + { + for (j=0;j<subframe;j++) + sub[j] += x[(j+offset)*C+c2]; + } else if (c2==-2) + { + int c; + for (c=1;c<C;c++) + { + for (j=0;j<subframe;j++) + sub[j] += x[(j+offset)*C+c]; + } + } +#ifdef FIXED_POINT + scale = (1<<SIG_SHIFT); +#else + scale = 1.f/32768; +#endif + if (c2==-2) /* c2==-2 means all C channels were summed: normalize by C */ + scale /= C; + else if (c2>-1) /* two channels (c1 and c2) were summed */ + scale /= 2; + for (j=0;j<subframe;j++) + sub[j] *= scale; +} + opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs) { int new_size; @@ -805,6 +849,29 @@ opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_ return new_size; } +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem) +{ +#ifndef DISABLE_FLOAT_API + if (variable_duration == OPUS_FRAMESIZE_VARIABLE && frame_size >= Fs/200) + { + int LM = 3; + LM = optimize_framesize(analysis_pcm, frame_size, C, Fs, bitrate_bps, + 0, subframe_mem, delay_compensation, downmix); + while ((Fs/400<<LM)>frame_size) + LM--; + frame_size = (Fs/400<<LM); + } else +#endif + { + frame_size = frame_size_select(frame_size, variable_duration, Fs); + } + if 
(frame_size<0) + return -1; + return frame_size; +} + opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int32 Fs, StereoWidthState *mem) { opus_val16 corr; @@ -883,11 +950,8 @@ opus_val16 compute_stereo_width(const opus_val16 *pcm, int frame_size, opus_int3 } opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth -#ifndef FIXED_POINT - , AnalysisInfo *analysis_info -#endif - ) + unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix) { void *silk_enc; CELTEncoder *celt_enc; @@ -916,6 +980,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ int total_buffer; opus_val16 stereo_width; + const CELTMode *celt_mode; + AnalysisInfo analysis_info; + int analysis_read_pos_bak=-1; + int analysis_read_subframe_bak=-1; VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -941,17 +1009,34 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ lsb_depth = IMIN(lsb_depth, st->lsb_depth); + analysis_info.valid = 0; + celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); +#ifndef DISABLE_FLOAT_API +#ifdef FIXED_POINT + if (st->silk_mode.complexity >= 10 && st->Fs==48000) +#else + if (st->silk_mode.complexity >= 7 && st->Fs==48000) +#endif + { + analysis_read_pos_bak = st->analysis.read_pos; + analysis_read_subframe_bak = st->analysis.read_subframe; + run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size, + c1, c2, analysis_channels, st->Fs, + lsb_depth, downmix, &analysis_info); + } +#endif + st->voice_ratio = -1; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API st->detected_bandwidth = 0; - if (analysis_info->valid) + if (analysis_info.valid) { int analysis_bandwidth; if 
(st->signal_type == OPUS_AUTO) - st->voice_ratio = (int)floor(.5+100*(1-analysis_info->music_prob)); + st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob)); - analysis_bandwidth = analysis_info->bandwidth; + analysis_bandwidth = analysis_info.bandwidth; if (analysis_bandwidth<=12) st->detected_bandwidth = OPUS_BANDWIDTH_NARROWBAND; else if (analysis_bandwidth<=14) @@ -1281,6 +1366,13 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ VARDECL(OpusRepacketizer, rp); opus_int32 bytes_per_frame; +#ifndef DISABLE_FLOAT_API /* st->analysis is only declared when the float API is enabled */ + if (analysis_read_pos_bak!= -1) + { + st->analysis.read_pos = analysis_read_pos_bak; + st->analysis.read_subframe = analysis_read_subframe_bak; + } +#endif nb_frames = frame_size > st->Fs/25 ? 3 : 2; bytes_per_frame = IMIN(1276,(out_data_bytes-3)/nb_frames); @@ -1310,11 +1400,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* When switching from SILK/Hybrid to CELT, only ask for a switch at the last frame */ if (to_celt && i==nb_frames-1) st->user_forced_mode = MODE_CELT_ONLY; - tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth -#ifndef FIXED_POINT - , analysis_info -#endif - ); + tmp_len = opus_encode_native(st, pcm+i*(st->channels*st->Fs/50), st->Fs/50, + tmp_data+i*bytes_per_frame, bytes_per_frame, lsb_depth, + NULL, 0, c1, c2, analysis_channels, downmix); if (tmp_len<0) { RESTORE_STACK; @@ -1419,6 +1507,45 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->silk_mode.bitRate = total_bitRate; } + /* Surround masking for SILK */ + if (st->energy_masking && st->use_vbr && !st->lfe) + { + opus_val32 mask_sum=0; + opus_val16 masking_depth; + opus_int32 rate_offset; + int c; + int end = 17; + opus_int16 srate = 16000; + if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) + { + end = 13; + srate = 8000; + } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) + { + end = 15; + srate = 12000; + 
} + for (c=0;c<st->channels;c++) + { + for(i=0;i<end;i++) + { + opus_val16 mask; + mask = MAX16(MIN16(st->energy_masking[21*c+i], + QCONST16(.25f, DB_SHIFT)), -QCONST16(2.0f, DB_SHIFT)); + if (mask > 0) + mask = HALF16(mask); + mask_sum += mask; + } + } + /* Conservative rate reduction, we cut the masking in half */ + masking_depth = HALF16(mask_sum / end*st->channels); + rate_offset = PSHR32(MULT16_16(srate, masking_depth), DB_SHIFT); + rate_offset = MAX32(rate_offset, -2*st->silk_mode.bitRate/3); + rate_offset += QCONST16(.4f, DB_SHIFT); + st->silk_mode.bitRate += rate_offset; + bytes_target += rate_offset * frame_size / (8 * st->Fs); + } + st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs; st->silk_mode.nChannelsAPI = st->channels; st->silk_mode.nChannelsInternal = st->stream_channels; @@ -1476,9 +1603,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (prefill) { opus_int32 zero=0; - const CELTMode *celt_mode; int prefill_offset; - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); /* Use a smooth onset for the SILK prefill to avoid the encoder trying to encode a discontinuity. The exact location is what we need to avoid leaving any "gap" in the audio when mixing with the redundant CELT frame. 
Here we can afford to @@ -1589,12 +1714,12 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ if (st->use_vbr) { opus_int32 bonus=0; -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) { bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); - if (analysis_info->valid) - bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info->tonality)); + if (analysis_info.valid) + bonus = (opus_int32)(bonus*(1.f+.5f*analysis_info.tonality)); } #endif celt_encoder_ctl(celt_enc, OPUS_SET_VBR(1)); @@ -1625,9 +1750,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* gain_fade() and stereo_fade() need to be after the buffer copying because we don't want any of this to affect the SILK part */ if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) { - const CELTMode *celt_mode; - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); gain_fade(pcm_buf, pcm_buf, st->prev_HB_gain, HB_gain, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); } @@ -1638,9 +1760,6 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ /* Apply stereo width reduction (at low bitrates) */ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { opus_val16 g1, g2; - const CELTMode *celt_mode; - - celt_encoder_ctl(celt_enc, CELT_GET_MODE(&celt_mode)); g1 = st->hybrid_stereo_width_Q14; g2 = (opus_val16)(st->silk_mode.stereoWidth_Q14); #ifdef FIXED_POINT @@ -1697,9 +1816,9 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ ec_enc_shrink(&enc, nb_compr_bytes); } -#ifndef FIXED_POINT +#ifndef DISABLE_FLOAT_API if (redundancy || st->mode != MODE_SILK_ONLY) - celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(analysis_info)); + celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); #endif /* 5 ms redundant frame for CELT->SILK */ @@ -1825,114 +1944,88 @@ 
opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifdef FIXED_POINT #ifndef DISABLE_FLOAT_API -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, +opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; + int frame_size; + int delay_compensation; VARDECL(opus_int16, in); ALLOC_STACK; - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - if(frame_size<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); ALLOC(in, frame_size*st->channels, opus_int16); for (i=0;i<frame_size*st->channels;i++) in[i] = FLOAT2INT16(pcm[i]); - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); RESTORE_STACK; return ret; } #endif -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, unsigned char *data, opus_int32 out_data_bytes) { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - if(frame_size<0) - { - return OPUS_BAD_ARG; - } - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16); + int frame_size; + int delay_compensation; + if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) + delay_compensation = 0; + else + delay_compensation = st->delay_compensation; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, 
st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); } #else -opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int frame_size, +opus_int32 opus_encode(OpusEncoder *st, const opus_int16 *pcm, int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes) { int i, ret; - const CELTMode *celt_mode; + int frame_size; int delay_compensation; - int lsb_depth; VARDECL(float, in); - AnalysisInfo analysis_info; ALLOC_STACK; - opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode)); if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) delay_compensation = 0; else delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(16, st->lsb_depth); - - analysis_info.valid = 0; - if (st->silk_mode.complexity >= 7 && st->Fs==48000) - { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset, - frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_int, &analysis_info); - } else { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - } - if(frame_size<0) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); ALLOC(in, frame_size*st->channels, float); for (i=0;i<frame_size*st->channels;i++) in[i] = (1.0f/32768)*pcm[i]; - ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, &analysis_info); + ret = opus_encode_native(st, in, frame_size, data, max_data_bytes, 16, pcm, analysis_frame_size, 0, -2, st->channels, downmix_int); RESTORE_STACK; return ret; } -opus_int32 opus_encode_float(OpusEncoder *st, const float *pcm, int frame_size, +opus_int32 
opus_encode_float(OpusEncoder *st, const float *pcm, int analysis_frame_size, unsigned char *data, opus_int32 out_data_bytes) { - const CELTMode *celt_mode; + int frame_size; int delay_compensation; - int lsb_depth; - AnalysisInfo analysis_info; - - opus_encoder_ctl(st, CELT_GET_MODE(&celt_mode)); if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) delay_compensation = 0; else delay_compensation = st->delay_compensation; - - lsb_depth = IMIN(24, st->lsb_depth); - - analysis_info.valid = 0; - if (st->silk_mode.complexity >= 7 && st->Fs==48000) - { - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm+st->channels*st->analysis.analysis_offset, - frame_size, st->variable_duration, st->channels, st->Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix_float, &analysis_info); - } else { - frame_size = frame_size_select(frame_size, st->variable_duration, st->Fs); - } - if(frame_size<0) - { - return OPUS_BAD_ARG; - } - - return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, &analysis_info); - + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, st->channels, st->Fs, st->bitrate_bps, + delay_compensation, downmix_float, st->analysis.subframe_mem); + return opus_encode_native(st, pcm, frame_size, data, out_data_bytes, 24, + pcm, analysis_frame_size, 0, -2, st->channels, downmix_float); } #endif @@ -2349,20 +2442,10 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) 
ret = celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); } break; - case OPUS_SET_ENERGY_SAVE_REQUEST: - { - opus_val16 *value = va_arg(ap, opus_val16*); - if (!value) - { - goto bad_arg; - } - ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value)); - } - break; case OPUS_SET_ENERGY_MASK_REQUEST: { opus_val16 *value = va_arg(ap, opus_val16*); - st->energy_masking = (value!=NULL); + st->energy_masking = value; ret = celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); } break; diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index 4cddbffd552495b8a1dfa0caa665b28d504d9cb3..31447bf9c19383890e96baaca2eac4c1a20e53a6 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -36,8 +36,11 @@ #include <stdarg.h> #include "float_cast.h" #include "os_support.h" -#include "analysis.h" #include "mathops.h" +#include "mdct.h" +#include "modes.h" +#include "bands.h" +#include "quant_bands.h" typedef struct { int nb_streams; @@ -57,17 +60,65 @@ static const VorbisLayout vorbis_mappings[8] = { {5, 3, {0, 6, 1, 2, 3, 4, 5, 7}}, /* 8: 7.1 surround */ }; +typedef void (*opus_copy_channel_in_func)( + opus_val16 *dst, + int dst_stride, + const void *src, + int src_stride, + int src_channel, + int frame_size +); + struct OpusMSEncoder { - TonalityAnalysisState analysis; ChannelLayout layout; int lfe_stream; + int application; int variable_duration; int surround; opus_int32 bitrate_bps; opus_val32 subframe_mem[3]; /* Encoder states go here */ + /* then opus_val32 window_mem[channels*120]; */ + /* then opus_val32 preemph_mem[channels]; */ }; +static opus_val32 *ms_get_preemph_mem(OpusMSEncoder *st) +{ + int s; + char *ptr; + int coupled_size, mono_size; + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;s<st->layout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += 
align(mono_size); + } + return (opus_val32*)(ptr+st->layout.nb_channels*120*sizeof(opus_val32)); +} + +static opus_val32 *ms_get_window_mem(OpusMSEncoder *st) +{ + int s; + char *ptr; + int coupled_size, mono_size; + + coupled_size = opus_encoder_get_size(2); + mono_size = opus_encoder_get_size(1); + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;s<st->layout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + return (opus_val32*)ptr; +} static int validate_encoder_layout(const ChannelLayout *layout) { @@ -88,6 +139,206 @@ static int validate_encoder_layout(const ChannelLayout *layout) return 1; } +static void channel_pos(int channels, int pos[8]) +{ + /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ + if (channels==4) + { + pos[0]=1; + pos[1]=3; + pos[2]=1; + pos[3]=3; + } else if (channels==3||channels==5||channels==6) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=0; + } else if (channels==7) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=2; + pos[6]=0; + } else if (channels==8) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=1; + pos[6]=3; + pos[7]=0; + } +} + +#if 1 +/* Computes a rough approximation of log2(2^a + 2^b) */ +static opus_val16 logSum(opus_val16 a, opus_val16 b) +{ + opus_val16 max; + opus_val32 diff; + opus_val16 frac; + static const opus_val16 diff_table[17] = { + QCONST16(0.5000000f, DB_SHIFT), QCONST16(0.2924813f, DB_SHIFT), QCONST16(0.1609640f, DB_SHIFT), QCONST16(0.0849625f, DB_SHIFT), + QCONST16(0.0437314f, DB_SHIFT), QCONST16(0.0221971f, DB_SHIFT), QCONST16(0.0111839f, DB_SHIFT), QCONST16(0.0056136f, DB_SHIFT), + QCONST16(0.0028123f, DB_SHIFT) + }; + int low; + if (a>b) + { + max = a; + diff = SUB32(EXTEND32(a),EXTEND32(b)); + } else { + max = b; + diff = SUB32(EXTEND32(b),EXTEND32(a)); + } + if (diff >= QCONST16(8.f, DB_SHIFT)) + return max; +#ifdef 
FIXED_POINT + low = SHR32(diff, DB_SHIFT-1); + frac = SHL16(diff - SHL16(low, DB_SHIFT-1), 16-DB_SHIFT); +#else + low = floor(2*diff); + frac = 2*diff - low; +#endif + return max + diff_table[low] + MULT16_16_Q15(frac, SUB16(diff_table[low+1], diff_table[low])); +} +#else +opus_val16 logSum(opus_val16 a, opus_val16 b) +{ + return log2(pow(4, a)+ pow(4, b))/2; +} +#endif + +void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem, + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in +) +{ + int c; + int i; + int LM; + int pos[8] = {0}; + int upsample; + int frame_size; + opus_val32 bandE[21]; + opus_val16 maskLogE[3][21]; + VARDECL(opus_val32, in); + VARDECL(opus_val16, x); + VARDECL(opus_val32, out); + SAVE_STACK; + + upsample = resampling_factor(rate); + frame_size = len*upsample; + + for (LM=0;LM<=celt_mode->maxLM;LM++) + if (celt_mode->shortMdctSize<<LM==frame_size) + break; + + ALLOC(in, frame_size+overlap, opus_val32); + ALLOC(x, len, opus_val16); + ALLOC(freq, frame_size, opus_val32); + + channel_pos(channels, pos); + + for (c=0;c<3;c++) + for (i=0;i<21;i++) + maskLogE[c][i] = -QCONST16(28.f, DB_SHIFT); + + for (c=0;c<channels;c++) + { + OPUS_COPY(in, mem+c*overlap, overlap); + (*copy_channel_in)(x, 1, pcm, channels, c, len); + preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0); + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1); + if (upsample != 1) + { + int bound = len; + for (i=0;i<bound;i++) + freq[i] *= upsample; + for (;i<frame_size;i++) + freq[i] = 0; + } + + compute_band_energies(celt_mode, freq, bandE, 21, 1, 1<<LM); + amp2Log2(celt_mode, 21, 21, bandE, bandLogE+21*c, 1); + /* Apply spreading function with -6 dB/band going up and -12 dB/band going down. 
*/ + for (i=1;i<21;i++) + bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i-1]-QCONST16(1.f, DB_SHIFT)); + for (i=19;i>=0;i--) + bandLogE[21*c+i] = MAX16(bandLogE[21*c+i], bandLogE[21*c+i+1]-QCONST16(2.f, DB_SHIFT)); + if (pos[c]==1) + { + for (i=0;i<21;i++) + maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]); + } else if (pos[c]==3) + { + for (i=0;i<21;i++) + maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]); + } else if (pos[c]==2) + { + for (i=0;i<21;i++) + { + maskLogE[0][i] = logSum(maskLogE[0][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); + maskLogE[2][i] = logSum(maskLogE[2][i], bandLogE[21*c+i]-QCONST16(.5f, DB_SHIFT)); + } + } +#if 0 + for (i=0;i<21;i++) + printf("%f ", bandLogE[21*c+i]); +//#else + float sum=0; + for (i=0;i<21;i++) + sum += bandLogE[21*c+i]; + printf("%f ", sum/21); +#endif + OPUS_COPY(mem+c*overlap, in+frame_size, overlap); + } + for (i=0;i<21;i++) + maskLogE[1][i] = MIN32(maskLogE[0][i],maskLogE[2][i]); + for (c=0;c<3;c++) + for (i=0;i<21;i++) + maskLogE[c][i] += QCONST16(.5f, DB_SHIFT)*log2(2.f/(channels-1)); +#if 0 + for (c=0;c<3;c++) + { + for (i=0;i<21;i++) + printf("%f ", maskLogE[c][i]); + } +#endif + for (c=0;c<channels;c++) + { + opus_val16 *mask; + if (pos[c]!=0) + { + mask = &maskLogE[pos[c]-1][0]; + for (i=0;i<21;i++) + bandLogE[21*c+i] = bandLogE[21*c+i] - mask[i]; + } else { + for (i=0;i<21;i++) + bandLogE[21*c+i] = 0; + } +#if 0 + for (i=0;i<21;i++) + printf("%f ", bandLogE[21*c+i]); + printf("\n"); +#endif +#if 0 + float sum=0; + for (i=0;i<21;i++) + sum += bandLogE[21*c+i]; + printf("%f ", sum/21); + printf("\n"); +#endif + } + RESTORE_STACK; +} opus_int32 opus_multistream_encoder_get_size(int nb_streams, int nb_coupled_streams) { @@ -132,7 +383,9 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ return 0; size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); if (channels>2) - size += align(opus_encoder_get_size(2)); + { + size += 
channels*(120*sizeof(opus_val32) + sizeof(opus_val32)); + } return size; } @@ -161,10 +414,10 @@ static int opus_multistream_encoder_init_impl( st->layout.nb_streams = streams; st->layout.nb_coupled_streams = coupled_streams; st->subframe_mem[0]=st->subframe_mem[1]=st->subframe_mem[2]=0; - OPUS_CLEAR(&st->analysis,1); if (!surround) st->lfe_stream = -1; st->bitrate_bps = OPUS_AUTO; + st->application = application; st->variable_duration = OPUS_FRAMESIZE_ARG; for (i=0;i<st->layout.nb_channels;i++) st->layout.mapping[i] = mapping[i]; @@ -192,10 +445,8 @@ static int opus_multistream_encoder_init_impl( } if (surround) { - OpusEncoder *downmix_enc; - downmix_enc = (OpusEncoder*)ptr; - ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO); - if(ret!=OPUS_OK)return ret; + OPUS_CLEAR(ms_get_preemph_mem(st), channels); + OPUS_CLEAR(ms_get_window_mem(st), channels*120); } st->surround = surround; return OPUS_OK; @@ -339,22 +590,6 @@ OpusMSEncoder *opus_multistream_surround_encoder_create( return st; } -typedef void (*opus_copy_channel_in_func)( - opus_val16 *dst, - int dst_stride, - const void *src, - int src_stride, - int src_channel, - int frame_size -); - -typedef void (*opus_surround_downmix_funct)( - opus_val16 *dst, - const void *src, - int channels, - int frame_size -); - static void surround_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, @@ -433,15 +668,11 @@ static int opus_multistream_encode_native OpusMSEncoder *st, opus_copy_channel_in_func copy_channel_in, const void *pcm, - int frame_size, + int analysis_frame_size, unsigned char *data, opus_int32 max_data_bytes, int lsb_depth, - opus_surround_downmix_funct surround_downmix -#ifndef FIXED_POINT - , downmix_func downmix - , const void *pcm_analysis -#endif + downmix_func downmix ) { opus_int32 Fs; @@ -451,31 +682,29 @@ static int opus_multistream_encode_native char *ptr; int tot_size; VARDECL(opus_val16, buf); + VARDECL(opus_val16, bandSMR); unsigned char tmp_data[MS_FRAME_TMP]; 
OpusRepacketizer rp; opus_int32 complexity; -#ifndef FIXED_POINT - AnalysisInfo analysis_info; -#endif const CELTMode *celt_mode; opus_int32 bitrates[256]; opus_val16 bandLogE[42]; - opus_val16 bandLogE_mono[21]; + opus_val32 *mem = NULL; + opus_val32 *preemph_mem=NULL; + int frame_size; ALLOC_STACK; + if (st->surround) + { + preemph_mem = ms_get_preemph_mem(st); + mem = ms_get_window_mem(st); + } + ptr = (char*)st + align(sizeof(OpusMSEncoder)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs)); opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_COMPLEXITY(&complexity)); opus_encoder_ctl((OpusEncoder*)ptr, CELT_GET_MODE(&celt_mode)); - if (400*frame_size < Fs) - { - RESTORE_STACK; - return OPUS_BAD_ARG; - } -#ifndef FIXED_POINT - analysis_info.valid = 0; - if (complexity >= 7 && Fs==48000) { opus_int32 delay_compensation; int channels; @@ -483,13 +712,15 @@ static int opus_multistream_encode_native channels = st->layout.nb_streams + st->layout.nb_coupled_streams; opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_LOOKAHEAD(&delay_compensation)); delay_compensation -= Fs/400; + frame_size = compute_frame_size(pcm, analysis_frame_size, + st->variable_duration, channels, Fs, st->bitrate_bps, + delay_compensation, downmix, st->subframe_mem); + } - frame_size = run_analysis(&st->analysis, celt_mode, pcm, pcm_analysis, - frame_size, st->variable_duration, channels, Fs, st->bitrate_bps, delay_compensation, lsb_depth, downmix, &analysis_info); - } else -#endif + if (400*frame_size < Fs) { - frame_size = frame_size_select(frame_size, st->variable_duration, Fs); + RESTORE_STACK; + return OPUS_BAD_ARG; } /* Validate frame_size before using it to allocate stack space. This mirrors the checks in opus_encode[_float](). 
*/ @@ -504,42 +735,10 @@ static int opus_multistream_encode_native coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); + ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16); if (st->surround) { - int i; - unsigned char dummy[512]; - /* Temporary kludge -- remove */ - OpusEncoder *downmix_enc; - - ptr = (char*)st + align(sizeof(OpusMSEncoder)); - for (s=0;s<st->layout.nb_streams;s++) - { - if (s < st->layout.nb_coupled_streams) - ptr += align(coupled_size); - else - ptr += align(mono_size); - } - downmix_enc = (OpusEncoder*)ptr; - surround_downmix(buf, pcm, st->layout.nb_channels, frame_size); - opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE)); - opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); - opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2)); - opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth -#ifndef FIXED_POINT - , &analysis_info -#endif - ); - /* Combines the left and right mask into a centre mask. We - use an approximation for the log of the sum of the energies. 
*/ - for(i=0;i<21;i++) - { - opus_val16 diff; - diff = ABS16(SUB16(bandLogE[i], bandLogE[21+i])); - diff = diff + HALF16(diff); - diff = SHR32(HALF32(celt_exp2(-diff)), 16-DB_SHIFT); - bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]) + diff; - } + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in); } if (max_data_bytes < 4*st->layout.nb_streams-1) @@ -563,10 +762,24 @@ static int opus_multistream_encode_native opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); if (st->surround) { - opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); - opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + opus_int32 equiv_rate; + equiv_rate = st->bitrate_bps; + if (frame_size*50 < Fs) + equiv_rate -= 60*(Fs/frame_size - 50)*st->layout.nb_channels; + if (equiv_rate > 112000) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + else if (equiv_rate > 76000) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); + else if (equiv_rate > 48000) + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_WIDEBAND)); + else + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); if (s < st->layout.nb_coupled_streams) + { + /* To preserve the spatial image, force stereo CELT on coupled streams */ + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); + } } } @@ -578,11 +791,13 @@ static int opus_multistream_encode_native OpusEncoder *enc; int len; int curr_max; + int c1, c2; opus_repacketizer_init(&rp); enc = (OpusEncoder*)ptr; if (s < st->layout.nb_coupled_streams) { + int i; int left, right; left = get_left_channel(&st->layout, s, -1); right = get_right_channel(&st->layout, s, -1); @@ -591,28 +806,39 @@ static int opus_multistream_encode_native (*copy_channel_in)(buf+1, 2, pcm, st->layout.nb_channels, right, frame_size); ptr += align(coupled_size); - /* FIXME: This isn't correct 
for the coupled center channels in - 6.1 surround configuration */ if (st->surround) - opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); + { + for (i=0;i<21;i++) + { + bandLogE[i] = bandSMR[21*left+i]; + bandLogE[21+i] = bandSMR[21*right+i]; + } + } + c1 = left; + c2 = right; } else { + int i; int chan = get_mono_channel(&st->layout, s, -1); (*copy_channel_in)(buf, 1, pcm, st->layout.nb_channels, chan, frame_size); ptr += align(mono_size); if (st->surround) - opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono)); + { + for (i=0;i<21;i++) + bandLogE[i] = bandSMR[21*chan+i]; + } + c1 = chan; + c2 = -1; } + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); /* number of bytes left (+Toc) */ curr_max = max_data_bytes - tot_size; /* Reserve three bytes for the last stream and four for the others */ curr_max -= IMAX(0,4*(st->layout.nb_streams-s-1)-1); curr_max = IMIN(curr_max,MS_FRAME_TMP); - len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth -#ifndef FIXED_POINT - , &analysis_info -#endif - ); + len = opus_encode_native(enc, buf, frame_size, tmp_data, curr_max, lsb_depth, + pcm, analysis_frame_size, c1, c2, st->layout.nb_channels, downmix); if (len<0) { RESTORE_STACK; @@ -626,50 +852,12 @@ static int opus_multistream_encode_native data += len; tot_size += len; } + /*printf("\n");*/ RESTORE_STACK; return tot_size; } -static void channel_pos(int channels, int pos[8]) -{ - /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ - if (channels==4) - { - pos[0]=1; - pos[1]=3; - pos[2]=1; - pos[3]=3; - } else if (channels==3||channels==5||channels==6) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=0; - } else if (channels==7) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=2; - pos[6]=0; - } else if (channels==8) - { - pos[0]=1; - pos[1]=2; - pos[2]=3; - pos[3]=1; - pos[4]=3; - pos[5]=1; - pos[6]=3; - pos[7]=0; - } -} - #if !defined(DISABLE_FLOAT_API) 
static void opus_copy_channel_in_float( opus_val16 *dst, @@ -690,57 +878,6 @@ static void opus_copy_channel_in_float( dst[i*dst_stride] = float_src[i*src_stride+src_channel]; #endif } - -static void opus_surround_downmix_float( - opus_val16 *dst, - const void *src, - int channels, - int frame_size -) -{ - const float *float_src; - opus_int32 i; - int pos[8] = {0}; - int c; - float_src = (const float *)src; - - channel_pos(channels, pos); - for (i=0;i<2*frame_size;i++) - dst[i]=0; - - for (c=0;c<channels;c++) - { - if (pos[c]==1) - { - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - dst[2*i] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3); -#else - dst[2*i] += float_src[i*channels+c]; -#endif - } else if (pos[c]==3) - { - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - dst[2*i+1] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3); -#else - dst[2*i+1] += float_src[i*channels+c]; -#endif - } else if (pos[c]==2) - { - for (i=0;i<frame_size;i++) - { -#if defined(FIXED_POINT) - dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15); - dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), FLOAT2INT16(float_src[i*channels+c])),3+15); -#else - dst[2*i] += .707f*float_src[i*channels+c]; - dst[2*i+1] += .707f*float_src[i*channels+c]; -#endif - } - } - } -} #endif static void opus_copy_channel_in_short( @@ -763,57 +900,6 @@ static void opus_copy_channel_in_short( #endif } -static void opus_surround_downmix_short( - opus_val16 *dst, - const void *src, - int channels, - int frame_size -) -{ - const opus_int16 *short_src; - opus_int32 i; - int pos[8] = {0}; - int c; - short_src = (const opus_int16 *)src; - - channel_pos(channels, pos); - for (i=0;i<2*frame_size;i++) - dst[i]=0; - - for (c=0;c<channels;c++) - { - if (pos[c]==1) - { - for (i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - dst[2*i] += SHR16(short_src[i*channels+c],3); -#else - dst[2*i] += (1/32768.f)*short_src[i*channels+c]; -#endif - } else if (pos[c]==3) - { - for 
(i=0;i<frame_size;i++) -#if defined(FIXED_POINT) - dst[2*i+1] += SHR16(short_src[i*channels+c],3); -#else - dst[2*i+1] += (1/32768.f)*short_src[i*channels+c]; -#endif - } else if (pos[c]==2) - { - for (i=0;i<frame_size;i++) - { -#if defined(FIXED_POINT) - dst[2*i] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15); - dst[2*i+1] += SHR32(MULT16_16(QCONST16(.70711f,15), short_src[i*channels+c]),3+15); -#else - dst[2*i] += (.707f/32768.f)*short_src[i*channels+c]; - dst[2*i+1] += (.707f/32768.f)*short_src[i*channels+c]; -#endif - } - } - } -} - #ifdef FIXED_POINT int opus_multistream_encode( @@ -825,7 +911,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short); + pcm, frame_size, data, max_data_bytes, 16, downmix_int); } #ifndef DISABLE_FLOAT_API @@ -838,7 +924,7 @@ int opus_multistream_encode_float( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_float); + pcm, frame_size, data, max_data_bytes, 16, downmix_float); } #endif @@ -853,9 +939,8 @@ int opus_multistream_encode_float opus_int32 max_data_bytes ) { - int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 24, downmix_float); } int opus_multistream_encode( @@ -866,9 +951,8 @@ int opus_multistream_encode( opus_int32 max_data_bytes ) { - int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, 
data, max_data_bytes, 16, downmix_int); } #endif diff --git a/src/opus_private.h b/src/opus_private.h index 9d8210b5f05441812946dfb360c13a447429e1e6..0e739ebb0b0ed5691425b9a73a78805a930b3e73 100644 --- a/src/opus_private.h +++ b/src/opus_private.h @@ -82,9 +82,9 @@ int get_mono_channel(const ChannelLayout *layout, int stream_id, int prev); #define OPUS_SET_FORCE_MODE_REQUEST 11002 #define OPUS_SET_FORCE_MODE(x) OPUS_SET_FORCE_MODE_REQUEST, __opus_check_int(x) -typedef void (*downmix_func)(const void *, float *, int, int, int); -void downmix_float(const void *_x, float *sub, int subframe, int offset, int C); -void downmix_int(const void *_x, float *sub, int subframe, int offset, int C); +typedef void (*downmix_func)(const void *, opus_val32 *, int, int, int, int, int); +void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); +void downmix_int(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C); int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, int bitrate, opus_val16 tonality, opus_val32 *mem, int buffering, @@ -94,12 +94,13 @@ int encode_size(int size, unsigned char *data); opus_int32 frame_size_select(opus_int32 frame_size, int variable_duration, opus_int32 Fs); +opus_int32 compute_frame_size(const void *analysis_pcm, int frame_size, + int variable_duration, int C, opus_int32 Fs, int bitrate_bps, + int delay_compensation, downmix_func downmix, opus_val32 *subframe_mem); + opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_size, - unsigned char *data, opus_int32 out_data_bytes, int lsb_depth -#ifndef FIXED_POINT - , AnalysisInfo *analysis_info -#endif - ); + unsigned char *data, opus_int32 out_data_bytes, int lsb_depth, + const void *analysis_pcm, opus_int32 analysis_size, int c1, int c2, int analysis_channels, downmix_func downmix); int opus_decode_native(OpusDecoder *st, const unsigned char *data, opus_int32 len, opus_val16 *pcm, int 
frame_size, int decode_fec, int self_delimited, diff --git a/src/tansig_table.h b/src/tansig_table.h index 885ea3e8d7dbd35dace96078db39c53d808557a2..c76f844a72f428eb0c1abf87f399acc8074118ca 100644 --- a/src/tansig_table.h +++ b/src/tansig_table.h @@ -1,6 +1,6 @@ /* This file is auto-generated by gen_tables */ -static const opus_val16 tansig_table[201] = { +static const float tansig_table[201] = { 0.000000f, 0.039979f, 0.079830f, 0.119427f, 0.158649f, 0.197375f, 0.235496f, 0.272905f, 0.309507f, 0.345214f, 0.379949f, 0.413644f, 0.446244f, 0.477700f, 0.507977f,