From a4dccd3f659eb60b8b3caa8b04688246851454bf Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Sat, 4 May 2013 23:54:20 -0400 Subject: [PATCH] Implements basic surround masking The idea is that the rate of each stream is adjusted based on its contribution to the total energy of a stereo downmix. --- celt/celt.h | 6 + celt/celt_encoder.c | 52 ++++++++- include/opus_defines.h | 1 + src/opus_encoder.c | 16 ++- src/opus_multistream_encoder.c | 200 +++++++++++++++++++++++++++++++-- 5 files changed, 264 insertions(+), 11 deletions(-) diff --git a/celt/celt.h b/celt/celt.h index a8f7cb036..ea8c2f95e 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -110,6 +110,12 @@ typedef struct { #define OPUS_SET_LFE_REQUEST 10024 #define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x) +#define OPUS_SET_ENERGY_SAVE_REQUEST 10026 +#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x) + +#define OPUS_SET_ENERGY_MASK_REQUEST 10028 +#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x) + /* Encoder stuff */ int celt_encoder_get_size(int channels); diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index 6ac2457db..2030ad08e 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -109,6 +109,8 @@ struct OpusCustomEncoder { opus_val16 overlap_max; opus_val16 stereo_saving; int intensity; + opus_val16 *energy_save; + opus_val16 *energy_mask; #ifdef RESYNTH /* +MAX_PERIOD/2 to make space for overlap */ @@ -1165,6 +1167,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int secondMdct; int signalBandwidth; int transient_got_disabled=0; + opus_val16 surround_masking=0; ALLOC_STACK; mode = st->mode; @@ -1397,6 +1400,27 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0])); } amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C); + if (st->energy_save) + { + 
opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; +#ifdef FIXED_POINT + /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */ + offset -= QCONST16(3.0f, DB_SHIFT); +#endif + for(i=0;i<C*nbEBands;i++) + st->energy_save[i]=bandLogE[i]-offset; + st->energy_save=NULL; + } + if (st->energy_mask&&!st->lfe) + { + opus_val32 mask_avg=0; + opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0; + for (c=0;c<C;c++) + for(i=0;i<st->end;i++) + mask_avg += bandLogE[nbEBands*c+i]-offset-st->energy_mask[nbEBands*c+i]; + surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.0f, DB_SHIFT); + surround_masking = MIN16(MAX16(surround_masking,-QCONST16(1.5f, DB_SHIFT)), 0); + } /*for (i=0;i<21;i++) printf("%f ", bandLogE[i]); printf("\n");*/ @@ -1625,7 +1649,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins); /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/ target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target), - SHR16(MULT16_16(st->stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); + SHR32(MULT16_16(st->stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); } /* Boost the rate according to dynalloc (minus the dynalloc average for calibration). 
*/ target += tot_boost-(16<<LM); @@ -1649,6 +1673,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } #endif + if (st->energy_mask&&!st->lfe) + { + opus_int32 surround_target = target + SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT); + /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/ + target = IMAX(target/4, surround_target); + } + { opus_int32 floor_depth; int bins; @@ -1660,7 +1691,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /*printf("%f %d\n", maxDepth, floor_depth);*/ } - if (st->constrained_vbr || st->bitrate<64000) + if ((!st->energy_mask||st->lfe) && (st->constrained_vbr || st->bitrate<64000)) { opus_val16 rate_factor; #ifdef FIXED_POINT @@ -1759,7 +1790,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, codedBands = compute_allocation(mode, st->start, st->end, offsets, cap, alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth); - st->lastCodedBands = codedBands; + if (st->lastCodedBands) + st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands)); + else + st->lastCodedBands = codedBands; quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C); @@ -2151,6 +2185,18 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 
st->lfe = value; } break; + case OPUS_SET_ENERGY_SAVE_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_save=value; + } + break; + case OPUS_SET_ENERGY_MASK_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_mask = value; + } + break; default: goto bad_request; } diff --git a/include/opus_defines.h b/include/opus_defines.h index 203144a77..00918b89f 100644 --- a/include/opus_defines.h +++ b/include/opus_defines.h @@ -158,6 +158,7 @@ extern "C" { #define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x)) #define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr))) #define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr))) +#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr))) /** @endcond */ /** @defgroup opus_ctlvalues Pre-defined values for CTL interface diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 235f5573b..e2a6347e8 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -94,6 +94,7 @@ struct OpusEncoder { int silk_bw_switch; /* Sampling rate (at the API level) */ int first; + int energy_masking; StereoWidthState width_mem; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; #ifndef FIXED_POINT @@ -1602,7 +1603,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ st->prev_HB_gain = HB_gain; if (st->mode != MODE_HYBRID || st->stream_channels==1) st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),IMAX(0,st->bitrate_bps-32000)); - if( st->channels == 2 ) { + if( !st->energy_masking && st->channels == 2 ) { /* Apply stereo width reduction (at low bitrates) */ if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) { opus_val16 g1, g2; @@ -2216,6 +2217,19 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) 
celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value)); } break; + case OPUS_SET_ENERGY_SAVE_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value)); + } + break; + case OPUS_SET_ENERGY_MASK_REQUEST: + { + opus_val16 *value = va_arg(ap, opus_val16*); + st->energy_masking = (value!=NULL); + celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value)); + } + break; case CELT_GET_MODE_REQUEST: { diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c index d94aa7051..163e73c21 100644 --- a/src/opus_multistream_encoder.c +++ b/src/opus_multistream_encoder.c @@ -61,6 +61,7 @@ struct OpusMSEncoder { ChannelLayout layout; int lfe_stream; int variable_duration; + int surround; opus_int32 bitrate_bps; opus_val32 subframe_mem[3]; /* Encoder states go here */ @@ -104,6 +105,7 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ { int nb_streams; int nb_coupled_streams; + opus_int32 size; if (mapping_family==0) { @@ -127,7 +129,10 @@ opus_int32 opus_multistream_surround_encoder_get_size(int channels, int mapping_ nb_coupled_streams=0; } else return 0; - return opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); + size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams); + if (channels>2) + size += align(opus_encoder_get_size(2)); + return size; } @@ -171,9 +176,9 @@ static int opus_multistream_encoder_init_impl( for (i=0;i<st->layout.nb_coupled_streams;i++) { ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application); + if(ret!=OPUS_OK)return ret; if (i==st->lfe_stream) opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1)); - if(ret!=OPUS_OK)return ret; ptr += align(coupled_size); } for (;i<st->layout.nb_streams;i++) @@ -184,6 +189,14 @@ static int opus_multistream_encoder_init_impl( if(ret!=OPUS_OK)return ret; ptr += align(mono_size); } + if (surround && st->layout.nb_channels>2) + { + OpusEncoder *downmix_enc; + downmix_enc = 
(OpusEncoder*)ptr; + ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO); + if(ret!=OPUS_OK)return ret; + } + st->surround = surround; return OPUS_OK; } @@ -332,6 +345,13 @@ typedef void (*opus_copy_channel_in_func)( int frame_size ); +typedef void (*opus_surround_downmix_funct)( + opus_val16 *dst, + const void *src, + int channels, + int frame_size +); + static void surround_rate_allocation( OpusMSEncoder *st, opus_int32 *rate, @@ -398,7 +418,8 @@ static int opus_multistream_encode_native int frame_size, unsigned char *data, opus_int32 max_data_bytes, - int lsb_depth + int lsb_depth, + opus_surround_downmix_funct surround_downmix #ifndef FIXED_POINT , downmix_func downmix , const void *pcm_analysis @@ -418,6 +439,8 @@ static int opus_multistream_encode_native AnalysisInfo analysis_info; const CELTMode *celt_mode; opus_int32 bitrates[256]; + opus_val16 bandLogE[42]; + opus_val16 bandLogE_mono[21]; ALLOC_STACK; ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -461,6 +484,36 @@ static int opus_multistream_encode_native coupled_size = opus_encoder_get_size(2); mono_size = opus_encoder_get_size(1); + if (st->surround && st->layout.nb_channels>2) + { + int i; + unsigned char dummy[512]; + /* Temporary kludge -- remove */ + OpusEncoder *downmix_enc; + + ptr = (char*)st + align(sizeof(OpusMSEncoder)); + for (s=0;s<st->layout.nb_streams;s++) + { + if (s < st->layout.nb_coupled_streams) + ptr += align(coupled_size); + else + ptr += align(mono_size); + } + downmix_enc = (OpusEncoder*)ptr; + surround_downmix(buf, pcm, st->layout.nb_channels, frame_size); + opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE)); + opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); + opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2)); + opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth +#ifndef FIXED_POINT + , &analysis_info +#endif + ); + 
for(i=0;i<21;i++) + bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]); + } + if (max_data_bytes < 4*st->layout.nb_streams-1) { RESTORE_STACK; @@ -480,6 +533,13 @@ static int opus_multistream_encode_native else ptr += align(mono_size); opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s])); + if (st->surround) + { + opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY)); + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + if (s < st->layout.nb_coupled_streams) + opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2)); + } } ptr = (char*)st + align(sizeof(OpusMSEncoder)); @@ -503,11 +563,17 @@ static int opus_multistream_encode_native (*copy_channel_in)(buf+1, 2, pcm, st->layout.nb_channels, right, frame_size); ptr += align(coupled_size); + /* FIXME: This isn't correct for the coupled center channels in + 6.1 surround configuration */ + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE)); } else { int chan = get_mono_channel(&st->layout, s, -1); (*copy_channel_in)(buf, 1, pcm, st->layout.nb_channels, chan, frame_size); ptr += align(mono_size); + if (st->surround) + opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono)); } /* number of bytes left (+Toc) */ curr_max = max_data_bytes - tot_size; @@ -557,6 +623,85 @@ static void opus_copy_channel_in_float( dst[i*dst_stride] = float_src[i*src_stride+src_channel]; #endif } + +static void channel_pos(int channels, int pos[8]) +{ + /* Position in the mix: 0 don't mix, 1: left, 2: center, 3:right */ + if (channels==4) + { + pos[0]=1; + pos[1]=3; + pos[2]=1; + pos[3]=3; + } else if (channels==3||channels==5||channels==6) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=0; + } else if (channels==7) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=2; + pos[6]=0; + } else if (channels==8) + { + pos[0]=1; + pos[1]=2; + pos[2]=3; + pos[3]=1; + pos[4]=3; + pos[5]=1; + pos[6]=3; + pos[7]=0; + } +} + +static void opus_surround_downmix_float( 
+ opus_val16 *dst, + const void *src, + int channels, + int frame_size +) +{ + const float *float_src; + opus_int32 i; + int pos[8] = {0}; + int c; + float_src = (const float *)src; + + channel_pos(channels, pos); + for (i=0;i<2*frame_size;i++) + dst[i]=0; + + for (c=0;c<channels;c++) + { + if (pos[c]==1||pos[c]==2) + { + for (i=0;i<frame_size;i++) +#if defined(FIXED_POINT) + dst[2*i] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3); +#else + dst[2*i] += float_src[i*channels+c]; +#endif + } + if (pos[c]==2||pos[c]==3) + { + for (i=0;i<frame_size;i++) +#if defined(FIXED_POINT) + dst[2*i+1] += SHR16(FLOAT2INT16(float_src[i*channels+c]),3); +#else + dst[2*i+1] += float_src[i*channels+c]; +#endif + } + } +} #endif static void opus_copy_channel_in_short( @@ -579,6 +724,47 @@ static void opus_copy_channel_in_short( #endif } +static void opus_surround_downmix_short( + opus_val16 *dst, + const void *src, + int channels, + int frame_size +) +{ + const opus_int16 *short_src; + opus_int32 i; + int pos[8] = {0}; + int c; + short_src = (const opus_int16 *)src; + + channel_pos(channels, pos); + for (i=0;i<2*frame_size;i++) + dst[i]=0; + + for (c=0;c<channels;c++) + { + if (pos[c]==1||pos[c]==2) + { + for (i=0;i<frame_size;i++) +#if defined(FIXED_POINT) + dst[2*i] += SHR16(short_src[i*channels+c],3); +#else + dst[2*i] += (1/32768.f)*short_src[i*channels+c]; +#endif + } + if (pos[c]==2||pos[c]==3) + { + for (i=0;i<frame_size;i++) +#if defined(FIXED_POINT) + dst[2*i+1] += SHR16(short_src[i*channels+c],3); +#else + dst[2*i+1] += (1/32768.f)*short_src[i*channels+c]; +#endif + } + } +} + + #ifdef FIXED_POINT int opus_multistream_encode( OpusMSEncoder *st, @@ -589,7 +775,7 @@ int opus_multistream_encode( ) { return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16); + pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short); } #ifndef DISABLE_FLOAT_API @@ -602,7 +788,7 @@ int opus_multistream_encode_float( ) {
return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 16); + pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_float); } #endif @@ -619,7 +805,7 @@ int opus_multistream_encode_float { int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_float, - pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset); } int opus_multistream_encode( @@ -632,7 +818,7 @@ int opus_multistream_encode( { int channels = st->layout.nb_streams + st->layout.nb_coupled_streams; return opus_multistream_encode_native(st, opus_copy_channel_in_short, - pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset); + pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset); } #endif -- GitLab