From 3252bf2a68272b887fabbd654e10ca88378039ca Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Fri, 19 Apr 2013 03:14:28 -0400 Subject: [PATCH] Variable framesize improvements - Properly apply the transient boost by counting all the bits in the cost - Disable the post-filter for non-20-ms frames that follow a transient (applies only to variable framesize) --- celt/celt_encoder.c | 14 ++++++++++++-- src/opus_encoder.c | 21 ++++++++++++++------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c index d93b15a6e..c22100caf 100644 --- a/celt/celt_encoder.c +++ b/celt/celt_encoder.c @@ -73,6 +73,7 @@ struct OpusCustomEncoder { int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ int loss_rate; int lsb_depth; + int variable_duration; /* Everything beyond this point gets cleared on a reset */ #define ENCODER_RESET_START rng @@ -1162,6 +1163,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, const opus_int16 *eBands; int secondMdct; int signalBandwidth; + int transient_got_disabled=0; /* Must start at 0: only the isTransient=0 fallback sets it, but it is read unconditionally when updating consec_transient */ ALLOC_STACK; mode = st->mode; @@ -1325,7 +1327,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { int enabled; int qg; - enabled = nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf && st->complexity >= 5; + enabled = nbAvailableBytes>12*C && st->start==0 && !silence && !st->disable_pf + && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration); prefilter_tapset = st->tapset_decision; pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); @@ -1364,6 +1367,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, shortBlocks = M; } else { isTransient = 0; + transient_got_disabled=1; } ALLOC(freq, CC*N, celt_sig); /**< Interleaved signal MDCTs */ @@ -1874,7 +1878,7 @@ int
celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, } } while (++c<CC); - if (isTransient) + if (isTransient || transient_got_disabled) st->consec_transient++; else st->consec_transient=0; @@ -2058,6 +2062,12 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) *value=st->lsb_depth; } break; + case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: + { + opus_int32 value = va_arg(ap, opus_int32); + st->variable_duration = value; + } + break; case OPUS_RESET_STATE: { int i; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index b03686140..88bf5aff8 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -597,11 +597,17 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ int states[MAX_DYNAMIC_FRAMESIZE][16]; float best_cost; int best_state; - + float factor; + /* Take into account that we damp VBR in the 32 kb/s to 64 kb/s range. */ + if (rate<80) + factor=0; + else if (rate>160) + factor=1; + else + factor = (rate-80.f)/80.f; /* Makes variable framesize less aggressive at lower bitrates, but I can't - find any valid theretical justification for this (other than it seems + find any valid theoretical justification for this (other than it seems to help) */ - frame_cost *= 720/rate; for (i=0;i<16;i++) { /* Impossible state */ @@ -610,7 +616,7 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ } for (i=0;i<4;i++) { - cost[0][1<<i] = frame_cost + rate*(1<<i)*transient_boost(E, E_1, i, N+1); + cost[0][1<<i] = (frame_cost + rate*(1<<i))*(1+factor*transient_boost(E, E_1, i, N+1)); states[0][1<<i] = i; } for (i=1;i<N;i++) @@ -641,7 +647,7 @@ static int transient_viterbi(const float *E, const float *E_1, int N, int frame_ min_cost = tmp; } } - curr_cost = frame_cost+rate*(1<<j)*transient_boost(E+i, E_1+i, j, N-i+1); + curr_cost = (frame_cost + rate*(1<<j))*(1+factor*transient_boost(E+i, E_1+i, j, N-i+1)); cost[i][1<<j] = min_cost; /* If part of the frame is
outside the analysis window, only count part of the cost */ if (N-i < (1<<j)) @@ -760,7 +766,7 @@ int optimize_framesize(const opus_val16 *x, int len, int C, opus_int32 Fs, e[i+pos] = e[i+pos-1]; if (buffering) N=IMIN(MAX_DYNAMIC_FRAMESIZE, N+2); - bestLM = transient_viterbi(e, e_1, N, (1.f+.5*tonality)*(40*C+40), bitrate/400); + bestLM = transient_viterbi(e, e_1, N, (1.f+.5*tonality)*(60*C+40), bitrate/400); mem[0] = e[1<<bestLM]; if (buffering) { @@ -1548,7 +1554,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifndef FIXED_POINT if (st->variable_duration==OPUS_FRAMESIZE_VARIABLE && frame_size != st->Fs/50) { - bonus = (40*st->stream_channels+40)*(st->Fs/frame_size-50); + bonus = (60*st->stream_channels+40)*(st->Fs/frame_size-50); if (analysis_info->valid) bonus = bonus*(1.f+.5*analysis_info->tonality); } @@ -2159,6 +2165,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...) { opus_int32 value = va_arg(ap, opus_int32); st->variable_duration = value; + celt_encoder_ctl(celt_enc, OPUS_SET_EXPERT_FRAME_DURATION(value)); } break; case OPUS_GET_EXPERT_FRAME_DURATION_REQUEST: -- GitLab