From 0892c169c6b868c4a49d679f5379e517311d229e Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Thu, 12 Jan 2012 03:44:49 -0500 Subject: [PATCH] Tonality and pitch tuning Tuned the tonality estimator to trigger on signals where only part of the spectrum is tonal. Also tuned the pitch detector not to be confused by short-term correlation. --- celt/celt.c | 12 +++++++----- celt/pitch.c | 10 +++++++++- src/analysis.c | 11 +++++------ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/celt/celt.c b/celt/celt.c index a8a194393..a079702de 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -442,7 +442,7 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C, #ifdef FUZZING is_transient = rand()&0x1; #endif - /*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/ + /*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/ return is_transient; } @@ -1206,8 +1206,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC); + /* Don't search for the last 1.5 octave of the range because + there are too many false-positives due to short-term correlation */ pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, - COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index); + COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index); pitch_index = COMBFILTER_MAXPERIOD-pitch_index; gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, @@ -1619,11 +1621,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f if (st->analysis.valid) { int tonal_target; float tonal; - tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality); - tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal; + tonal = MAX16(0,st->analysis.tonality-.2); + 
tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal; if (pitch_change) tonal_target += (coded_bins<<BITRES)*.8; - /*printf("%f %d\n", tonal, tonal_target);*/ + /*printf("%f %f ", st->analysis.tonality, tonal);*/ new_target = IMAX(tonal_target,new_target); } #endif diff --git a/celt/pitch.c b/celt/pitch.c index 8e906878b..beea61fed 100644 --- a/celt/pitch.c +++ b/celt/pitch.c @@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int T1, T1b; opus_val16 g1; opus_val16 cont=0; + opus_val16 thresh; T1 = (2*T0+k)/(2*k); if (T1 < minperiod) break; @@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, cont = HALF32(prev_gain); else cont = 0; - if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont) + thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont); + /* Bias against very high pitch (very short period) to avoid false-positives + due to short-term correlation */ + if (T1<3*minperiod) + thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont); + else if (T1<2*minperiod) + thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont); + if (g1 > thresh) { best_xy = xy; best_yy = yy; diff --git a/src/analysis.c b/src/analysis.c index 764b37038..1336628c7 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -74,7 +74,7 @@ static const float tweight[NB_TBANDS+1] = { .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 }; -#define NB_TONAL_SKIP_BANDS 0 +#define NB_TONAL_SKIP_BANDS 9 typedef struct { float angle[240]; @@ -265,8 +265,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc frame_stationarity += stationarity; /*band_tonality[b] = tE/(1e-15+E)*/; band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); - //printf("%f ", band_tonality[b]); -#if 1 +#if 0 if (b>=NB_TONAL_SKIP_BANDS) { frame_tonality += tweight[b]*band_tonality[b]; @@ -277,7 +276,7 
@@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; #endif - max_frame_tonality = MAX16(max_frame_tonality, frame_tonality); + max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality); slope += band_tonality[b]*(b-8); /*printf("%f %f ", band_tonality[b], stationarity);*/ if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1) @@ -295,7 +294,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc } tonal->prev_band_tonality[b] = band_tonality[b]; } - //printf("\n"); + frame_loudness = 20*log10(frame_loudness); tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness); tonal->lowECount *= (1-alphaE); @@ -320,7 +319,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc #else info->activity = .5*(1+frame_noisiness-frame_stationarity); #endif - frame_tonality = (max_frame_tonality/(tw_sum)); + frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS)); frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8); tonal->prev_tonality = frame_tonality; info->boost_amount[0] -= frame_tonality+.2; -- GitLab