Skip to content
Snippets Groups Projects
Commit a2054578 authored by Jean-Marc Valin
Browse files

Adds automatic bandwidth detection

Haven't tested it much, and not sure how to use it without risk
for high-quality streams that may change bandwidth.
parent 3e52daed
No related branches found
No related tags found
No related merge requests found
...@@ -80,6 +80,7 @@ typedef struct { ...@@ -80,6 +80,7 @@ typedef struct {
float prev_tonality; float prev_tonality;
float E[NB_FRAMES][NB_TBANDS]; float E[NB_FRAMES][NB_TBANDS];
float lowE[NB_TBANDS], highE[NB_TBANDS]; float lowE[NB_TBANDS], highE[NB_TBANDS];
float meanE[NB_TBANDS], meanRE[NB_TBANDS];
float mem[32]; float mem[32];
float cmean[8]; float cmean[8];
float std[9]; float std[9];
...@@ -90,6 +91,7 @@ typedef struct { ...@@ -90,6 +91,7 @@ typedef struct {
int last_music; int last_music;
int last_transition; int last_transition;
int count; int count;
int opus_bandwidth;
} TonalityAnalysisState; } TonalityAnalysisState;
void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C) void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)
...@@ -115,13 +117,17 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc ...@@ -115,13 +117,17 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
float frame_stationarity; float frame_stationarity;
float relativeE; float relativeE;
float frame_prob; float frame_prob;
float alpha, alphaE; float alpha, alphaE, alphaE2;
float frame_loudness; float frame_loudness;
float bandwidth_mask;
int bandwidth=0;
float bandE[NB_TBANDS];
celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode)); celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
tonal->last_transition++; tonal->last_transition++;
alpha = 1.f/IMIN(20, 1+tonal->count); alpha = 1.f/IMIN(20, 1+tonal->count);
alphaE = 1.f/IMIN(50, 1+tonal->count); alphaE = 1.f/IMIN(50, 1+tonal->count);
alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
if (tonal->count<4) if (tonal->count<4)
tonal->music_prob = .5; tonal->music_prob = .5;
...@@ -201,6 +207,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc ...@@ -201,6 +207,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
info->boost_amount[0]=info->boost_amount[1]=0; info->boost_amount[0]=info->boost_amount[1]=0;
info->boost_band[0]=info->boost_band[1]=0; info->boost_band[0]=info->boost_band[1]=0;
frame_loudness = 0; frame_loudness = 0;
bandwidth_mask = 0;
for (b=0;b<NB_TBANDS;b++) for (b=0;b<NB_TBANDS;b++)
{ {
float E=0, tE=0, nE=0; float E=0, tE=0, nE=0;
...@@ -214,11 +221,19 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc ...@@ -214,11 +221,19 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
tE += binE*tonality[i]; tE += binE*tonality[i];
nE += binE*2*(.5-noisiness[i]); nE += binE*2*(.5-noisiness[i]);
} }
bandE[b] = E;
tonal->E[tonal->E_count][b] = E; tonal->E[tonal->E_count][b] = E;
frame_noisiness += nE/(1e-15+E); frame_noisiness += nE/(1e-15+E);
frame_loudness += sqrt(E+1e-10); frame_loudness += sqrt(E+1e-10);
/* Add a reasonable noise floor */ /* Add a reasonable noise floor */
tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);
/* 13 dB slope for spreading function */
bandwidth_mask = MAX32(.05*bandwidth_mask, E);
/* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)
bandwidth = b;
logE[b] = log(E+1e-10); logE[b] = log(E+1e-10);
tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01); tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);
tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1); tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);
...@@ -260,6 +275,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc ...@@ -260,6 +275,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
} }
tonal->prev_band_tonality[b] = band_tonality[b]; tonal->prev_band_tonality[b] = band_tonality[b];
} }
frame_loudness = 20*log10(frame_loudness); frame_loudness = 20*log10(frame_loudness);
tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness); tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
tonal->lowECount *= (1-alphaE); tonal->lowECount *= (1-alphaE);
...@@ -369,5 +385,20 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc ...@@ -369,5 +385,20 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
printf("%f ", features[i]); printf("%f ", features[i]);
printf("\n");*/ printf("\n");*/
/* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */
if (bandwidth == NB_TBANDS-1 || tonal->count<100)
{
tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
} else {
int close_enough = 0;
if (bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)
close_enough=1;
if (bandwidth<=11 || (bandwidth==12 && close_enough))
tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
else if (bandwidth<=13)
tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
else if (bandwidth<=15 || (bandwidth==16 && close_enough))
tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
}
info->valid = 1; info->valid = 1;
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment