From c681bd0480a8c6a99ff30e240ec52fe129f8eff9 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jmvalin@jmvalin.ca>
Date: Fri, 2 Sep 2011 14:47:26 -0400
Subject: [PATCH] Improved mode/channel/bandwidth control mechanism

The encoder now has tuning parameters for mono/stereo and voice/music. It also
switches to stereo while in super-wideband (SWB), without reducing the bandwidth.
---
 src/opus_encoder.c | 168 +++++++++++++++++++++++++--------------------
 src/test_opus.c    |  12 ++++
 2 files changed, 107 insertions(+), 73 deletions(-)

diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 02c3ef721..25baaa464 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -83,23 +83,43 @@ struct OpusEncoder {
     int          rangeFinal;
 };
 
-/* Transition tables for the voice and audio modes. First column is the
+/* Transition tables for voice and music. First column is the
    middle (memoriless) threshold. The second column is the hysteresis
    (difference with the middle) */
-static const int voice_bandwidth_thresholds[10] = {
+static const opus_int32 mono_voice_bandwidth_thresholds[8] = {
         11000, 1000, /* NB<->MB */
         14000, 1000, /* MB<->WB */
         21000, 2000, /* WB<->SWB */
         29000, 2000, /* SWB<->FB */
 };
-static const int audio_bandwidth_thresholds[10] = {
-        30000,    0, /* MB not allowed */
-        20000, 2000, /* MB<->WB */
-        26000, 2000, /* WB<->SWB */
+static const opus_int32 mono_music_bandwidth_thresholds[8] = {
+        14000, 1000, /* NB<->MB */
+        18000, 2000, /* MB<->WB */
+        24000, 2000, /* WB<->SWB */
         33000, 2000, /* SWB<->FB */
 };
-
-
+static const opus_int32 stereo_voice_bandwidth_thresholds[8] = {
+        11000, 1000, /* NB<->MB */
+        14000, 1000, /* MB<->WB */
+        21000, 2000, /* WB<->SWB */
+        32000, 2000, /* SWB<->FB */
+};
+static const opus_int32 stereo_music_bandwidth_thresholds[8] = {
+        14000, 1000, /* NB<->MB */
+        18000, 2000, /* MB<->WB */
+        24000, 2000, /* WB<->SWB */
+        48000, 2000, /* SWB<->FB */
+};
+/* Threshold bit-rates for switching between mono and stereo */
+static const opus_int32 stereo_voice_threshold = 26000;
+static const opus_int32 stereo_music_threshold = 36000;
+
+/* Threshold bit-rate for switching between SILK/hybrid and CELT-only */
+static const opus_int32 mode_thresholds[2][2] = {
+      /* voice */ /* music */
+      {  48000,      24000}, /* mono */
+      {  48000,      24000}, /* stereo */
+};
 int opus_encoder_get_size(int channels)
 {
     int silkEncSizeBytes, celtEncSizeBytes;
@@ -173,7 +193,7 @@ int opus_encoder_init(OpusEncoder* st, int Fs, int channels, int application)
     st->application = application;
     st->signal_type = OPUS_SIGNAL_AUTO;
     st->user_bandwidth = OPUS_BANDWIDTH_AUTO;
-    st->voice_ratio = 90;
+    st->voice_ratio = -1;
     st->encoder_buffer = st->Fs/100;
 
     st->delay_compensation = st->Fs/400;
@@ -350,9 +370,10 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     VARDECL(opus_val16, pcm_buf);
     int nb_compr_bytes;
     int to_celt = 0;
-    opus_int32 mono_rate;
     opus_uint32 redundant_rng = 0;
     int cutoff_Hz, hp_freq_smth1;
+    int voice_est;
+    opus_int32 equiv_rate;
     ALLOC_STACK;
 
     st->rangeFinal = 0;
@@ -370,41 +391,45 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     else
         st->bitrate_bps = st->user_bitrate_bps;
 
+    /* Equivalent 20-ms rate for mode/channel/bandwidth decisions */
+    equiv_rate = st->bitrate_bps - 60*(st->Fs/frame_size - 50);
+
+    if (st->signal_type == OPUS_SIGNAL_VOICE)
+       voice_est = 127;
+    else if (st->signal_type == OPUS_SIGNAL_MUSIC)
+       voice_est = 0;
+    else if (st->voice_ratio >= 0)
+       voice_est = st->voice_ratio*327>>8;
+    else if (st->application == OPUS_APPLICATION_VOIP)
+       voice_est = 115;
+    else
+       voice_est = 64;
+
+#ifdef FUZZING
+    /* Random mono/stereo decision */
+    if (st->channels == 2 && (rand()&0x1F)==0)
+       st->stream_channels = 3-st->stream_channels;
+#else
     /* Rate-dependent mono-stereo decision */
     if (st->force_mono)
     {
         st->stream_channels = 1;
-    } else if (st->mode == MODE_CELT_ONLY && st->channels == 2)
+    } else if (st->channels == 2)
     {
-        opus_int32 decision_rate;
-        decision_rate = st->bitrate_bps + st->voice_ratio*st->voice_ratio;
-        /* Add some hysteresis */
-        if (st->stream_channels == 2)
-            decision_rate += 4000;
-        else
-            decision_rate -= 4000;
-        if (decision_rate>48000)
-            st->stream_channels = 2;
-        else
-            st->stream_channels = 1;
+       opus_int32 stereo_threshold;
+       stereo_threshold = stereo_music_threshold + ((voice_est*voice_est*(stereo_voice_threshold-stereo_music_threshold))>>14);
+       if (st->stream_channels == 2)
+          stereo_threshold -= 4000;
+       else
+          stereo_threshold += 4000;
+       st->stream_channels = (equiv_rate > stereo_threshold) ? 2 : 1;
     } else {
             st->stream_channels = st->channels;
     }
-
-#ifdef FUZZING
-    if (st->channels == 2 && (rand()&0x1F)==0)
-       st->stream_channels = 3-st->stream_channels;
 #endif
 
-    /* Equivalent bit-rate for mono */
-    mono_rate = st->bitrate_bps;
-    if (st->stream_channels==2)
-        mono_rate = 2*mono_rate/3;
-    /* Compensate for smaller frame sizes assuming an equivalent overhead
-       of 60 bits/frame */
-    mono_rate -= 60*(st->Fs/frame_size - 50);
-
 #ifdef FUZZING
+    /* Random mode switching */
     if ((rand()&0xF)==0)
     {
        if ((rand()&0x1)==0)
@@ -419,44 +444,26 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     }
 #else
     /* Mode selection depending on application and signal type */
-    if (st->application==OPUS_APPLICATION_VOIP)
     {
-        opus_int32 threshold;
-        threshold = 20000;
-        /* OPUS_APPLICATION_VOIP default to auto high-pass */
-        /* Hysteresis */
-        if (st->prev_mode == MODE_CELT_ONLY)
-            threshold -= 4000;
-        else if (st->prev_mode>0)
-            threshold += 4000;
-
-        /* OPUS_APPLICATION_VOIP defaults to MODE_SILK_ONLY */
-        if (st->signal_type == OPUS_SIGNAL_MUSIC && mono_rate > threshold)
-            st->mode = MODE_CELT_ONLY;
-        else
-            st->mode = MODE_SILK_ONLY;
-    } else {/* OPUS_APPLICATION_AUDIO */
-        opus_int32 threshold;
-        /* SILK/CELT threshold is higher for voice than for music */
-        threshold = 36000;
-        /* OPUS_APPLICATION_AUDIO disables the high-pass */
-        if (st->signal_type == OPUS_SIGNAL_MUSIC)
-            threshold -= 20000;
-        else if (st->signal_type == OPUS_SIGNAL_VOICE)
-            threshold += 8000;
-
-        /* Hysteresis */
-        if (st->prev_mode == MODE_CELT_ONLY)
-            threshold -= 4000;
-        else if (st->prev_mode>0)
-            threshold += 4000;
-
-        if (mono_rate>threshold)
-            st->mode = MODE_CELT_ONLY;
-        else
-            st->mode = MODE_SILK_ONLY;
+       int chan;
+       opus_int32 mode_voice, mode_music;
+       opus_int32 threshold;
+
+       chan = (st->channels==2) && !st->force_mono;
+       mode_voice = mode_thresholds[chan][0];
+       mode_music = mode_thresholds[chan][1];
+       threshold = mode_music + ((voice_est*voice_est*(mode_voice-mode_music))>>14);
+
+       /* Hysteresis */
+       if (st->prev_mode == MODE_CELT_ONLY)
+           threshold -= 4000;
+       else if (st->prev_mode>0)
+           threshold += 4000;
+
+       st->mode = (equiv_rate >= threshold) ? MODE_CELT_ONLY: MODE_SILK_ONLY;
     }
 #endif
+
     /* Override the chosen mode to make sure we meet the requested frame size */
     if (st->mode == MODE_CELT_ONLY && frame_size > st->Fs/50)
        st->mode = MODE_SILK_ONLY;
@@ -491,10 +498,24 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     /* Automatic (rate-dependent) bandwidth selection */
     if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
     {
-        const int *bandwidth_thresholds;
+        const opus_int32 *voice_bandwidth_thresholds, *music_bandwidth_thresholds;
+        opus_int32 bandwidth_thresholds[8];
         int bandwidth = OPUS_BANDWIDTH_FULLBAND;
 
-        bandwidth_thresholds = st->mode == MODE_CELT_ONLY ? audio_bandwidth_thresholds : voice_bandwidth_thresholds;
+        if (st->channels==2 && !st->force_mono)
+        {
+           voice_bandwidth_thresholds = stereo_voice_bandwidth_thresholds;
+           music_bandwidth_thresholds = stereo_music_bandwidth_thresholds;
+        } else {
+           voice_bandwidth_thresholds = mono_voice_bandwidth_thresholds;
+           music_bandwidth_thresholds = mono_music_bandwidth_thresholds;
+        }
+        /* Interpolate bandwidth thresholds depending on voice estimation */
+        for (i=0;i<8;i++)
+        {
+           bandwidth_thresholds[i] = music_bandwidth_thresholds[i]
+                    + ((voice_est*voice_est*(voice_bandwidth_thresholds[i]-music_bandwidth_thresholds[i]))>>14);
+        }
         do {
             int threshold, hysteresis;
             threshold = bandwidth_thresholds[2*(bandwidth-OPUS_BANDWIDTH_MEDIUMBAND)];
@@ -506,7 +527,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
                 else
                     threshold += hysteresis;
             }
-            if (mono_rate >= threshold)
+            if (equiv_rate >= threshold)
                 break;
         } while (--bandwidth>OPUS_BANDWIDTH_NARROWBAND);
         st->bandwidth = bandwidth;
@@ -545,6 +566,7 @@ int opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_size,
     if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
         st->mode = MODE_SILK_ONLY;
 
+    /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, st->bandwidth); */
     bytes_target = st->bitrate_bps * frame_size / (st->Fs * 8) - 1;
 
     data += 1;
@@ -1040,7 +1062,7 @@ int opus_encoder_ctl(OpusEncoder *st, int request, ...)
         case OPUS_SET_VOICE_RATIO_REQUEST:
         {
             opus_int32 value = va_arg(ap, opus_int32);
-            if (value>100 || value<0)
+            if (value>100 || value<-1)
                 goto bad_arg;
             st->voice_ratio = value;
         }
diff --git a/src/test_opus.c b/src/test_opus.c
index 7ec1e0593..62af93ec5 100644
--- a/src/test_opus.c
+++ b/src/test_opus.c
@@ -116,6 +116,7 @@ int main(int argc, char *argv[])
     int encode_only=0, decode_only=0;
     int max_frame_size = 960*6;
     int curr_read=0;
+    int sweep_bps = 0;
 
     if (argc < 7 )
     {
@@ -223,6 +224,9 @@ int main(int argc, char *argv[])
         } else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-loss" ) == 0 ) {
             packet_loss_perc = atoi( argv[ args + 1 ] );
             args += 2;
+        } else if( STR_CASEINSENSITIVE_COMPARE( argv[ args ], "-sweep" ) == 0 ) {
+            sweep_bps = atoi( argv[ args + 1 ] );
+            args += 2;
         } else {
             printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
             print_usage( argv );
@@ -363,6 +367,14 @@ int main(int argc, char *argv[])
             }
 
             len[toggle] = opus_encode(enc, in, frame_size, data[toggle], max_payload_bytes);
+            if (sweep_bps!=0)
+            {
+               bitrate_bps += sweep_bps;
+               /* Safety: never let the swept bitrate drop below 1000 bps */
+               if (bitrate_bps<1000)
+                  bitrate_bps = 1000;
+               opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrate_bps));
+            }
             opus_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range[toggle]));
             if (len[toggle] < 0)
             {
-- 
GitLab