From 1e03a6eb0472cc78e33541af11733f4f27ed3219 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Fri, 14 Oct 2011 16:14:36 -0700
Subject: [PATCH] Fix side-channel index for VAD/LBRR flags.

Neither the encoder nor the decoder was incrementing the side-channel
 index for a mid-only frame.
Unfortunately, they used that index to index the VAD flags and LBRR
 flags.
This made the VAD flags for the side channel particularly useless,
 as you couldn't tell which frame a flag belonged to without
 decoding most of the packet.
It also desynched the LBRR information, as it looked at the wrong LBRR
 flags to decide when it had to code a mid-only flag.
If some frames were skipped in the side channel, then the last few
 VAD flags and LBRR flags would be garbage, but still get encoded.

This patch fixes this by continuing to increment nFramesDecoded or
 nFramesEncoded, even when skipping a frame in the side channel.
This makes the side-channel VAD and LBRR flags correspond to the
 correct time periods for frames greater than 20 ms.
It also fixes a bug where if DTX was not used on the packet where
 the side channel got turned off, it would never get used again
 until the encoder attempted to encode something in the side
 channel again.
---
 silk/dec_API.c                | 5 +++--
 silk/decode_frame.c           | 1 -
 silk/enc_API.c                | 5 ++++-
 silk/fixed/encode_frame_FIX.c | 3 ---
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/silk/dec_API.c b/silk/dec_API.c
index 018173efa..9b7bd4a74 100644
--- a/silk/dec_API.c
+++ b/silk/dec_API.c
@@ -104,8 +104,8 @@ opus_int silk_Decode(
         ret += silk_init_decoder( &channel_state[ 1 ] );
     }
 
-    for( n = 0; n < decControl->nChannelsInternal; n++ ) {
-        if( channel_state[ n ].nFramesDecoded == 0 ) {
+    if( channel_state[ 0 ].nFramesDecoded == 0 ) {
+        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
             opus_int fs_kHz_dec;
             if( decControl->payloadSize_ms == 0 ) {
                 /* Assuming packet loss, use 10 ms */
@@ -258,6 +258,7 @@ opus_int silk_Decode(
         } else {
             silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
         }
+        channel_state[ n ].nFramesDecoded++;
     }
 
     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
diff --git a/silk/decode_frame.c b/silk/decode_frame.c
index 54bb920bf..83c92210c 100644
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -127,7 +127,6 @@ TOC(decode_core)
 
     /* Update some decoder state variables */
     psDec->lagPrev = sDecCtrl.pitchL[ psDec->nb_subfr - 1 ];
-    psDec->nFramesDecoded++;
 
     /* Set output frame length */
     *pN = L;
diff --git a/silk/enc_API.c b/silk/enc_API.c
index 7177d4d58..3b4d1a9b6 100644
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -222,6 +222,7 @@ opus_int silk_Encode(
                 psEnc->state_Fxx[ n ].sCmn.LBRR_flags[ i ] = 0;
             }
         }
+        psEnc->state_Fxx[ n ].sCmn.inDTX = psEnc->state_Fxx[ n ].sCmn.useDTX;
     }
     silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );
 
@@ -435,10 +436,12 @@ opus_int silk_Encode(
                     if( ( ret = silk_encode_frame_Fxx( &psEnc->state_Fxx[ n ], nBytesOut, psRangeEnc, condCoding ) ) != 0 ) {
                         silk_assert( 0 );
                     }
-                    psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
+                } else {
+                    psEnc->state_Fxx[ n ].sCmn.VAD_flags[ psEnc->state_Fxx[ n ].sCmn.nFramesEncoded ] = 0;
                 }
                 psEnc->state_Fxx[ n ].sCmn.controlled_since_last_payload = 0;
                 psEnc->state_Fxx[ n ].sCmn.inputBufIx = 0;
+                psEnc->state_Fxx[ n ].sCmn.nFramesEncoded++;
             }
             psEnc->prev_decode_only_middle = psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded - 1 ];
 
diff --git a/silk/fixed/encode_frame_FIX.c b/silk/fixed/encode_frame_FIX.c
index c3da9bcfd..8899c2678 100644
--- a/silk/fixed/encode_frame_FIX.c
+++ b/silk/fixed/encode_frame_FIX.c
@@ -69,9 +69,6 @@ TOC(VAD)
     /**************************************************/
     /* Convert speech activity into VAD and DTX flags */
     /**************************************************/
-    if( psEnc->sCmn.nFramesEncoded == 0 ) {
-        psEnc->sCmn.inDTX = psEnc->sCmn.useDTX;
-    }
     if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) {
         psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY;
         psEnc->sCmn.noSpeechCounter++;
-- 
GitLab