From 0f5ff80bacdde3f68e746c5eeede9590149b6ac2 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jmvalin@jmvalin.ca>
Date: Mon, 1 Jul 2013 18:39:34 -0400
Subject: [PATCH] Commenting the speech/music Markov code

---
 src/analysis.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/analysis.c b/src/analysis.c
index e063c7881..a890a06b3 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -484,8 +484,14 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
 
     /*printf("%f %f ", frame_probs[0], frame_probs[1]);*/
     {
-       float tau, beta;
+       /* Probability of state transition */
+       float tau;
+       /* Represents independence of the MLP probabilities, where
+          beta=1 means fully independent. */
+       float beta;
+       /* Denormalized probability of speech (p0) and music (p1) after update */
        float p0, p1;
+       /* Delayed decision variables */
        float s0, m0;
        float psum;
        float speech0;
@@ -501,13 +507,20 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info_out, con
           q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
           beta = .01f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
        }
+       /* p0 and p1 are the probabilities of speech and music at this frame
+          using only information from previous frame and applying the
+          state transition model */
        p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
        p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
+       /* We apply the current probability with exponent beta to work around
+          the fact that the probability estimates aren't independent. */
        p0 *= (float)pow(1-frame_probs[0], beta);
        p1 *= (float)pow(frame_probs[0], beta);
+       /* Normalise the probabilities to get the Marokv probability of music. */
        tonal->music_prob = p1/(p0+p1);
        info->music_prob = tonal->music_prob;
 
+       /* This chunk of code deals with delayed decision. */
        psum=1e-20f;
        speech0 = (float)pow(1-frame_probs[0], beta);
        music0  = (float)pow(frame_probs[0], beta);
-- 
GitLab