Implemented two pre-echo avoidance techniques: time-domain pre-emphasis and

per-band IDCT.

Implemented two pre-echo avoidance techniques: time-domain pre-emphasis and
12b22484 · Jean-Marc Valin · b0c153b1 · 12b22484 · 12b22484 · 12b22484
Commit 12b22484 authored 16 years ago by Jean-Marc Valin
--- a/libcelt/bands.c
+++ b/libcelt/bands.c
@@ -42,6 +42,23 @@
 #include "os_support.h"
 #include "mathops.h"

+static void dctIV(float *X, int len, int dim)
+{
+   int d, n, k;
+   for (d=0;d<dim;d++)
+   {
+      float x[len];
+      for (n=0;n<len;n++)
+         x[n] = X[dim*n+d];
+      for (k=0;k<len;k++)
+      {
+         float sum = 0;
+         for (n=0;n<len;n++)
+            sum += x[n]*cos(M_PI/len*(n+.5)*(k+.5));
+         X[dim*k+d] = sqrt(2.f/len)*sum;
+      }
+   }
+}
 #if 0
 void exp_rotation(celt_norm_t *X, int len, int dir, int stride, int iter)
 {
@@ -377,7 +394,7 @@ void stereo_decision(const CELTMode *m, celt_norm_t * restrict X, int *stereo_mo


 /* Quantisation of the residual */
-void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_enc *enc)
+void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_enc *enc)
 {
   int i, j, bits;
   const celt_int16_t * restrict eBands = m->eBands;
@@ -411,14 +428,19 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
      q = pulses[i];
      n = SHL16(celt_sqrt(C*(eBands[i+1]-eBands[i])),11);

+      if (time_domain)
+         dctIV(X+C*eBands[i], eBands[i+1]-eBands[i], C);
      /* If pitch isn't available, use intra-frame prediction */
      if (eBands[i] >= m->pitchEnd || q<=0)
      {
         q -= 1;
-         if (q<0)
-            intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
-         else
-            intra_prediction(m, X+C*eBands[i], W+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], enc);
+         if (!time_domain)
+         {
+            if (q<0)
+               intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
+            else
+               intra_prediction(m, X+C*eBands[i], W+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], enc);
+         }
      }
      
      if (q > 0)
@@ -438,6 +460,8 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
         for (j=C*eBands[i];j<C*eBands[i+1];j++)
            X[j] = P[j];
      }
+      if (time_domain)
+         dctIV(X+C*eBands[i], eBands[i+1]-eBands[i], C);
      for (j=C*eBands[i];j<C*eBands[i+1];j++)
         norm[j] = MULT16_16_Q15(n,X[j]);
   }
@@ -445,7 +469,7 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
 }

 /* Decoding of the residual */
-void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_dec *dec)
+void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_dec *dec)
 {
   int i, j, bits;
   const celt_int16_t * restrict eBands = m->eBands;
@@ -478,10 +502,13 @@ void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P,
      if (eBands[i] >= m->pitchEnd || q<=0)
      {
         q -= 1;
-         if (q<0)
-            intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
-         else
-            intra_unquant(m, X+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], dec);
+         if (!time_domain)
+         {
+            if (q<0)
+               intra_fold(m, X+C*eBands[i], eBands[i+1]-eBands[i], norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1]);
+            else
+               intra_unquant(m, X+C*eBands[i], eBands[i+1]-eBands[i], q, norm, P+C*eBands[i], eBands[i], eBands[m->nbEBands+1], dec);
+         }
      }
      
      if (q > 0)
@@ -498,6 +525,8 @@ void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P,
         for (j=C*eBands[i];j<C*eBands[i+1];j++)
            X[j] = P[j];
      }
+      if (time_domain)
+         dctIV(X+C*eBands[i], eBands[i+1]-eBands[i], C);
      for (j=C*eBands[i];j<C*eBands[i+1];j++)
         norm[j] = MULT16_16_Q15(n,X[j]);
   }

--- a/libcelt/bands.h
+++ b/libcelt/bands.h
@@ -86,7 +86,7 @@ void pitch_quant_bands(const CELTMode *m, celt_norm_t * restrict P, const celt_p
 * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
 * @param enc Entropy encoder
 */
-void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_enc *enc);
+void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, celt_mask_t *W, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_enc *enc);

 /** Decoding of the residual spectrum
 * @param m Mode data 
@@ -95,7 +95,7 @@ void quant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, ce
 * @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
 * @param dec Entropy decoder
 */
-void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, ec_dec *dec);
+void unquant_bands(const CELTMode *m, celt_norm_t * restrict X, celt_norm_t *P, const celt_ener_t *bandE, const int *stereo_mode, int total_bits, int time_domain, ec_dec *dec);

 void stereo_decision(const CELTMode *m, celt_norm_t * restrict X, int *stereo_mode, int len);


--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -52,7 +52,10 @@

 static const celt_word16_t preemph = QCONST16(0.8f,15);

-
+static const float gainWindow[16] = {
+   0.0085135, 0.0337639, 0.0748914, 0.1304955, 0.1986827, 0.2771308, 0.3631685, 0.4538658,
+   0.5461342, 0.6368315, 0.7228692, 0.8013173, 0.8695045, 0.9251086, 0.9662361, 0.9914865};
+   
 /** Encoder state 
 @brief Encoder state
 */
@@ -187,7 +190,7 @@ static void compute_mdcts(const CELTMode *mode, const celt_word16_t * restrict w
 }

 /** Compute the IMDCT and apply window for all sub-frames and all channels in a frame */
-static void compute_inv_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t *X, celt_sig_t * restrict out_mem)
+static void compute_inv_mdcts(const CELTMode *mode, const celt_word16_t * restrict window, celt_sig_t *X, int transient_time, float transient_gain, celt_sig_t * restrict out_mem)
 {
   int c, N4;
   const int C = CHANNELS(mode);
@@ -198,7 +201,7 @@ static void compute_inv_mdcts(const CELTMode *mode, const celt_word16_t * restri
   for (c=0;c<C;c++)
   {
      int j;
-      if (C==1) {
+      if (transient_time<0 && C==1) {
         mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), window, overlap);
      } else {
         VARDECL(celt_word32_t, x);
@@ -212,6 +215,13 @@ static void compute_inv_mdcts(const CELTMode *mode, const celt_word16_t * restri
         /* Prevents problems from the imdct doing the overlap-add */
         CELT_MEMSET(x+N4, 0, overlap);
         mdct_backward(lookup, tmp, x, window, overlap);
+         if (transient_time >= 0)
+         {
+            for (j=0;j<16;j++)
+               x[N4+transient_time+j-16] *= 1+gainWindow[j]*(transient_gain-1);
+            for (j=transient_time;j<N+overlap;j++)
+               x[N4+j] *= transient_gain;
+         }
         /* The first and last part would need to be set to zero if we actually
         wanted to use them. */
         for (j=0;j<overlap;j++)
@@ -241,6 +251,9 @@ int celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, unsigned
 #ifdef EXP_PSY
   VARDECL(celt_word32_t, mask);
 #endif
+   int time_domain=0;
+   int transient_time;
+   float transient_gain;
   const int C = CHANNELS(st->mode);
   SAVE_STACK;

@@ -267,9 +280,76 @@ int celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, unsigned
      }
   }
   CELT_COPY(st->in_mem, in+C*(2*N-2*N4-st->overlap), C*st->overlap);
-
+   if (1) {
+      int len = N+st->overlap;
+      float maxR, maxD;
+      float begin[C*len], end[C*len];
+      begin[0] = in[0]*in[0];
+      for (i=1;i<len*C;i++)
+         begin[i] = begin[i-1]+in[i]*in[i];
+      end[len-1] = in[len-1]*in[len-1];
+      for (i=C*len-2;i>=0;i--)
+         end[i] = end[i+1] + in[i]*in[i];
+      maxD = 0;
+      maxR = 1;
+      transient_time = -1;
+      for (i=8*C;i<C*(len-8);i++)
+      {
+         float diff = sqrt(sqrt(end[i]/(C*len-i)))-sqrt(sqrt(begin[i]/(i)));
+         float ratio = ((1000+end[i])*i)/((1000+begin[i])*(C*len-i));
+         if (diff > maxD)
+         {
+            maxD = diff;
+            maxR = ratio;
+            transient_time = i;
+         }
+      }
+      transient_time /= C;
+      if (transient_time<32)
+      {
+         transient_time = -1;
+         maxR = 0;
+      }
+      if (maxR > 20)
+      {
+         float gain_1;
+         ec_enc_bits(&st->enc, 1, 1);
+         if (maxR < 30)
+         {
+            transient_gain = 1;
+            ec_enc_bits(&st->enc, 0, 2);
+         } else if (maxR < 100)
+         {
+            transient_gain = 2;
+            ec_enc_bits(&st->enc, 1, 2);
+         } else if (maxR < 500)
+         {
+            transient_gain = 4;
+            ec_enc_bits(&st->enc, 2, 2);
+         } else
+         {
+            transient_gain = 8;
+            ec_enc_bits(&st->enc, 3, 2);
+         }
+         ec_enc_uint(&st->enc, transient_time, len);
+         for (c=0;c<C;c++)
+            for (i=0;i<16;i++)
+               in[C*(transient_time+i-16)+c] /= 1+gainWindow[i]*(transient_gain-1);
+         gain_1 = 1./transient_gain;
+         for (c=0;c<C;c++)
+            for (i=transient_time;i<len;i++)
+               in[C*i+c] *= gain_1;
+         time_domain = 1;
+      } else {
+         ec_enc_bits(&st->enc, 0, 1);
+         transient_time = -1;
+         transient_gain = 1;
+         time_domain = 0;
+      }
+   }
   /* Pitch analysis: we do it early to save on the peak stack space */
-   find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
+   if (!time_domain)
+      find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);

   ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
   
@@ -316,7 +396,8 @@ int celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, unsigned
   /*for (i=0;i<N*B*C;i++)printf("%f ", X[i]);printf("\n");*/

   /* Compute MDCTs of the pitch part */
-   compute_mdcts(st->mode, st->mode->window, st->out_mem+pitch_index*C, freq);
+   if (!time_domain)
+      compute_mdcts(st->mode, st->mode->window, st->out_mem+pitch_index*C, freq);

   {
      /* Normalise the pitch vector as well (discard the energies) */
@@ -328,7 +409,7 @@ int celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, unsigned
   }
   curr_power = bandE[0]+bandE[1]+bandE[2];
   /* Check if we can safely use the pitch (i.e. effective gain isn't too high) */
-   if (MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power)
+   if (!time_domain && (MULT16_32_Q15(QCONST16(.1f, 15),curr_power) + QCONST32(10.f,ENER_SHIFT) < pitch_power))
   {
      /* Simulates intensity stereo */
      /*for (i=30;i<N*B;i++)
@@ -357,7 +438,7 @@ int celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, unsigned
   /*for (i=0;i<B*N;i++) printf("%f ",P[i]);printf("\n");*/

   /* Residual quantisation */
-   quant_bands(st->mode, X, P, NULL, bandE, stereo_mode, nbCompressedBytes*8, &st->enc);
+   quant_bands(st->mode, X, P, NULL, bandE, stereo_mode, nbCompressedBytes*8, time_domain, &st->enc);
   
   if (C==2)
   {
@@ -369,13 +450,13 @@ int celt_encode(CELTEncoder * restrict st, celt_int16_t * restrict pcm, unsigned

   CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->overlap-N));

-   compute_inv_mdcts(st->mode, st->mode->window, freq, st->out_mem);
+   compute_inv_mdcts(st->mode, st->mode->window, freq, transient_time, transient_gain, st->out_mem);
   /* De-emphasis and put everything back at the right place in the synthesis history */
 #ifndef SHORTCUTS
   for (c=0;c<C;c++)
   {
      int j;
-      const celt_sig_t * restrict outp=st->out_mem+C*(MAX_PERIOD-N)+c;
+      celt_sig_t * restrict outp=st->out_mem+C*(MAX_PERIOD-N)+c;
      celt_int16_t * restrict pcmp = pcm+c;
      for (j=0;j<N;j++)
      {
@@ -536,7 +617,7 @@ static void celt_decode_lost(CELTDecoder * restrict st, short * restrict pcm)
   
   CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->mode->overlap-N));
   /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, st->mode->window, freq, st->out_mem);
+   compute_inv_mdcts(st->mode, st->mode->window, freq, -1, 1, st->out_mem);

   for (c=0;c<C;c++)
   {
@@ -565,6 +646,9 @@ int celt_decode(CELTDecoder * restrict st, unsigned char *data, int len, celt_in
   VARDECL(celt_ener_t, bandE);
   VARDECL(celt_pgain_t, gains);
   VARDECL(int, stereo_mode);
+   int time_domain;
+   int transient_time;
+   float transient_gain;
   const int C = CHANNELS(st->mode);
   SAVE_STACK;

@@ -595,6 +679,30 @@ int celt_decode(CELTDecoder * restrict st, unsigned char *data, int len, celt_in
   ec_byte_readinit(&buf,data,len);
   ec_dec_init(&dec,&buf);
   
+   time_domain = ec_dec_bits(&dec, 1);
+   if (time_domain)
+   {
+      int gainid = ec_dec_bits(&dec, 2);
+      switch(gainid) {
+         case 0:
+            transient_gain = 1;
+            break;
+         case 1:
+            transient_gain = 2;
+            break;
+         case 2:
+            transient_gain = 4;
+            break;
+         case 3:
+         default:
+            transient_gain = 8;
+            break;
+      }
+      transient_time = ec_dec_uint(&dec, N+st->mode->overlap);
+   } else {
+      transient_time = -1;
+      transient_gain = 1;
+   }
   /* Get the pitch gains */
   has_pitch = unquant_pitch(gains, st->mode->nbPBands, &dec);
   
@@ -627,7 +735,7 @@ int celt_decode(CELTDecoder * restrict st, unsigned char *data, int len, celt_in
   pitch_quant_bands(st->mode, P, gains);

   /* Decode fixed codebook and merge with pitch */
-   unquant_bands(st->mode, X, P, bandE, stereo_mode, len*8, &dec);
+   unquant_bands(st->mode, X, P, bandE, stereo_mode, len*8, time_domain, &dec);

   if (C==2)
   {
@@ -639,7 +747,7 @@ int celt_decode(CELTDecoder * restrict st, unsigned char *data, int len, celt_in

   CELT_MOVE(st->out_mem, st->out_mem+C*N, C*(MAX_PERIOD+st->overlap-N));
   /* Compute inverse MDCTs */
-   compute_inv_mdcts(st->mode, st->mode->window, freq, st->out_mem);
+   compute_inv_mdcts(st->mode, st->mode->window, freq, transient_time, transient_gain, st->out_mem);

   for (c=0;c<C;c++)
   {