From dbeb86fd0531b618b501a76414536b27d8d4484f Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jean-marc.valin@usherbrooke.ca>
Date: Mon, 15 Aug 2011 10:01:00 -0400
Subject: [PATCH] Making the IMDCT work on interleaved data

Saves a copy in the decoder
---
 libcelt/celt.c            | 56 ++++++++++++++++++---------------------
 libcelt/mdct.c            |  9 ++++---
 libcelt/mdct.h            |  3 ++-
 libcelt/tests/mdct-test.c |  2 +-
 4 files changed, 34 insertions(+), 36 deletions(-)

diff --git a/libcelt/celt.c b/libcelt/celt.c
index 8de9ddeb8..037b5326f 100644
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -440,42 +440,38 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
    const int C = CHANNELS(_C);
    const int N = mode->shortMdctSize<<LM;
    const int overlap = OVERLAP(mode);
+   VARDECL(opus_val32, x);
+   SAVE_STACK;
+
+   ALLOC(x, N+overlap, opus_val32);
    c=0; do {
       int j;
-         VARDECL(opus_val32, x);
-         VARDECL(opus_val32, tmp);
-         int b;
-         int N2 = N;
-         int B = 1;
-         SAVE_STACK;
-
-         ALLOC(x, N+overlap, opus_val32);
-         ALLOC(tmp, N, opus_val32);
+      int b;
+      int N2 = N;
+      int B = 1;
 
-         if (shortBlocks)
-         {
-            N2 = mode->shortMdctSize;
-            B = shortBlocks;
-         }
-         /* Prevents problems from the imdct doing the overlap-add */
-         CELT_MEMSET(x, 0, overlap);
+      if (shortBlocks)
+      {
+         N2 = mode->shortMdctSize;
+         B = shortBlocks;
+      }
+      /* Prevents problems from the imdct doing the overlap-add */
+      CELT_MEMSET(x, 0, overlap);
 
-         for (b=0;b<B;b++)
-         {
-            /* De-interleaving the sub-frames */
-            for (j=0;j<N2;j++)
-               tmp[j] = X[(j*B+b)+c*N2*B];
-            clt_mdct_backward(&mode->mdct, tmp, x+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
-         }
+      for (b=0;b<B;b++)
+      {
+         /* IMDCT on the interleaved sub-frames */
+         clt_mdct_backward(&mode->mdct, &X[b+c*N2*B], x+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM, B);
+      }
 
-         for (j=0;j<overlap;j++)
-            out_mem[c][j] = x[j] + overlap_mem[c][j];
-         for (;j<N;j++)
-            out_mem[c][j] = x[j];
-         for (j=0;j<overlap;j++)
-            overlap_mem[c][j] = x[N+j];
-         RESTORE_STACK;
+      for (j=0;j<overlap;j++)
+         out_mem[c][j] = x[j] + overlap_mem[c][j];
+      for (;j<N;j++)
+         out_mem[c][j] = x[j];
+      for (j=0;j<overlap;j++)
+         overlap_mem[c][j] = x[N+j];
    } while (++c<C);
+   RESTORE_STACK;
 }
 
 static void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int _C, int downsample, const opus_val16 *coef, celt_sig *mem)
diff --git a/libcelt/mdct.c b/libcelt/mdct.c
index e04f43725..4a41517bd 100644
--- a/libcelt/mdct.c
+++ b/libcelt/mdct.c
@@ -202,7 +202,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
    RESTORE_STACK;
 }
 
-void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const opus_val16 * restrict window, int overlap, int shift)
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out,
+      const opus_val16 * restrict window, int overlap, int shift, int stride)
 {
    int i;
    int N, N2, N4;
@@ -227,7 +228,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
    {
       /* Temp pointers to make it really clear to the compiler what we're doing */
       const kiss_fft_scalar * restrict xp1 = in;
-      const kiss_fft_scalar * restrict xp2 = in+N2-1;
+      const kiss_fft_scalar * restrict xp2 = in+stride*(N2-1);
       kiss_fft_scalar * restrict yp = f2;
       const kiss_twiddle_scalar *t = &l->trig[0];
       for(i=0;i<N4;i++)
@@ -238,8 +239,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
          /* works because the cos is nearly one */
          *yp++ = yr - S_MUL(yi,sine);
          *yp++ = yi + S_MUL(yr,sine);
-         xp1+=2;
-         xp2-=2;
+         xp1+=2*stride;
+         xp2-=2*stride;
       }
    }
 
diff --git a/libcelt/mdct.h b/libcelt/mdct.h
index 9d43ae978..32942ca76 100644
--- a/libcelt/mdct.h
+++ b/libcelt/mdct.h
@@ -60,6 +60,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
 
 /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
     (scales implicitly by 1/2) */
-void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const opus_val16 * restrict window, int overlap, int shift);
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out,
+      const opus_val16 * restrict window, int overlap, int shift, int stride);
 
 #endif
diff --git a/libcelt/tests/mdct-test.c b/libcelt/tests/mdct-test.c
index 25bb01755..604d0fcc0 100644
--- a/libcelt/tests/mdct-test.c
+++ b/libcelt/tests/mdct-test.c
@@ -122,7 +122,7 @@ void test1d(int nfft,int isinverse)
     {
        for (k=0;k<nfft;++k)
           out[k] = 0;
-       clt_mdct_backward(&cfg,in,out, window, nfft/2, 0);
+       clt_mdct_backward(&cfg,in,out, window, nfft/2, 0, 1);
        check_inv(in,out,nfft,isinverse);
     } else {
        clt_mdct_forward(&cfg,in,out,window, nfft/2, 0);
-- 
GitLab