From 4c1a90a847a2af528cbfe6924a85ba8173e5c4f9 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Sat, 28 Dec 2013 23:14:26 -0500 Subject: [PATCH] Getting rid of some negations Since we're doing two rotations, we can invert the sign on both. Also adding a few comments for optimizing the FFT. --- celt/kiss_fft.c | 5 +++++ celt/mdct.c | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c index 9869e9eb8..df4c024ad 100644 --- a/celt/kiss_fft.c +++ b/celt/kiss_fft.c @@ -63,6 +63,7 @@ static void kf_bfly2( Fout = Fout_beg + i*mm; Fout2 = Fout + m; tw1 = st->twiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ for(j=0;j<m;j++) { kiss_fft_cpx t; @@ -118,6 +119,8 @@ static void kf_bfly4( { Fout = Fout_beg + i*mm; tw3 = tw2 = tw1 = st->twiddles; + /* For non-custom modes, m=4, otherwise m is guaranteed to be a + multiple of 4. */ for (j=0;j<m;j++) { C_MUL(scratch[0],Fout[m] , *tw1 ); @@ -169,6 +172,7 @@ static void kf_bfly3( { Fout = Fout_beg + i*mm; tw1=tw2=st->twiddles; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ k=m; do { @@ -229,6 +233,7 @@ static void kf_bfly5( Fout3=Fout0+3*m; Fout4=Fout0+4*m; + /* For non-custom modes, m is guaranteed to be a multiple of 4. */ for ( u=0; u<m; ++u ) { scratch[0] = *Fout0; diff --git a/celt/mdct.c b/celt/mdct.c index 15c7ffd7f..ff75d4c73 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -202,8 +202,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar t1 = t[N4+i]; re = *yp++; im = *yp++; - yr = -S_MUL(re,t0) + S_MUL(im,t1); - yi = -S_MUL(im,t0) - S_MUL(re,t1); + yr = S_MUL(re,t0) - S_MUL(im,t1); + yi = S_MUL(im,t0) + S_MUL(re,t1); yc.r = yr; yc.i = yi; yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift); @@ -226,8 +226,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar for(i=0;i<N4;i++) { kiss_fft_scalar yr, yi; - yr = -S_MUL(fp->i,t[N4+i]) + S_MUL(fp->r,t[i]); - yi = -S_MUL(fp->r,t[N4+i]) - S_MUL(fp->i,t[i]); + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]); + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]); *yp1 = yr; *yp2 = yi; fp++; @@ -268,8 +268,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala int rev; kiss_fft_scalar yr, yi; rev = *bitrev++; - yr = -S_MUL(*xp2, t[i]) - S_MUL(*xp1,t[N4+i]); - yi = S_MUL(*xp2, t[N4+i]) - S_MUL(*xp1,t[i]); + yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]); + yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]); /* We swap real and imag because we use an FFT instead of an IFFT. */ yp[2*rev+1] = yr; yp[2*rev] = yi; @@ -300,19 +300,19 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala t1 = t[N4+i]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(im,t0) - S_MUL(re,t1); + yi = S_MUL(re,t1) - S_MUL(im,t0); /* We swap real and imag because we're using an FFT instead of an IFFT. */ re = yp1[1]; im = yp1[0]; - yp0[0] = -yr; + yp0[0] = yr; yp1[1] = yi; t0 = t[(N4-i-1)]; t1 = t[(N2-i-1)]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ yr = S_MUL(re,t0) + S_MUL(im,t1); - yi = S_MUL(im,t0) - S_MUL(re,t1); - yp1[0] = -yr; + yi = S_MUL(re,t1) - S_MUL(im,t0); + yp1[0] = yr; yp0[1] = yi; yp0 += 2; yp1 -= 2; -- GitLab