diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c index e67309b293b9ed7cea004651f6cf496cb0ae7542..8d97b4467e99b4456d2eddc6e35b252a8134cbee 100644 --- a/celt/kiss_fft.c +++ b/celt/kiss_fft.c @@ -76,37 +76,6 @@ static void kf_bfly2( } } -static void ki_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx * Fout2; - const kiss_twiddle_cpx * tw1; - kiss_fft_cpx t; - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) - { - C_MULC (t, *Fout2 , *tw1); - tw1 += fstride; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; - } - } -} - static void kf_bfly4( kiss_fft_cpx * Fout, const size_t fstride, @@ -152,50 +121,6 @@ static void kf_bfly4( } } -static void ki_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - const kiss_twiddle_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; - - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) - { - C_MULC(scratch[0],Fout[m] , *tw1 ); - C_MULC(scratch[1],Fout[m2] , *tw2 ); - C_MULC(scratch[2],Fout[m3] , *tw3 ); - - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); - - Fout[m].r = scratch[5].r - scratch[4].i; - Fout[m].i = scratch[5].i + scratch[4].r; - Fout[m3].r = scratch[5].r + scratch[4].i; - Fout[m3].i = scratch[5].i - scratch[4].r; - ++Fout; - } - } -} #ifndef RADIX_TWO_ONLY @@ -250,55 +175,6 @@ static void kf_bfly3( } } -static void ki_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - int i, k; - const size_t m2 = 2*m; - const kiss_twiddle_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_twiddle_cpx epi3; - - kiss_fft_cpx * Fout_beg = Fout; - epi3 = st->twiddles[fstride*m]; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw1=tw2=st->twiddles; - k=m; - do{ - - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); - - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; - - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - - C_MULBYSCALAR( scratch[0] , -epi3.i ); - - C_ADDTO(*Fout,scratch[3]); - - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; - - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; - - ++Fout; - }while(--k); - } -} static void kf_bfly5( kiss_fft_cpx * Fout, @@ -368,73 +244,6 @@ static void kf_bfly5( } } -static void ki_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_state *st, - int m, - int N, - int mm - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int i, u; - kiss_fft_cpx scratch[13]; - const kiss_twiddle_cpx * twiddles = st->twiddles; - const kiss_twiddle_cpx *tw; - kiss_twiddle_cpx ya,yb; - kiss_fft_cpx * Fout_beg = Fout; - - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - tw=st->twiddles; - - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - for ( u=0; u<m; ++u ) { - scratch[0] = *Fout0; - - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } -} #endif @@ -678,52 +487,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou opus_fft_impl(st, fout); } -void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) -{ - int m2, m; - int p; - int L; - int fstride[MAXFACTORS]; - int i; - int shift; - - /* st->shift can be -1 */ - shift = st->shift>0 ? st->shift : 0; - fstride[0] = 1; - L=0; - do { - p = st->factors[2*L]; - m = st->factors[2*L+1]; - fstride[L+1] = fstride[L]*p; - L++; - } while(m!=1); - m = st->factors[2*L-1]; - for (i=L-1;i>=0;i--) - { - if (i!=0) - m2 = st->factors[2*i-1]; - else - m2 = 1; - switch (st->factors[2*i]) - { - case 2: - ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 4: - ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; -#ifndef RADIX_TWO_ONLY - case 3: - ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; - case 5: - ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); - break; -#endif - } - m = m2; - } -} #ifdef TEST_UNIT_DFT_C void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) @@ -733,6 +496,10 @@ void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fo /* Bit-reverse the input */ for (i=0;i<st->nfft;i++) fout[st->bitrev[i]] = fin[i]; - opus_ifft_impl(st, fout); + for (i=0;i<st->nfft;i++) + fout[i].i = -fout[i].i; + opus_fft_impl(st, fout); + for (i=0;i<st->nfft;i++) + fout[i].i = -fout[i].i; } #endif diff --git a/celt/mdct.c b/celt/mdct.c index a6bd6b4cf219b4e2920facf1c84303af724c598c..14d418708e793fc0888554ecf1c5bbf42f36446b 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -262,9 +262,10 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala kiss_fft_cpx yc; yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); - /* works because the cos is nearly one */ - yc.r = yr - S_MUL(yi,sine); - yc.i = yi + S_MUL(yr,sine); + /* Works because the cos is nearly one. We swap real and imag because we + use an FFT instead of an IFFT. */ + yc.i = yr - S_MUL(yi,sine); + yc.r = yi + S_MUL(yr,sine); /* Storing the pre-rotation directly in the bitrev order. */ yp[*bitrev++] = yc; xp1+=2*stride; @@ -272,7 +273,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala } } - opus_ifft_impl(l->kfft[shift], f2); + opus_fft_impl(l->kfft[shift], f2); /* Post-rotate and de-shuffle from both ends of the buffer at once to make it in-place. */ @@ -286,15 +287,17 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala { kiss_fft_scalar re, im, yr, yi; kiss_twiddle_scalar t0, t1; - re = f2[i].r; - im = f2[i].i; + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = f2[i].i; + im = f2[i].r; t0 = t[i<<shift]; t1 = t[(N4-i)<<shift]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ yr = S_MUL(re,t0) - S_MUL(im,t1); yi = S_MUL(im,t0) + S_MUL(re,t1); - re = f2[N4-i-1].r; - im = f2[N4-i-1].i; + /* We swap real and imag because we're using an FFT instead of an IFFT. */ + re = f2[N4-i-1].i; + im = f2[N4-i-1].r; /* works because the cos is nearly one */ yp0[0] = -(yr - S_MUL(yi,sine)); yp1[1] = yi + S_MUL(yr,sine);