From 6211c90def21b649e7c30e75ea1ea07e2a4c8483 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <Jean-Marc.Valin@csiro.au> Date: Fri, 8 Feb 2008 14:21:20 +1100 Subject: [PATCH] Split the radix functions into forward and backward versions, removed the "inverse" flag from the state so it can be shared between the forward and inverse transforms. --- libcelt/_kiss_fft_guts.h | 1 - libcelt/kiss_fft.c | 783 +++++++++++++++++++++++---------------- libcelt/kiss_fft.h | 4 +- libcelt/kiss_fftr.c | 16 +- libcelt/kiss_fftr.h | 2 +- libcelt/mdct.c | 6 +- libcelt/mdct.h | 1 - tests/dft-test.c | 7 +- tests/real-fft-test.c | 12 +- 9 files changed, 477 insertions(+), 355 deletions(-) diff --git a/libcelt/_kiss_fft_guts.h b/libcelt/_kiss_fft_guts.h index c29172891..68465147a 100644 --- a/libcelt/_kiss_fft_guts.h +++ b/libcelt/_kiss_fft_guts.h @@ -29,7 +29,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND struct kiss_fft_state{ int nfft; - int inverse; int factors[2*MAXFACTORS]; int *bitrev; kiss_fft_cpx twiddles[1]; diff --git a/libcelt/kiss_fft.c b/libcelt/kiss_fft.c index 7813d2069..eb864e608 100644 --- a/libcelt/kiss_fft.c +++ b/libcelt/kiss_fft.c @@ -1,6 +1,8 @@ /* Copyright (c) 2003-2004, Mark Borgerding +Lots of modifications by JMV Copyright (c) 2005-2007, Jean-Marc Valin +Copyright (c) 2008, Jean-Marc Valin, CSIRO All rights reserved. @@ -27,370 +29,455 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND */ static void kf_bfly2( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m, - int N, - int mm - ) + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int N, + int mm + ) { - kiss_fft_cpx * Fout2; - kiss_fft_cpx * tw1; - kiss_fft_cpx t; - if (!st->inverse) { - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) - { + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1; + kiss_fft_cpx t; + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { /* Almost the same as the code path below, except that we divide the input by two - (while keeping the best accuracy possible) */ - celt_word32_t tr, ti; - tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1); - ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1); - tw1 += fstride; - Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15); - Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15); - Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15); - Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15); - ++Fout2; - ++Fout; - } - } - } else { - int i,j; - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - Fout2 = Fout + m; - tw1 = st->twiddles; - for(j=0;j<m;j++) - { - C_MULC (t, *Fout2 , *tw1); - tw1 += fstride; - C_SUB( *Fout2 , *Fout , t ); - C_ADDTO( *Fout , t ); - ++Fout2; - ++Fout; - } - } - } + (while keeping the best accuracy possible) */ + celt_word32_t tr, ti; + tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1); + ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1); + tw1 += fstride; + Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15); + Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15); + Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15); + Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15); + ++Fout2; + ++Fout; + } + } } -static void kf_bfly4( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m, - int N, - int mm - ) +static void ki_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int N, + int mm + ) { - kiss_fft_cpx *tw1,*tw2,*tw3; - kiss_fft_cpx scratch[6]; - const size_t m2=2*m; - const size_t m3=3*m; - int i, j; + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1; + kiss_fft_cpx t; + int i,j; + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + Fout2 = Fout + m; + tw1 = st->twiddles; + for(j=0;j<m;j++) + { + C_MULC (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + } + } +} - if (st->inverse) - { - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) - { - C_MULC(scratch[0],Fout[m] , *tw1 ); - C_MULC(scratch[1],Fout[m2] , *tw2 ); - C_MULC(scratch[2],Fout[m3] , *tw3 ); +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + const size_t m2=2*m; + const size_t m3=3*m; + int i, j; + + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MUL4(scratch[0],Fout[m] , *tw1 ); + C_MUL4(scratch[1],Fout[m2] , *tw2 ); + C_MUL4(scratch[2],Fout[m3] , *tw3 ); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); + Fout->r = PSHR16(Fout->r, 2); + Fout->i = PSHR16(Fout->i, 2); + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + Fout[m2].r = PSHR16(Fout[m2].r, 2); + Fout[m2].i = PSHR16(Fout[m2].i, 2); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); - Fout[m].r = scratch[5].r - scratch[4].i; - Fout[m].i = scratch[5].i + scratch[4].r; - Fout[m3].r = scratch[5].r + scratch[4].i; - Fout[m3].i = scratch[5].i - scratch[4].r; - ++Fout; - } - } - } else - { - kiss_fft_cpx * Fout_beg = Fout; - for (i=0;i<N;i++) - { - Fout = Fout_beg + i*mm; - tw3 = tw2 = tw1 = st->twiddles; - for (j=0;j<m;j++) - { - C_MUL4(scratch[0],Fout[m] , *tw1 ); - C_MUL4(scratch[1],Fout[m2] , *tw2 ); - C_MUL4(scratch[2],Fout[m3] , *tw3 ); + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + ++Fout; + } + } +} + +static void ki_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int N, + int mm + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + const size_t m2=2*m; + const size_t m3=3*m; + int i, j; + + kiss_fft_cpx * Fout_beg = Fout; + for (i=0;i<N;i++) + { + Fout = Fout_beg + i*mm; + tw3 = tw2 = tw1 = st->twiddles; + for (j=0;j<m;j++) + { + C_MULC(scratch[0],Fout[m] , *tw1 ); + C_MULC(scratch[1],Fout[m2] , *tw2 ); + C_MULC(scratch[2],Fout[m3] , *tw3 ); - Fout->r = PSHR16(Fout->r, 2); - Fout->i = PSHR16(Fout->i, 2); - C_SUB( scratch[5] , *Fout, scratch[1] ); - C_ADDTO(*Fout, scratch[1]); - C_ADD( scratch[3] , scratch[0] , scratch[2] ); - C_SUB( scratch[4] , scratch[0] , scratch[2] ); - Fout[m2].r = PSHR16(Fout[m2].r, 2); - Fout[m2].i = PSHR16(Fout[m2].i, 2); - C_SUB( Fout[m2], *Fout, scratch[3] ); - tw1 += fstride; - tw2 += fstride*2; - tw3 += fstride*3; - C_ADDTO( *Fout , scratch[3] ); + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); - Fout[m].r = scratch[5].r + scratch[4].i; - Fout[m].i = scratch[5].i - scratch[4].r; - Fout[m3].r = scratch[5].r - scratch[4].i; - Fout[m3].i = scratch[5].i + scratch[4].r; - ++Fout; - } - } - } + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + ++Fout; + } + } } + static void kf_bfly3( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - size_t m - ) + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) { - size_t k=m; - const size_t m2 = 2*m; - kiss_fft_cpx *tw1,*tw2; - kiss_fft_cpx scratch[5]; - kiss_fft_cpx epi3; - epi3 = st->twiddles[fstride*m]; + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; - tw1=tw2=st->twiddles; - if (st->inverse) { - do{ - if (!st->inverse) { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); - } + tw1=tw2=st->twiddles; + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); - C_MULC(scratch[1],Fout[m] , *tw1); - C_MULC(scratch[2],Fout[m2] , *tw2); + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - C_MULBYSCALAR( scratch[0] , -epi3.i ); + C_MULBYSCALAR( scratch[0] , epi3.i ); - C_ADDTO(*Fout,scratch[3]); + C_ADDTO(*Fout,scratch[3]); - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; - ++Fout; - }while(--k); - } else { - do{ - if (!st->inverse) { - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); - } + ++Fout; + }while(--k); +} - C_MUL(scratch[1],Fout[m] , *tw1); - C_MUL(scratch[2],Fout[m2] , *tw2); +static void ki_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; - C_ADD(scratch[3],scratch[1],scratch[2]); - C_SUB(scratch[0],scratch[1],scratch[2]); - tw1 += fstride; - tw2 += fstride*2; + tw1=tw2=st->twiddles; + do{ - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + C_MULC(scratch[1],Fout[m] , *tw1); + C_MULC(scratch[2],Fout[m2] , *tw2); - C_MULBYSCALAR( scratch[0] , epi3.i ); + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; - C_ADDTO(*Fout,scratch[3]); + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); - Fout[m2].r = Fout[m].r + scratch[0].i; - Fout[m2].i = Fout[m].i - scratch[0].r; + C_MULBYSCALAR( scratch[0] , -epi3.i ); - Fout[m].r -= scratch[0].i; - Fout[m].i += scratch[0].r; + C_ADDTO(*Fout,scratch[3]); - ++Fout; - }while(--k); - } -} + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; -static void kf_bfly5( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m - ) -{ - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; - int u; - kiss_fft_cpx scratch[13]; - kiss_fft_cpx * twiddles = st->twiddles; - kiss_fft_cpx *tw; - kiss_fft_cpx ya,yb; - ya = twiddles[fstride*m]; - yb = twiddles[fstride*2*m]; - - Fout0=Fout; - Fout1=Fout0+m; - Fout2=Fout0+2*m; - Fout3=Fout0+3*m; - Fout4=Fout0+4*m; - - tw=st->twiddles; - if (st->inverse) { - - for ( u=0; u<m; ++u ) { - if (!st->inverse) { - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); - } - scratch[0] = *Fout0; - - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); - - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); - - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); - - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); - - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } else { - for ( u=0; u<m; ++u ) { - if (!st->inverse) { - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); - } - scratch[0] = *Fout0; - - C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); - C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); - C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); - C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); - - C_ADD( scratch[7],scratch[1],scratch[4]); - C_SUB( scratch[10],scratch[1],scratch[4]); - C_ADD( scratch[8],scratch[2],scratch[3]); - C_SUB( scratch[9],scratch[2],scratch[3]); - - Fout0->r += scratch[7].r + scratch[8].r; - Fout0->i += scratch[7].i + scratch[8].i; - - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); - - scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); - scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; - C_SUB(*Fout1,scratch[5],scratch[6]); - C_ADD(*Fout4,scratch[5],scratch[6]); + ++Fout; + }while(--k); +} - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); - scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); - scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); - C_ADD(*Fout2,scratch[11],scratch[12]); - C_SUB(*Fout3,scratch[11],scratch[12]); +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; - } - } +static void ki_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + scratch[0] = *Fout0; + + C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); + C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); + scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); + scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } } /* perform the butterfly for one stage of a mixed radix FFT */ static void kf_bfly_generic( - kiss_fft_cpx * Fout, - const size_t fstride, - const kiss_fft_cfg st, - int m, - int p - ) + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) { - int u,k,q1,q; - kiss_fft_cpx * twiddles = st->twiddles; - kiss_fft_cpx t; - kiss_fft_cpx scratchbuf[17]; - int Norig = st->nfft; - - /*CHECKBUF(scratchbuf,nscratchbuf,p);*/ - if (p>17) - celt_fatal("KissFFT: max radix supported is 17"); + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + kiss_fft_cpx scratchbuf[17]; + int Norig = st->nfft; + + /*CHECKBUF(scratchbuf,nscratchbuf,p);*/ + if (p>17) + celt_fatal("KissFFT: max radix supported is 17"); - for ( u=0; u<m; ++u ) { - k=u; - for ( q1=0 ; q1<p ; ++q1 ) { - scratchbuf[q1] = Fout[ k ]; - if (!st->inverse) { - C_FIXDIV(scratchbuf[q1],p); - } - k += m; - } + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratchbuf[q1] = Fout[ k ]; + C_FIXDIV(scratchbuf[q1],p); + k += m; + } - k=u; - for ( q1=0 ; q1<p ; ++q1 ) { - int twidx=0; - Fout[ k ] = scratchbuf[0]; - for (q=1;q<p;++q ) { - twidx += fstride * k; - if (twidx>=Norig) twidx-=Norig; - if (st->inverse) - C_MULC(t,scratchbuf[q] , twiddles[twidx] ); - else - C_MUL(t,scratchbuf[q] , twiddles[twidx] ); - C_ADDTO( Fout[ k ] ,t); - } - k += m; - } - } + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratchbuf[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MUL(t,scratchbuf[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } +} + +static void ki_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + kiss_fft_cpx scratchbuf[17]; + int Norig = st->nfft; + + /*CHECKBUF(scratchbuf,nscratchbuf,p);*/ + if (p>17) + celt_fatal("KissFFT: max radix supported is 17"); + + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratchbuf[q1] = Fout[ k ]; + k += m; + } + + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratchbuf[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MULC(t,scratchbuf[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } } static @@ -456,6 +543,36 @@ void kf_work( } } +static + void ki_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st, + int N, + int s2, + int m2 + ) +{ + int i; + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/ + if (m!=1) + ki_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); + + switch (p) { + case 2: ki_bfly2(Fout,fstride,st,m, N, m2); break; + case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly3(Fout,fstride,st,m);} break; + case 4: ki_bfly4(Fout,fstride,st,m, N, m2); break; + case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly5(Fout,fstride,st,m);} break; + default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; ki_bfly_generic(Fout,fstride,st,m,p);} break; + } +} + /* facbuf is populated by p1,m1,p2,m2, ... where p[i] * m[i] = m[i-1] @@ -488,7 +605,7 @@ void kf_factor(int n,int * facbuf) * The return value is a contiguous block of memory, allocated with malloc. As such, * It can be freed with free(), rather than a kiss_fft-specific function. * */ -kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) +kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem ) { kiss_fft_cfg st=NULL; size_t memneeded = sizeof(struct kiss_fft_state) @@ -504,10 +621,9 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem if (st) { int i; st->nfft=nfft; - st->inverse = inverse_fft; #ifdef FIXED_POINT for (i=0;i<nfft;++i) { - celt_word32_t phase = i; + celt_word32_t phase = -i; kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft)); } #else @@ -548,3 +664,22 @@ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) kiss_fft_stride(cfg,fin,fout,1); } +void kiss_ifft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) + { + celt_fatal("In-place FFT not supported"); + } else { + /* Bit-reverse the input */ + int i; + for (i=0;i<st->nfft;i++) + fout[i] = fin[st->bitrev[i]]; + ki_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1); + } +} + +void kiss_ifft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_ifft_stride(cfg,fin,fout,1); +} + diff --git a/libcelt/kiss_fft.h b/libcelt/kiss_fft.h index b5d645328..a040eafcf 100644 --- a/libcelt/kiss_fft.h +++ b/libcelt/kiss_fft.h @@ -71,7 +71,7 @@ typedef struct kiss_fft_state* kiss_fft_cfg; * buffer size in *lenmem. * */ -kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); +kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem); /* * kiss_fft(cfg,in_out_buf) @@ -84,11 +84,13 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) f[k].r and f[k].i * */ void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void kiss_ifft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); /* A more generic version of the above function. It reads its input from every Nth sample. * */ void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); +void kiss_ifft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); /* If kiss_fft_alloc allocated a buffer, it is one contiguous buffer and can be simply free()d when no longer needed*/ diff --git a/libcelt/kiss_fftr.c b/libcelt/kiss_fftr.c index bf2b3832c..52c0a3866 100644 --- a/libcelt/kiss_fftr.c +++ b/libcelt/kiss_fftr.c @@ -33,7 +33,7 @@ struct kiss_fftr_state{ #endif }; -kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) +kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem,size_t * lenmem) { int i; kiss_fftr_cfg st = NULL; @@ -45,7 +45,7 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme } nfft >>= 1; - kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + kiss_fft_alloc (nfft, NULL, &subsize); memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2); if (lenmem == NULL) { @@ -61,7 +61,7 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenme st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); st->super_twiddles = st->tmpbuf + nfft; - kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); + kiss_fft_alloc(nfft, st->substate, &subsize); #ifdef FIXED_POINT for (i=0;i<nfft;++i) { @@ -85,10 +85,6 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar kiss_fft_cpx f2k,tdc; celt_word32_t f1kr, f1ki, twr, twi; - if ( st->substate->inverse) { - celt_fatal("kiss fft usage error: improper alloc\n"); - } - ncfft = st->substate->nfft; /*perform the parallel fft of two real signals packed in real,imag*/ @@ -142,10 +138,6 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar /* input buffer timedata is stored row-wise */ int k, ncfft; - if (st->substate->inverse == 0) { - celt_fatal ("kiss fft usage error: improper alloc\n"); - } - ncfft = st->substate->nfft; st->tmpbuf[0].r = freqdata[0] + freqdata[1]; @@ -169,5 +161,5 @@ void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar st->tmpbuf[ncfft - k].i *= -1; #endif } - kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); + kiss_ifft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); } diff --git a/libcelt/kiss_fftr.h b/libcelt/kiss_fftr.h index fea12bb36..ae5f4f0fc 100644 --- a/libcelt/kiss_fftr.h +++ b/libcelt/kiss_fftr.h @@ -18,7 +18,7 @@ extern "C" { typedef struct kiss_fftr_state *kiss_fftr_cfg; -kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); +kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem, size_t * lenmem); /* nfft must be even diff --git a/libcelt/mdct.c b/libcelt/mdct.c index 40ed07c15..2f8705fd1 100644 --- a/libcelt/mdct.c +++ b/libcelt/mdct.c @@ -54,8 +54,7 @@ void mdct_init(mdct_lookup *l,int N) l->n = N; N2 = N/2; N4 = N/4; - l->kfft = kiss_fft_alloc(N4, 0, NULL, NULL); - l->ikfft = kiss_fft_alloc(N4, 1, NULL, NULL); + l->kfft = kiss_fft_alloc(N4, NULL, NULL); l->trig = celt_alloc(N2*sizeof(float)); /* We have enough points that sine isn't necessary */ for (i=0;i<N2;i++) @@ -66,7 +65,6 @@ void mdct_init(mdct_lookup *l,int N) void mdct_clear(mdct_lookup *l) { kiss_fft_free(l->kfft); - kiss_fft_free(l->ikfft); celt_free(l->trig); } @@ -131,7 +129,7 @@ void mdct_backward(mdct_lookup *l, float *in, float *out) } /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ - kiss_fft(l->ikfft, (const kiss_fft_cpx *)out, (kiss_fft_cpx *)f); + kiss_ifft(l->kfft, (const kiss_fft_cpx *)out, (kiss_fft_cpx *)f); /* Post-rotate */ for(i=0;i<N4;i++) diff --git a/libcelt/mdct.h b/libcelt/mdct.h index 09762d455..ef7d999e3 100644 --- a/libcelt/mdct.h +++ b/libcelt/mdct.h @@ -47,7 +47,6 @@ typedef struct { int n; kiss_fft_cfg kfft; - kiss_fft_cfg ikfft; float *trig; float scale; } mdct_lookup; diff --git a/tests/dft-test.c b/tests/dft-test.c index 05a8990a8..d348d32c0 100644 --- a/tests/dft-test.c +++ b/tests/dft-test.c @@ -42,7 +42,7 @@ void test1d(int nfft,int isinverse) kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen); kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen); - kiss_fft_cfg cfg = kiss_fft_alloc(nfft,isinverse,0,0); + kiss_fft_cfg cfg = kiss_fft_alloc(nfft,0,0); int k; for (k=0;k<nfft;++k) { @@ -50,7 +50,10 @@ void test1d(int nfft,int isinverse) in[k].i = (rand() % 65536) - 32768; } - kiss_fft(cfg,in,out); + if (isinverse) + kiss_ifft(cfg,in,out); + else + kiss_fft(cfg,in,out); check(in,out,nfft,isinverse); diff --git a/tests/real-fft-test.c b/tests/real-fft-test.c index 952a83b00..1295d0c13 100644 --- a/tests/real-fft-test.c +++ b/tests/real-fft-test.c @@ -96,8 +96,8 @@ int main(void) cin[i].i = zero; } - kiss_fft_state = kiss_fft_alloc(NFFT,0,0,0); - kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0,0); + kiss_fft_state = kiss_fft_alloc(NFFT,0,0); + kiss_fftr_state = kiss_fftr_alloc(NFFT,0,0); kiss_fft(kiss_fft_state,cin,cout); kiss_fftr(kiss_fftr_state,rin,sout); @@ -118,12 +118,6 @@ int main(void) printf("%d complex ffts took %gs, real took %gs\n",NUMFFTS,tfft,trfft); - free(kiss_fft_state); - free(kiss_fftr_state); - - kiss_fft_state = kiss_fft_alloc(NFFT,1,0,0); - kiss_fftr_state = kiss_fftr_alloc(NFFT,1,0,0); - memset(cin,0,sizeof(cin)); #if 1 cin[0].r = rand_scalar(); @@ -153,7 +147,7 @@ int main(void) fin[2*i+1] = cin[i].i; } - kiss_fft(kiss_fft_state,cin,cout); + kiss_ifft(kiss_fft_state,cin,cout); kiss_fftri(kiss_fftr_state,fin,rout); /* printf(" results from inverse kiss_fft : (%f,%f), (%f,%f), (%f,%f), (%f,%f), (%f,%f) ...\n " -- GitLab