Commit 25649c15 authored by Jean-Marc Valin's avatar Jean-Marc Valin
Browse files

Fixed the FFT for higher precision

parent 37d13cee
......@@ -45,9 +45,22 @@ struct kiss_fft_state{
* */
#ifdef FIXED_POINT
#include "arch.h"
#ifdef DOUBLE_PRECISION
# define FRACBITS 31
# define SAMPPROD celt_int64_t
#define SAMP_MAX 2147483647
#define TRIG_UPSCALE 65536
#else /* DOUBLE_PRECISION */
# define FRACBITS 15
# define SAMPPROD celt_int32_t
#define SAMP_MAX 32767
#define TRIG_UPSCALE 1
#endif /* !DOUBLE_PRECISION */
#define SAMP_MIN -SAMP_MAX
......@@ -59,7 +72,8 @@ struct kiss_fft_state{
# define smul(a,b) ( (SAMPPROD)(a)*(b) )
# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
# define sround( x ) (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> FRACBITS )
# define sround4( x ) (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )
# define S_MUL(a,b) sround( smul(a,b) )
......@@ -71,8 +85,8 @@ struct kiss_fft_state{
(m).i = sround( smul((a).i,(b).r) - smul((a).r,(b).i) ); }while(0)
# define C_MUL4(m,a,b) \
do{ (m).r = PSHR32( smul((a).r,(b).r) - smul((a).i,(b).i),17 ); \
(m).i = PSHR32( smul((a).r,(b).i) + smul((a).i,(b).r),17 ); }while(0)
do{ (m).r = sround4( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
(m).i = sround4( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
# define DIVSCALAR(x,k) \
(x) = sround( smul( x, SAMP_MAX/k ) )
......@@ -173,8 +187,10 @@ static inline celt_word16_t celt_cos_norm(celt_word32_t x)
#ifdef FIXED_POINT
# define KISS_FFT_COS(phase) floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
# define KISS_FFT_SIN(phase) floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))
/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
# define KISS_FFT_SIN(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * sin (phase))))*/
# define KISS_FFT_COS(phase) SAMP_MAX*cos (phase)
# define KISS_FFT_SIN(phase) SAMP_MAX*sin (phase)
# define HALF_OF(x) ((x)>>1)
#elif defined(USE_SIMD)
# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
......@@ -191,13 +207,13 @@ static inline celt_word16_t celt_cos_norm(celt_word32_t x)
(x)->r = KISS_FFT_COS(phase);\
(x)->i = KISS_FFT_SIN(phase);\
}while(0)
#define kf_cexp2(x,phase) \
do{ \
(x)->r = celt_cos_norm((phase));\
(x)->i = celt_cos_norm((phase)-32768);\
do{ \
(x)->r = TRIG_UPSCALE*celt_cos_norm((phase));\
(x)->i = TRIG_UPSCALE*celt_cos_norm((phase)-32768);\
}while(0)
/* a debugging function */
#define pcpx(c)\
fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
......@@ -48,6 +48,7 @@ static void kf_bfly2(
tw1 = st->twiddles;
for(j=0;j<m;j++)
{
#if 0
/* Almost the same as the code path below, except that we divide the input by two
(while keeping the best accuracy possible) */
celt_word32_t tr, ti;
......@@ -58,6 +59,15 @@ static void kf_bfly2(
Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
#else
Fout->r = SHR(Fout->r, 1);Fout->i = SHR(Fout->i, 1);
Fout2->r = SHR(Fout2->r, 1);Fout2->i = SHR(Fout2->i, 1);
kiss_fft_cpx t;
C_MUL (t, *Fout2 , *tw1);
tw1 += fstride;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
#endif
++Fout2;
++Fout;
}
......@@ -121,14 +131,14 @@ static void kf_bfly4(
C_MUL4(scratch[1],Fout[m2] , *tw2 );
C_MUL4(scratch[2],Fout[m3] , *tw3 );
Fout->r = PSHR16(Fout->r, 2);
Fout->i = PSHR16(Fout->i, 2);
Fout->r = PSHR(Fout->r, 2);
Fout->i = PSHR(Fout->i, 2);
C_SUB( scratch[5] , *Fout, scratch[1] );
C_ADDTO(*Fout, scratch[1]);
C_ADD( scratch[3] , scratch[0] , scratch[2] );
C_SUB( scratch[4] , scratch[0] , scratch[2] );
Fout[m2].r = PSHR16(Fout[m2].r, 2);
Fout[m2].i = PSHR16(Fout[m2].i, 2);
Fout[m2].r = PSHR(Fout[m2].r, 2);
Fout[m2].i = PSHR(Fout[m2].i, 2);
C_SUB( Fout[m2], *Fout, scratch[3] );
tw1 += fstride;
tw2 += fstride*2;
......@@ -620,7 +630,7 @@ kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem )
if (st) {
int i;
st->nfft=nfft;
#ifdef FIXED_POINT
#if defined(FIXED_POINT) && !defined(DOUBLE_PRECISION)
for (i=0;i<nfft;++i) {
celt_word32_t phase = -i;
kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
......
......@@ -33,7 +33,11 @@ extern "C" {
#ifdef FIXED_POINT
#include "arch.h"
#ifdef DOUBLE_PRECISION
# define kiss_fft_scalar celt_int32_t
#else
# define kiss_fft_scalar celt_int16_t
#endif
#else
# ifndef kiss_fft_scalar
/* default is float */
......
......@@ -58,6 +58,13 @@ void test1d(int nfft,int isinverse)
in[k].i = (rand() % 65536) - 32768;
}
#ifdef DOUBLE_PRECISION
for (k=0;k<nfft;++k) {
in[k].r *= 65536;
in[k].i *= 65536;
}
#endif
if (isinverse)
{
for (k=0;k<nfft;++k) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment