From 37942649cc889816d53e54fd98f4bebc279efd63 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jean-marc.valin@usherbrooke.ca> Date: Sat, 1 Mar 2008 22:55:27 +1100 Subject: [PATCH] Saved 4 kB of stack usage in find_spectral_pitch() by doing the FFT in-place (sort of) --- libcelt/kiss_fft.c | 2 +- libcelt/kiss_fft.h | 3 +++ libcelt/kiss_fftr.c | 19 +++++++++---------- libcelt/kiss_fftr.h | 8 ++++++++ libcelt/pitch.c | 46 +++++++++++++++++++++++++++------------------ 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/libcelt/kiss_fft.c b/libcelt/kiss_fft.c index e9b792b61..d8b769787 100644 --- a/libcelt/kiss_fft.c +++ b/libcelt/kiss_fft.c @@ -510,7 +510,7 @@ void compute_bitrev_table( } } -static + void kf_work( kiss_fft_cpx * Fout, const kiss_fft_cpx * f, diff --git a/libcelt/kiss_fft.h b/libcelt/kiss_fft.h index fb1c9d07b..1e47b216a 100644 --- a/libcelt/kiss_fft.h +++ b/libcelt/kiss_fft.h @@ -85,6 +85,9 @@ typedef struct kiss_fft_state* kiss_fft_cfg; kiss_fft_cfg kiss_fft_alloc(int nfft,void * mem,size_t * lenmem); +void kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride, + int in_stride,int * factors,const kiss_fft_cfg st,int N,int s2,int m2); + /** Internal function. Can be useful when you want to do the bit-reversing yourself */ void ki_work(kiss_fft_cpx * Fout, const kiss_fft_cpx * f, const size_t fstride, int in_stride,int * factors,const kiss_fft_cfg st,int N,int s2,int m2); diff --git a/libcelt/kiss_fftr.c b/libcelt/kiss_fftr.c index 979aac95a..392f3eb68 100644 --- a/libcelt/kiss_fftr.c +++ b/libcelt/kiss_fftr.c @@ -24,13 +24,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #include "kiss_fftr.h" #include "_kiss_fft_guts.h" -struct kiss_fftr_state{ - kiss_fft_cfg substate; - kiss_twiddle_cpx * super_twiddles; -#ifdef USE_SIMD - long pad; -#endif -}; kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem,size_t * lenmem) { @@ -80,7 +73,7 @@ kiss_fftr_cfg kiss_fftr_alloc(int nfft,void * mem,size_t * lenmem) return st; } -void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata) +void kiss_fftr_twiddles(kiss_fftr_cfg st,kiss_fft_scalar *freqdata) { /* input buffer timedata is stored row-wise */ int k,ncfft; @@ -88,8 +81,6 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar ncfft = st->substate->nfft; - /*perform the parallel fft of two real signals packed in real,imag*/ - kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, (kiss_fft_cpx *)freqdata ); /* The real part of the DC element of the frequency spectrum in st->tmpbuf * contains the sum of the even-numbered elements of the input time sequence * The imag part is the sum of the odd-numbered elements @@ -126,6 +117,14 @@ void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar } } +void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata) +{ + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, (kiss_fft_cpx *)freqdata ); + + kiss_fftr_twiddles(st,freqdata); +} + void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata) { /* input buffer timedata is stored row-wise */ diff --git a/libcelt/kiss_fftr.h b/libcelt/kiss_fftr.h index ae5f4f0fc..7cc5bc616 100644 --- a/libcelt/kiss_fftr.h +++ b/libcelt/kiss_fftr.h @@ -15,6 +15,14 @@ extern "C" { */ +struct kiss_fftr_state{ + kiss_fft_cfg substate; + kiss_twiddle_cpx * super_twiddles; +#ifdef USE_SIMD + long pad; +#endif + }; + typedef struct kiss_fftr_state *kiss_fftr_cfg; diff --git a/libcelt/pitch.c b/libcelt/pitch.c index 2b4611604..4e64a5a43 100644 --- a/libcelt/pitch.c +++ b/libcelt/pitch.c @@ -43,42 +43,52 @@ #include <math.h> #include "pitch.h" #include "psy.h" +#include "_kiss_fft_guts.h" +#include "kiss_fftr.h" void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t *x, celt_sig_t *y, int lag, int len, int C, int *pitch) { int c, i; float max_corr; - VARDECL(celt_word32_t *xx); VARDECL(celt_word32_t *X); VARDECL(celt_word32_t *Y); VARDECL(celt_mask_t *curve); int n2; SAVE_STACK; n2 = lag/2; - ALLOC(xx, lag, celt_word32_t); ALLOC(X, lag, celt_word32_t); ALLOC(curve, n2, celt_mask_t); - + for (i=0;i<lag;i++) - xx[i] = 0; + X[i] = 0; for (c=0;c<C;c++) - for (i=0;i<len;i++) - xx[i] += SHR32(x[C*i+c],1); - - kiss_fftr(fft, xx, X); - + { + for (i=0;i<len/2;i++) + { + X[2*fft->substate->bitrev[i]] += SHR32(x[C*(2*i)+c],1); + X[2*fft->substate->bitrev[i]+1] += SHR32(x[C*(2*i+1)+c],1); + } + } + kf_work((kiss_fft_cpx*)X, NULL, 1,1, fft->substate->factors,fft->substate, 1, 1, 1); + kiss_fftr_twiddles(fft,X); + compute_masking(decay, X, curve, lag); /* Deferred allocation to reduce peak stack usage */ ALLOC(Y, lag, celt_word32_t); for (i=0;i<lag;i++) - xx[i] = 0; + Y[i] = 0; for (c=0;c<C;c++) - for (i=0;i<lag;i++) - xx[i] += SHR32(y[C*i+c],1); - kiss_fftr(fft, xx, Y); - - + { + for (i=0;i<lag/2;i++) + { + Y[2*fft->substate->bitrev[i]] += SHR32(y[C*(2*i)+c],1); + Y[2*fft->substate->bitrev[i]+1] += SHR32(y[C*(2*i+1)+c],1); + } + } + kf_work((kiss_fft_cpx*)Y, NULL, 1,1, fft->substate->factors,fft->substate, 1, 1, 1); + kiss_fftr_twiddles(fft,Y); + for (i=1;i<n2;i++) { float n, tmp; @@ -93,7 +103,7 @@ void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t * } /*printf ("\n");*/ X[0] = X[1] = 0; - kiss_fftri(fft, X, xx); + kiss_fftri(fft, X, Y); /*for (i=0;i<C*lag;i++) printf ("%d %d\n", X[i], xx[i]);*/ @@ -102,10 +112,10 @@ void find_spectral_pitch(kiss_fftr_cfg fft, struct PsyDecay *decay, celt_sig_t * for (i=0;i<lag-len;i++) { /*printf ("%f ", xx[i]);*/ - if (xx[i] > max_corr) + if (Y[i] > max_corr) { *pitch = i; - max_corr = xx[i]; + max_corr = Y[i]; } } /*printf ("%f\n", max_corr);*/ -- GitLab