Commit 72513f3c authored by Jean-Marc Valin's avatar Jean-Marc Valin
Browse files

Sharing twiddle factors across all MDCTs

parent 268caad4
......@@ -317,11 +317,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
const int C = CHANNELS(_C);
if (C==1 && !shortBlocks)
{
const mdct_lookup *lookup = &mode->mdct[LM];
const int overlap = OVERLAP(mode);
clt_mdct_forward(lookup, in, out, mode->window, overlap);
clt_mdct_forward(&mode->mdct, in, out, mode->window, overlap, mode->maxLM-LM);
} else {
const mdct_lookup *lookup = &mode->mdct[LM];
const int overlap = OVERLAP(mode);
int N = mode->shortMdctSize<<LM;
int B = 1;
......@@ -331,7 +329,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
SAVE_STACK;
if (shortBlocks)
{
lookup = &mode->mdct[0];
/*lookup = &mode->mdct[0];*/
N = mode->shortMdctSize;
B = shortBlocks;
}
......@@ -344,7 +342,7 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * rest
int j;
for (j=0;j<N+overlap;j++)
x[j] = in[C*(b*N+j)+c];
clt_mdct_forward(lookup, x, tmp, mode->window, overlap);
clt_mdct_forward(&mode->mdct, x, tmp, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
/* Interleaving the sub-frames */
for (j=0;j<N;j++)
out[(j*B+b)+c*N*B] = tmp[j];
......@@ -367,8 +365,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
{
int j;
if (transient_shift==0 && C==1 && !shortBlocks) {
const mdct_lookup *lookup = &mode->mdct[LM];
clt_mdct_backward(lookup, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap);
clt_mdct_backward(&mode->mdct, X, out_mem+C*(MAX_PERIOD-N-N4), mode->window, overlap, mode->maxLM-LM);
} else {
VARDECL(celt_word32, x);
VARDECL(celt_word32, tmp);
......@@ -376,7 +373,6 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
int N2 = N;
int B = 1;
int n4offset=0;
const mdct_lookup *lookup = &mode->mdct[LM];
SAVE_STACK;
ALLOC(x, 2*N, celt_word32);
......@@ -384,7 +380,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
if (shortBlocks)
{
lookup = &mode->mdct[0];
/*lookup = &mode->mdct[0];*/
N2 = mode->shortMdctSize;
B = shortBlocks;
n4offset = N4;
......@@ -397,7 +393,7 @@ static void compute_inv_mdcts(const CELTMode *mode, int shortBlocks, celt_sig *X
/* De-interleaving the sub-frames */
for (j=0;j<N2;j++)
tmp[j] = X[(j*B+b)+c*N2*B];
clt_mdct_backward(lookup, tmp, x+n4offset+N2*b, mode->window, overlap);
clt_mdct_backward(&mode->mdct, tmp, x+n4offset+N2*b, mode->window, overlap, shortBlocks ? mode->maxLM : mode->maxLM-LM);
}
if (transient_shift > 0)
......
......@@ -58,18 +58,23 @@
#define M_PI 3.141592653
#endif
void clt_mdct_init(mdct_lookup *l,int N)
void clt_mdct_init(mdct_lookup *l,int N, int maxshift)
{
int i;
int N2, N4;
l->n = N;
N2 = N>>1;
N4 = N>>2;
l->kfft = cpx32_fft_alloc(N>>2);
l->kfft = celt_alloc(sizeof(kiss_fft_cfg)*(maxshift+1));
l->maxshift = maxshift;
for (i=0;i<=maxshift;i++)
{
l->kfft[i] = cpx32_fft_alloc(N>>2>>i);
#ifndef ENABLE_TI_DSPLIB55
if (l->kfft==NULL)
return;
if (l->kfft[i]==NULL)
return;
#endif
}
l->trig = (kiss_twiddle_scalar*)celt_alloc((N4+1)*sizeof(kiss_twiddle_scalar));
if (l->trig==NULL)
return;
......@@ -90,11 +95,14 @@ void clt_mdct_init(mdct_lookup *l,int N)
void clt_mdct_clear(mdct_lookup *l)
{
cpx32_fft_free(l->kfft);
int i;
for (i=0;i<=l->maxshift;i++)
cpx32_fft_free(l->kfft[i]);
celt_free(l->kfft);
celt_free(l->trig);
}
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap)
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 *window, int overlap, int shift)
{
int i;
int N, N2, N4;
......@@ -102,6 +110,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
VARDECL(kiss_fft_scalar, f);
SAVE_STACK;
N = l->n;
N >>= shift;
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
......@@ -161,8 +170,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
kiss_fft_scalar re, im, yr, yi;
re = yp[0];
im = yp[1];
yr = -S_MUL(re,t[i]) - S_MUL(im,t[N4-i]);
yi = -S_MUL(im,t[i]) + S_MUL(re,t[N4-i]);
yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
/* works because the cos is nearly one */
*yp++ = yr + S_MUL(yi,sine);
*yp++ = yi - S_MUL(yr,sine);
......@@ -170,7 +179,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
}
/* N/4 complex FFT, down-scales by 4/N */
cpx32_fft(l->kfft, out, f, N4);
cpx32_fft(l->kfft[shift], out, f, N4);
/* Post-rotate */
{
......@@ -183,8 +192,8 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp[1],t[N4-i]) + S_MUL(fp[0],t[i]);
yi = S_MUL(fp[0],t[N4-i]) - S_MUL(fp[1],t[i]);
yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);
yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);
/* works because the cos is nearly one */
*yp1 = yr - S_MUL(yi,sine);
*yp2 = yi + S_MUL(yr,sine);;
......@@ -197,7 +206,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
}
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap)
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * restrict out, const celt_word16 * restrict window, int overlap, int shift)
{
int i;
int N, N2, N4;
......@@ -206,6 +215,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
VARDECL(kiss_fft_scalar, f2);
SAVE_STACK;
N = l->n;
N >>= shift;
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
......@@ -227,8 +237,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = -S_MUL(*xp2, t[i]) + S_MUL(*xp1,t[N4-i]);
yi = -S_MUL(*xp2, t[N4-i]) - S_MUL(*xp1,t[i]);
yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);
yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);
/* works because the cos is nearly one */
*yp++ = yr - S_MUL(yi,sine);
*yp++ = yi + S_MUL(yr,sine);
......@@ -238,7 +248,7 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
}
/* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */
cpx32_ifft(l->kfft, f2, f, N4);
cpx32_ifft(l->kfft[shift], f2, f, N4);
/* Post-rotate */
{
......@@ -251,8 +261,8 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
re = fp[0];
im = fp[1];
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
yr = S_MUL(re,t[i]) - S_MUL(im,t[N4-i]);
yi = S_MUL(im,t[i]) + S_MUL(re,t[N4-i]);
yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);
yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);
/* works because the cos is nearly one */
*fp++ = yr - S_MUL(yi,sine);
*fp++ = yi + S_MUL(yr,sine);
......
......@@ -51,18 +51,19 @@
typedef struct {
int n;
kiss_fft_cfg kfft;
int maxshift;
kiss_fft_cfg *kfft;
kiss_twiddle_scalar * restrict trig;
} mdct_lookup;
void clt_mdct_init(mdct_lookup *l,int N);
void clt_mdct_init(mdct_lookup *l,int N, int maxshift);
void clt_mdct_clear(mdct_lookup *l);
/** Compute a forward MDCT and scale by 4/N */
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 *window, int overlap);
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 *window, int overlap, int shift);
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
(scales implicitly by 1/2) */
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 * restrict window, int overlap);
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar *out, const celt_word16 * restrict window, int overlap, int shift);
#endif
......@@ -261,6 +261,7 @@ static void compute_allocation_table(CELTMode *mode, int res)
CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
{
int i;
int LM;
#ifdef STDIN_TUNING
scanf("%d ", &MIN_BINS);
scanf("%d ", &BITALLOC_SIZE);
......@@ -337,18 +338,20 @@ CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
if (frame_size >= 640 && (frame_size%16)==0)
{
mode->nbShortMdcts = 8;
LM = 3;
} else if (frame_size >= 320 && (frame_size%8)==0)
{
mode->nbShortMdcts = 4;
LM = 2;
} else if (frame_size >= 160 && (frame_size%4)==0)
{
mode->nbShortMdcts = 2;
LM = 1;
} else
{
mode->nbShortMdcts = 1;
LM = 0;
}
mode->maxLM = LM;
mode->nbShortMdcts = 1<<LM;
mode->shortMdctSize = frame_size/mode->nbShortMdcts;
res = (mode->Fs+mode->shortMdctSize)/(2*mode->shortMdctSize);
......@@ -402,16 +405,14 @@ CELTMode *celt_mode_create(celt_int32 Fs, int frame_size, int *error)
mode->logN = logN;
#endif /* !STATIC_MODES */
for (i=0;(1<<i)<=mode->nbShortMdcts;i++)
{
clt_mdct_init(&mode->mdct[i], 2*mode->shortMdctSize<<i);
if ((mode->mdct[i].trig==NULL)
clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts, LM);
if ((mode->mdct.trig==NULL)
#ifndef ENABLE_TI_DSPLIB55
|| (mode->mdct[i].kfft==NULL)
|| (mode->mdct.kfft==NULL)
#endif
)
goto failure;
}
)
goto failure;
mode->prob = quant_prob_alloc(mode);
if (mode->prob==NULL)
goto failure;
......@@ -487,8 +488,7 @@ void celt_mode_destroy(CELTMode *mode)
celt_free((celt_int16*)mode->logN);
#endif
for (i=0;(1<<i)<=mode->nbShortMdcts;i++)
clt_mdct_clear(&mode->mdct[i]);
clt_mdct_clear(&mode->mdct);
quant_prob_free(mode->prob);
mode->marker_end = MODEFREED;
......
......@@ -98,10 +98,11 @@ struct CELTMode {
const celt_int16 * const *(_bits[MAX_CONFIG_SIZES]); /**< Cache for pulses->bits mapping in each band */
/* Stuff that could go in the {en,de}coder, but we save space this way */
mdct_lookup mdct[MAX_CONFIG_SIZES];
mdct_lookup mdct;
const celt_word16 *window;
int maxLM;
int nbShortMdcts;
int shortMdctSize;
......
......@@ -45,7 +45,7 @@
/** CELT wrapper for calloc(). To do your own dynamic allocation, all you need to do is replace this function, celt_realloc and celt_free
NOTE: celt_alloc needs to CLEAR THE MEMORY */
#ifndef OVERRIDE_CELT_ALLOC
static inline void *celt_alloc (int size)
static void *celt_alloc (int size)
{
/* WARNING: this is not equivalent to malloc(). If you want to use malloc()
or your own allocator, YOU NEED TO CLEAR THE MEMORY ALLOCATED. Otherwise
......
......@@ -89,7 +89,7 @@ void test1d(int nfft,int isinverse)
celt_word16 * window= (celt_word16*)malloc(sizeof(celt_word16)*nfft/2);
int k;
clt_mdct_init(&cfg, nfft);
clt_mdct_init(&cfg, nfft, 0);
for (k=0;k<nfft;++k) {
in[k] = (rand() % 32768) - 16384;
}
......@@ -116,10 +116,10 @@ void test1d(int nfft,int isinverse)
{
for (k=0;k<nfft;++k)
out[k] = 0;
clt_mdct_backward(&cfg,in,out, window, nfft/2);
clt_mdct_backward(&cfg,in,out, window, nfft/2, 0);
check_inv(in,out,nfft,isinverse);
} else {
clt_mdct_forward(&cfg,in,out,window, nfft/2);
clt_mdct_forward(&cfg,in,out,window, nfft/2, 0);
check(in,out,nfft,isinverse);
}
/*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment