Commit 49134381 authored by Jean-Marc Valin
Browse files

optimisations: caching sign of x in alg_quant(), changed celt_div()/celt_rcp() to assume denominator is positive.
parent 208ae6e3
...@@ -42,7 +42,7 @@ static long long celt_mips = 0; ...@@ -42,7 +42,7 @@ static long long celt_mips = 0;
#define MIPS_INC celt_mips++, #define MIPS_INC celt_mips++,
#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b)) #define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits)))) #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits)))) #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
......
...@@ -204,20 +204,14 @@ static inline celt_word32_t celt_exp2(celt_word16_t x) ...@@ -204,20 +204,14 @@ static inline celt_word32_t celt_exp2(celt_word16_t x)
/** Reciprocal approximation (Q15 input, Q16 output) */ /** Reciprocal approximation (Q15 input, Q16 output) */
static inline celt_word32_t celt_rcp(celt_word32_t x) static inline celt_word32_t celt_rcp(celt_word32_t x)
{ {
int i, neg=0; int i;
celt_word16_t n, frac; celt_word16_t n, frac;
const celt_word16_t C[5] = {21848, -7251, 2403, -934, 327}; const celt_word16_t C[5] = {21848, -7251, 2403, -934, 327};
if (x<0) celt_assert2(x>0, "celt_rcp() only defined for positive values");
{
neg = 1;
x = NEG16(x);
}
i = celt_ilog2(x); i = celt_ilog2(x);
n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15); n = VSHR32(x,i-16)-SHL32(EXTEND32(3),15);
frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
if (neg)
frac = -frac;
return VSHR32(EXTEND32(frac),i-16); return VSHR32(EXTEND32(frac),i-16);
} }
......
...@@ -98,7 +98,6 @@ struct NBest { ...@@ -98,7 +98,6 @@ struct NBest {
celt_word32_t score; celt_word32_t score;
int sign; int sign;
int pos; int pos;
int orig;
celt_word32_t xy; celt_word32_t xy;
celt_word32_t yy; celt_word32_t yy;
celt_word32_t yp; celt_word32_t yp;
...@@ -110,6 +109,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t * ...@@ -110,6 +109,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
VARDECL(celt_norm_t, _ny); VARDECL(celt_norm_t, _ny);
VARDECL(int, _iy); VARDECL(int, _iy);
VARDECL(int, _iny); VARDECL(int, _iny);
VARDECL(int, signx);
celt_norm_t *y, *ny; celt_norm_t *y, *ny;
int *iy, *iny; int *iy, *iny;
int i, j; int i, j;
...@@ -130,11 +130,21 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t * ...@@ -130,11 +130,21 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
ALLOC(_ny, N, celt_norm_t); ALLOC(_ny, N, celt_norm_t);
ALLOC(_iy, N, int); ALLOC(_iy, N, int);
ALLOC(_iny, N, int); ALLOC(_iny, N, int);
ALLOC(signx, N, int);
y = _y; y = _y;
ny = _ny; ny = _ny;
iy = _iy; iy = _iy;
iny = _iny; iny = _iny;
for (j=0;j<N;j++)
{
if (X[j]>0)
signx[j]=1;
else
signx[j]=-1;
}
for (j=0;j<N;j++) for (j=0;j<N;j++)
{ {
Rpp = MAC16_16(Rpp, P[j],P[j]); Rpp = MAC16_16(Rpp, P[j],P[j]);
...@@ -174,7 +184,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t * ...@@ -174,7 +184,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
celt_word16_t s; celt_word16_t s;
/* Select sign based on X[j] alone */ /* Select sign based on X[j] alone */
if (X[j]>0) sign=1; else sign=-1; sign = signx[j];
s = SHL16(sign*pulsesAtOnce, yshift); s = SHL16(sign*pulsesAtOnce, yshift);
/* Updating the sums of the new pulse(s) */ /* Updating the sums of the new pulse(s) */
...@@ -204,7 +214,6 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t * ...@@ -204,7 +214,6 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
{ {
nbest.score = score; nbest.score = score;
nbest.pos = j; nbest.pos = j;
nbest.orig = 0;
nbest.sign = sign; nbest.sign = sign;
nbest.xy = Rxy; nbest.xy = Rxy;
nbest.yy = Ryy; nbest.yy = Ryy;
...@@ -212,7 +221,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t * ...@@ -212,7 +221,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
} }
} }
celt_assert2(nbest[0]->score > -VERY_LARGE32, "Could not find any match in VQ codebook. Something got corrupted somewhere."); celt_assert2(nbest.score > -VERY_LARGE32, "Could not find any match in VQ codebook. Something got corrupted somewhere.");
/* Only now that we've made the final choice, update ny/iny and others */ /* Only now that we've made the final choice, update ny/iny and others */
{ {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment