Commit 1dab60cc authored by Jean-Marc Valin
Browse files

Better use of the arithmetic operators

parent b781877e
......@@ -114,6 +114,20 @@ struct kiss_fft_state{
do { DIVSCALAR( (c).r , div); \
DIVSCALAR( (c).i , div); }while (0)
/* Complex addition: res = a + b.
 * The .r and .i fields are summed independently, each through ADD32.
 * Wrapped in do { } while (0) so the macro behaves as a single statement. */
#define C_ADD(res, a, b) \
    do { \
        (res).r = ADD32((a).r, (b).r); \
        (res).i = ADD32((a).i, (b).i); \
    } while (0)
/* Complex subtraction: res = a - b.
 * The .r and .i fields are subtracted independently, each through SUB32.
 * Wrapped in do { } while (0) so the macro behaves as a single statement. */
#define C_SUB(res, a, b) \
    do { \
        (res).r = SUB32((a).r, (b).r); \
        (res).i = SUB32((a).i, (b).i); \
    } while (0)
/* In-place complex accumulation: res += a.
 * Each field of res is replaced by ADD32 of its old value and the matching
 * field of a. Note that res is evaluated more than once. */
#define C_ADDTO(res, a) \
    do { \
        (res).r = ADD32((res).r, (a).r); \
        (res).i = ADD32((res).i, (a).i); \
    } while (0)
/* In-place complex subtraction: res -= a.
 * Both the real and the imaginary field are reduced through SUB32. The
 * previous definition used ADD32 on the real part, which added a.r instead
 * of subtracting it and disagreed with the generic (non-ADD32/SUB32)
 * C_SUBFROM, which does "(res).r -= (a).r; (res).i -= (a).i;".
 * Note that res is evaluated more than once. */
#define C_SUBFROM(res, a) \
    do { \
        (res).r = SUB32((res).r, (a).r); \
        (res).i = SUB32((res).i, (a).i); \
    } while (0)
#else /* MIXED_PRECISION */
# define sround4( x ) (kiss_fft_scalar)( ( (x) + ((SAMPPROD)1<<(FRACBITS-1)) ) >> (FRACBITS+2) )
......@@ -165,10 +179,13 @@ struct kiss_fft_state{
(c).i *= (s); }while(0)
#endif
/* Fallback when no overflow checker has been defined: the check expands to
 * nothing, so uses of CHECK_OVERFLOW_OP(a,op,b) generate no code. */
#if !defined(CHECK_OVERFLOW_OP)
#define CHECK_OVERFLOW_OP(a, op, b)
#endif
#ifndef C_ADD
#define C_ADD( res, a,b)\
do { \
CHECK_OVERFLOW_OP((a).r,+,(b).r)\
......@@ -194,7 +211,7 @@ struct kiss_fft_state{
CHECK_OVERFLOW_OP((res).i,-,(a).i)\
(res).r -= (a).r; (res).i -= (a).i; \
}while(0)
#endif /* C_ADD defined */
#ifdef FIXED_POINT
/*# define KISS_FFT_COS(phase) TRIG_UPSCALE*floor(MIN(32767,MAX(-32767,.5+32768 * cos (phase))))
......
......@@ -69,8 +69,8 @@ void compute_band_energies(const CELTMode *m, const celt_sig_t *X, celt_ener_t *
{
int shift = celt_ilog2(maxval)-10;
j=eBands[i]; do {
sum += MULT16_16(EXTRACT16(VSHR32(X[j*C+c],shift)),
EXTRACT16(VSHR32(X[j*C+c],shift)));
sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j*C+c],shift)),
EXTRACT16(VSHR32(X[j*C+c],shift)));
} while (++j<eBands[i+1]);
/* We're adding one here to make damn sure we never end up with a pitch vector that's
larger than unity norm */
......@@ -250,8 +250,8 @@ static void intensity_band(celt_norm_t * restrict X, int len)
for (j=0;j<len;j++)
{
X[j] = X[2*j];
E += MULT16_16(X[j],X[j]);
E2 += MULT16_16(X[2*j+1],X[2*j+1]);
E = MAC16_16(E, X[j],X[j]);
E2 = MAC16_16(E2, X[2*j+1],X[2*j+1]);
}
#ifndef FIXED_POINT
E = celt_sqrt(E+E2)/celt_sqrt(E);
......
......@@ -617,8 +617,8 @@ int celt_encode_float(CELTEncoder * restrict st, celt_sig_t * restrict pcm, unsi
int j;
for (j=0;j<N;j++)
{
celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
MULT16_32_Q15(preemph,st->preemph_memD[c]));
celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
preemph,st->preemph_memD[c]);
st->preemph_memD[c] = tmp;
pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
}
......@@ -825,8 +825,8 @@ static void celt_decode_lost(CELTDecoder * restrict st, celt_word16_t * restrict
int j;
for (j=0;j<N;j++)
{
celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
MULT16_32_Q15(preemph,st->preemph_memD[c]));
celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
preemph,st->preemph_memD[c]);
st->preemph_memD[c] = tmp;
pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
}
......@@ -994,8 +994,8 @@ int celt_decode_float(CELTDecoder * restrict st, unsigned char *data, int len, c
int j;
for (j=0;j<N;j++)
{
celt_sig_t tmp = ADD32(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
MULT16_32_Q15(preemph,st->preemph_memD[c]));
celt_sig_t tmp = MAC16_32_Q15(st->out_mem[C*(MAX_PERIOD-N)+C*j+c],
preemph,st->preemph_memD[c]);
st->preemph_memD[c] = tmp;
pcm[C*j+c] = SCALEOUT(SIG2WORD16(tmp));
}
......
......@@ -373,7 +373,7 @@ static inline int MULT16_32_PX(int a, long long b, int Q)
/* Fixed-shift shorthands: each forwards to MULT16_32_QX (or MULT16_32_PX for
 * the P15 variant) with the Q argument pinned to 14 or 15. */
#define MULT16_32_Q14(a,b) MULT16_32_QX(a,b,14)
#define MULT16_32_Q15(a,b) MULT16_32_QX(a,b,15)
#define MULT16_32_P15(a,b) MULT16_32_PX(a,b,15)
/* Multiply-accumulate: c + MULT16_32_Q15(a,b), combined through ADD32.
 * NOTE(review): the two MAC16_32_Q15 definitions below look like the
 * before/after lines of a diff whose +/- markers were lost; a real header
 * should carry only one of them -- confirm against the repository.
 * The second form additionally subtracts 2 from celt_mips before the
 * addition (presumably so the combined operation is not counted as a
 * separate add plus multiply -- TODO confirm). */
#define MAC16_32_Q15(c,a,b) ADD32((c),MULT16_32_Q15((a),(b)))
#define MAC16_32_Q15(c,a,b) (celt_mips-=2,ADD32((c),MULT16_32_Q15((a),(b))))
static inline int SATURATE(int a, int b)
{
......
......@@ -45,6 +45,7 @@ static void mix_pitch_and_residual(int * restrict iy, celt_norm_t * restrict X,
{
int i;
celt_word32_t Ryp, Ryy, Rpp;
celt_word16_t ryp, ryy, rpp;
celt_word32_t g;
VARDECL(celt_norm_t, y);
#ifdef FIXED_POINT
......@@ -74,16 +75,16 @@ static void mix_pitch_and_residual(int * restrict iy, celt_norm_t * restrict X,
Ryy = MAC16_16(Ryy, y[i], y[i]);
} while (++i < N);
ryp = ROUND16(Ryp,14);
ryy = ROUND16(Ryy,14);
rpp = ROUND16(Rpp,14);
/* g = (sqrt(Ryp^2 + Ryy - Rpp*Ryy)-Ryp)/Ryy */
g = MULT16_32_Q15(
celt_sqrt(MULT16_16(ROUND16(Ryp,14),ROUND16(Ryp,14)) + Ryy -
MULT16_16(ROUND16(Ryy,14),ROUND16(Rpp,14)))
- ROUND16(Ryp,14),
celt_rcp(SHR32(Ryy,9)));
g = MULT16_32_Q15(celt_sqrt(MAC16_16(Ryy, ryp,ryp) - MULT16_16(ryy,rpp)) - ryp,
celt_rcp(SHR32(Ryy,9)));
i=0;
do
X[i] = P[i] + ROUND16(MULT16_16(y[i], g),11);
X[i] = ADD16(P[i], ROUND16(MULT16_16(y[i], g),11));
while (++i < N);
RESTORE_STACK;
......@@ -94,7 +95,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
{
VARDECL(celt_norm_t, y);
VARDECL(int, iy);
VARDECL(int, signx);
VARDECL(celt_word16_t, signx);
int j, is;
celt_word16_t s;
int pulsesLeft;
......@@ -113,7 +114,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
ALLOC(y, N, celt_norm_t);
ALLOC(iy, N, int);
ALLOC(signx, N, int);
ALLOC(signx, N, celt_word16_t);
N_1 = 512/N;
sum = 0;
......@@ -154,7 +155,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
best_id = 0;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy = ADD32(yy, MULT16_16(magnitude,magnitude));
yy = MAC16_16(yy, magnitude,magnitude);
/* Choose between fast and accurate strategy depending on where we are in the search */
if (pulsesLeft>1)
{
......@@ -165,11 +166,11 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
do {
celt_word16_t Rxy, Ryy;
/* Select sign based on X[j] alone */
s = signx[j]*magnitude;
s = MULT16_16(signx[j],magnitude);
/* Temporary sums of the new pulse(s) */
Rxy = EXTRACT16(SHR32(xy + MULT16_16(s,X[j]),rshift));
Rxy = EXTRACT16(SHR32(MAC16_16(xy, s,X[j]),rshift));
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy = EXTRACT16(SHR32(yy + MULT16_16(s,y[j]),rshift));
Ryy = EXTRACT16(SHR32(MAC16_16(yy, s,y[j]),rshift));
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
Rxy is positive because the sign is pre-computed) */
......@@ -193,12 +194,12 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
celt_word16_t Rxy, Ryy, Ryp;
celt_word16_t num;
/* Select sign based on X[j] alone */
s = signx[j]*magnitude;
s = MULT16_16(signx[j],magnitude);
/* Temporary sums of the new pulse(s) */
Rxy = ROUND16(xy + MULT16_16(s,X[j]), 14);
Rxy = ROUND16(MAC16_16(xy, s,X[j]), 14);
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy = ROUND16(yy + MULT16_16(s,y[j]), 14);
Ryp = ROUND16(yp + MULT16_16(s,P[j]), 14);
Ryy = ROUND16(MAC16_16(yy, s,y[j]), 14);
Ryp = ROUND16(MAC16_16(yp, s,P[j]), 14);
/* Compute the gain such that ||p + g*y|| = 1
...but instead, we compute g*Ryy to avoid dividing */
......@@ -222,7 +223,7 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
}
j = best_id;
is = signx[j]*pulsesAtOnce;
is = MULT16_16(signx[j],pulsesAtOnce);
s = SHL16(is, yshift);
/* Updating the sums of the new pulse(s) */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment