Skip to content
Snippets Groups Projects
Commit 85ede2c6 authored by Timothy B. Terriberry
Browse files

Use more MAC16_16's and unroll a loop.

This splits out the non-arch-specific portions of a patch written
 by Aurélien Zanelli <aurelien.zanelli@parrot.com>
 http://lists.xiph.org/pipermail/opus/2013-May/002088.html

I also added support for odd n, for custom modes.

0.25% speedup on 96 kbps stereo encode+decode on a Cortex A8.
parent 2040606f
No related branches found
No related tags found
No related merge requests found
...@@ -101,7 +101,7 @@ void celt_fir(const opus_val16 *x, ...@@ -101,7 +101,7 @@ void celt_fir(const opus_val16 *x,
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
for (j=0;j<ord;j++) for (j=0;j<ord;j++)
{ {
sum += MULT16_16(num[j],mem[j]); sum = MAC16_16(sum,num[j],mem[j]);
} }
for (j=ord-1;j>=1;j--) for (j=ord-1;j>=1;j--)
{ {
...@@ -161,11 +161,16 @@ void _celt_autocorr( ...@@ -161,11 +161,16 @@ void _celt_autocorr(
} }
#ifdef FIXED_POINT #ifdef FIXED_POINT
{ {
opus_val32 ac0=0; opus_val32 ac0;
int shift; int shift;
for(i=0;i<n;i++) int n2;
ac0 = 1+n;
if (n&1) ac0 += SHR32(MULT16_16(xx[0],xx[0]),9);
for(i=(n&1);i<n;i+=2)
{
ac0 += SHR32(MULT16_16(xx[i],xx[i]),9); ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
ac0 += 1+n; ac0 += SHR32(MULT16_16(xx[i+1],xx[i+1]),9);
}
shift = celt_ilog2(ac0)-30+10; shift = celt_ilog2(ac0)-30+10;
shift = (shift+1)/2; shift = (shift+1)/2;
...@@ -176,7 +181,7 @@ void _celt_autocorr( ...@@ -176,7 +181,7 @@ void _celt_autocorr(
while (lag>=0) while (lag>=0)
{ {
for (i = lag, d = 0; i < n; i++) for (i = lag, d = 0; i < n; i++)
d += xx[i] * xx[i-lag]; d = MAC16_16(d, xx[i], xx[i-lag]);
ac[lag] = d; ac[lag] = d;
/*printf ("%f ", ac[lag]);*/ /*printf ("%f ", ac[lag]);*/
lag--; lag--;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment