From 02fed471a4568852d6618e041c4f2af0d7730ee2 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin <jmvalin@jmvalin.ca> Date: Thu, 29 Aug 2013 15:29:02 -0400 Subject: [PATCH] Implements fixed-point silk_LPC_analysis_filter() in terms of celt_fir() Saves 2.5% on ARM without any asm. The float build still uses the old code because celt_fir() then becomes a float function. --- celt/arch.h | 1 + celt/celt_lpc.c | 13 ++++++------- celt/fixed_generic.h | 2 ++ silk/LPC_analysis_filter.c | 23 ++++++++++++++++++++++- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/celt/arch.h b/celt/arch.h index e497a4d92..78e2635f0 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -163,6 +163,7 @@ typedef float celt_ener; #define SHR(a,shift) (a) #define SHL(a,shift) (a) #define SATURATE(x,a) (x) +#define SATURATE16(x) (x) #define ROUND16(a,shift) (a) #define HALF16(x) (.5f*(x)) diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c index 523c36aae..7ffe90a35 100644 --- a/celt/celt_lpc.c +++ b/celt/celt_lpc.c @@ -118,25 +118,24 @@ void celt_fir(const opus_val16 *_x, { sum = MAC16_16(sum,rnum[j],x[i+j]); } - _y[i] = ROUND16(sum, SIG_SHIFT); + _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); } #else - celt_assert((ord&3)==0); for (i=0;i<N-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; xcorr_kernel(rnum, x+i, sum, ord); - _y[i ] = ADD16(_x[i ], ROUND16(sum[0], SIG_SHIFT)); - _y[i+1] = ADD16(_x[i+1], ROUND16(sum[1], SIG_SHIFT)); - _y[i+2] = ADD16(_x[i+2], ROUND16(sum[2], SIG_SHIFT)); - _y[i+3] = ADD16(_x[i+3], ROUND16(sum[3], SIG_SHIFT)); + _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); + _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); + _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); + _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); } for (;i<N;i++) { opus_val32 sum = 0; for (j=0;j<ord;j++) sum = MAC16_16(sum,rnum[j],x[i+j]); - _y[i] = ADD16(_x[i ], ROUND16(sum, SIG_SHIFT)); + _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); } #endif RESTORE_STACK; diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h index ac01a4345..0e77976e8 100644 --- a/celt/fixed_generic.h +++ b/celt/fixed_generic.h @@ -84,6 +84,8 @@ #define PSHR(a,shift) (SHR((a)+((EXTEND32(1)<<((shift))>>1)),shift)) #define SATURATE(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))) +#define SATURATE16(x) (EXTRACT16((x)>32767 ? 32767 : (x)<-32768 ? -32768 : (x))) + /** Shift by a and round-to-neareast 32-bit value. Result is a 16-bit value */ #define ROUND16(x,a) (EXTRACT16(PSHR32((x),(a)))) /** Divide by two */ diff --git a/silk/LPC_analysis_filter.c b/silk/LPC_analysis_filter.c index e2c0c88b1..9bfeba0f5 100644 --- a/silk/LPC_analysis_filter.c +++ b/silk/LPC_analysis_filter.c @@ -30,6 +30,7 @@ POSSIBILITY OF SUCH DAMAGE. #endif #include "SigProc_FIX.h" +#include "celt_lpc.h" /*******************************************/ /* LPC analysis filter */ @@ -46,14 +47,33 @@ void silk_LPC_analysis_filter( const opus_int32 d /* I Filter order */ ) { - opus_int ix, j; + opus_int j; +#ifdef FIXED_POINT + opus_int16 mem[SILK_MAX_ORDER_LPC]; + opus_int16 num[SILK_MAX_ORDER_LPC]; +#else + int ix; opus_int32 out32_Q12, out32; const opus_int16 *in_ptr; +#endif silk_assert( d >= 6 ); silk_assert( (d & 1) == 0 ); silk_assert( d <= len ); +#ifdef FIXED_POINT + silk_assert( d <= SILK_MAX_ORDER_LPC ); + for ( j = 0; j < d; j++ ) { + num[ j ] = -B[ j ]; + } + for (j=0;j<d;j++) { + mem[ j ] = in[ d - j - 1 ]; + } + celt_fir( in + d, num, out + d, len - d, d, mem ); + for ( j = 0; j < d; j++ ) { + out[ j ] = 0; + } +#else for( ix = d; ix < len; ix++ ) { in_ptr = &in[ ix - 1 ]; @@ -82,4 +102,5 @@ void silk_LPC_analysis_filter( /* Set first d output samples to zero */ silk_memset( out, 0, d * sizeof( opus_int16 ) ); +#endif } -- GitLab