diff --git a/celt/mips/pitch_mipsr1.h b/celt/mips/pitch_mipsr1.h index 9c5dc34d69614e2d9a840acf45c935127655aca3..e2f017e9ce052a35ba035bbcf8656473d6d7ecd6 100644 --- a/celt/mips/pitch_mipsr1.h +++ b/celt/mips/pitch_mipsr1.h @@ -58,8 +58,8 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c *xy2 = xy02; } -#define OVERRIDE_XCORR_KERNEL -static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) +static inline void xcorr_kernel_mips(const opus_val16 * x, + const opus_val16 * y, opus_val32 sum[4], int len) { int j; opus_val16 y_0, y_1, y_2, y_3; @@ -151,4 +151,8 @@ static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus sum[3] = (opus_val32)sum_3; } +#define OVERRIDE_XCORR_KERNEL +#define xcorr_kernel(x, y, sum, len, arch) \ + ((void)(arch), xcorr_kernel_mips(x, y, sum, len)) + #endif /* PITCH_MIPSR1_H */ diff --git a/celt/pitch.c b/celt/pitch.c index 154c84843befb26a05f4e42c12af7ea9cce61d00..43647030936454662b073b58e0d7ef271d440c26 100644 --- a/celt/pitch.c +++ b/celt/pitch.c @@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); } -#if 0 /* This is a simple version of the pitch correlation that should work - well on DSPs like Blackfin and TI C5x/C6x */ - +/* Pure C implementation. */ #ifdef FIXED_POINT opus_val32 #else void #endif -celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) +#if defined(OVERRIDE_PITCH_XCORR) +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch) +#else +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch, int arch) +#endif { + +#if 0 /* This is a simple version of the pitch correlation that should work + well on DSPs like Blackfin and TI C5x/C6x */ int i, j; #ifdef FIXED_POINT opus_val32 maxcorr=1; +#endif +#if !defined(OVERRIDE_PITCH_XCORR) + (void)arch; #endif for (i=0;i<max_pitch;i++) { opus_val32 sum = 0; for (j=0;j<len;j++) - sum = MAC16_16(sum, x[j],y[i+j]); + sum = MAC16_16(sum, _x[j], _y[i+j]); xcorr[i] = sum; #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -241,18 +251,8 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m #ifdef FIXED_POINT return maxcorr; #endif -} #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ - -#ifdef FIXED_POINT -opus_val32 -#else -void -#endif -celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch) -{ int i; /*The EDSP version requires that max_pitch is at least 1, and that _x is 32-bit aligned. @@ -265,7 +265,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, for (i=0;i<max_pitch-3;i+=4) { opus_val32 sum[4]={0,0,0,0}; +#if defined(OVERRIDE_PITCH_XCORR) + xcorr_kernel_c(_x, _y+i, sum, len); +#else xcorr_kernel(_x, _y+i, sum, len, arch); +#endif xcorr[i]=sum[0]; xcorr[i+1]=sum[1]; xcorr[i+2]=sum[2]; @@ -281,7 +285,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, for (;i<max_pitch;i++) { opus_val32 sum; +#if defined(OVERRIDE_PITCH_XCORR) + sum = celt_inner_prod_c(_x, _y+i, len); +#else sum = celt_inner_prod(_x, _y+i, len, arch); +#endif xcorr[i] = sum; #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); @@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, #ifdef FIXED_POINT return maxcorr; #endif +#endif } -#endif void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, int len, int max_pitch, int *pitch, int arch) { diff --git a/celt/pitch.h b/celt/pitch.h index 027ebd9b06b077640c40858d50f36ee4b0f91413..5c6e551ac987e1a417ca46557b4db48015530344 100644 --- a/celt/pitch.h +++ b/celt/pitch.h @@ -62,7 +62,6 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, /* OPT: This is the kernel you really want to optimize. It gets used a lot by the prefilter and by the PLC. */ -#ifndef OVERRIDE_XCORR_KERNEL static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len) { int j; @@ -129,11 +128,9 @@ static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * } } -#if !defined(OPUS_X86_MAY_HAVE_SSE4_1) +#ifndef OVERRIDE_XCORR_KERNEL #define xcorr_kernel(x, y, sum, len, arch) \ ((void)(arch),xcorr_kernel_c(x, y, sum, len)) -#endif - #endif /* OVERRIDE_XCORR_KERNEL */ @@ -177,7 +174,7 @@ opus_val32 void #endif celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, - opus_val32 *xcorr, int len, int max_pitch, int arch); + opus_val32 *xcorr, int len, int max_pitch); #if !defined(OVERRIDE_PITCH_XCORR) /*Is run-time CPU detection enabled on this platform?*/ @@ -191,12 +188,20 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *, opus_val32 *, int, int); +# define OVERRIDE_PITCH_XCORR # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \ xcorr, len, max_pitch)) # else -# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ - ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch, arch)) + +#ifdef FIXED_POINT +opus_val32 +#else +void +#endif +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, + opus_val32 *xcorr, int len, int max_pitch, int arch); + # endif #endif diff --git a/celt/x86/pitch_sse.h b/celt/x86/pitch_sse.h index 837e8ae27984d0d887b91311ffbb33ce98381ccb..99d1919a2e62579f40f1d641099bbd7691c9f2f2 100644 --- a/celt/x86/pitch_sse.h +++ b/celt/x86/pitch_sse.h @@ -43,14 +43,15 @@ void xcorr_kernel_sse4_1( const opus_int16 *x, const opus_int16 *y, opus_val32 sum[4], - int len ); + int len); extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])( const opus_int16 *x, const opus_int16 *y, opus_val32 sum[4], - int len ); + int len); +#define OVERRIDE_XCORR_KERNEL #define xcorr_kernel(x, y, sum, len, arch) \ ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))