From aad281878d650f680a895730eaa386df7cb3e8f0 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Mon, 1 Dec 2014 10:47:25 -0800
Subject: [PATCH] Fix celt_pitch_xcorr_c signature.

This should not take an arch parameter, so it can properly be used
 as a fallback for accelerated versions which do not.
This patch instead provides a separate version which can call
 accelerated helpers for platforms that have taken that approach.
---
 celt/mips/pitch_mipsr1.h |  8 ++++++--
 celt/pitch.c             | 40 ++++++++++++++++++++++++----------------
 celt/pitch.h             | 19 ++++++++++++-------
 celt/x86/pitch_sse.h     |  5 +++--
 4 files changed, 45 insertions(+), 27 deletions(-)

diff --git a/celt/mips/pitch_mipsr1.h b/celt/mips/pitch_mipsr1.h
index 9c5dc34d6..e2f017e9c 100644
--- a/celt/mips/pitch_mipsr1.h
+++ b/celt/mips/pitch_mipsr1.h
@@ -58,8 +58,8 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c
    *xy2 = xy02;
 }
 
-#define OVERRIDE_XCORR_KERNEL
-static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+static inline void xcorr_kernel_mips(const opus_val16 * x,
+      const opus_val16 * y, opus_val32 sum[4], int len)
 {
    int j;
    opus_val16 y_0, y_1, y_2, y_3;
@@ -151,4 +151,8 @@ static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus
    sum[3] = (opus_val32)sum_3;
 }
 
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+    ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
+
 #endif /* PITCH_MIPSR1_H */
diff --git a/celt/pitch.c b/celt/pitch.c
index 154c84843..436470309 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
    celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
 }
 
-#if 0 /* This is a simple version of the pitch correlation that should work
-         well on DSPs like Blackfin and TI C5x/C6x */
-
+/* Pure C implementation. */
 #ifdef FIXED_POINT
 opus_val32
 #else
 void
 #endif
-celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+#if defined(OVERRIDE_PITCH_XCORR)
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch)
+#else
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch, int arch)
+#endif
 {
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+         well on DSPs like Blackfin and TI C5x/C6x */
    int i, j;
 #ifdef FIXED_POINT
    opus_val32 maxcorr=1;
+#endif
+#if !defined(OVERRIDE_PITCH_XCORR)
+   (void)arch;
 #endif
    for (i=0;i<max_pitch;i++)
    {
       opus_val32 sum = 0;
       for (j=0;j<len;j++)
-         sum = MAC16_16(sum, x[j],y[i+j]);
+         sum = MAC16_16(sum, _x[j], _y[i+j]);
       xcorr[i] = sum;
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -241,18 +251,8 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
 #ifdef FIXED_POINT
    return maxcorr;
 #endif
-}
 
 #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
-
-#ifdef FIXED_POINT
-opus_val32
-#else
-void
-#endif
-celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
-      opus_val32 *xcorr, int len, int max_pitch, int arch)
-{
    int i;
    /*The EDSP version requires that max_pitch is at least 1, and that _x is
       32-bit aligned.
@@ -265,7 +265,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
+#if defined(OVERRIDE_PITCH_XCORR)
+      xcorr_kernel_c(_x, _y+i, sum, len);
+#else
       xcorr_kernel(_x, _y+i, sum, len, arch);
+#endif
       xcorr[i]=sum[0];
       xcorr[i+1]=sum[1];
       xcorr[i+2]=sum[2];
@@ -281,7 +285,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
    for (;i<max_pitch;i++)
    {
       opus_val32 sum;
+#if defined(OVERRIDE_PITCH_XCORR)
+      sum = celt_inner_prod_c(_x, _y+i, len);
+#else
       sum = celt_inner_prod(_x, _y+i, len, arch);
+#endif
       xcorr[i] = sum;
 #ifdef FIXED_POINT
       maxcorr = MAX32(maxcorr, sum);
@@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
 #ifdef FIXED_POINT
    return maxcorr;
 #endif
+#endif
 }
 
-#endif
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                   int len, int max_pitch, int *pitch, int arch)
 {
diff --git a/celt/pitch.h b/celt/pitch.h
index 027ebd9b0..5c6e551ac 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -62,7 +62,6 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
 
 /* OPT: This is the kernel you really want to optimize. It gets used a lot
    by the prefilter and by the PLC. */
-#ifndef OVERRIDE_XCORR_KERNEL
 static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
 {
    int j;
@@ -129,11 +128,9 @@ static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 *
    }
 }
 
-#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#ifndef OVERRIDE_XCORR_KERNEL
 #define xcorr_kernel(x, y, sum, len, arch) \
     ((void)(arch),xcorr_kernel_c(x, y, sum, len))
-#endif
-
 #endif /* OVERRIDE_XCORR_KERNEL */
 
 
@@ -177,7 +174,7 @@ opus_val32
 void
 #endif
 celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
-      opus_val32 *xcorr, int len, int max_pitch, int arch);
+      opus_val32 *xcorr, int len, int max_pitch);
 
 #if !defined(OVERRIDE_PITCH_XCORR)
 /*Is run-time CPU detection enabled on this platform?*/
@@ -191,12 +188,20 @@ void
 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
       const opus_val16 *, opus_val32 *, int, int);
 
+#  define OVERRIDE_PITCH_XCORR
 #  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
   ((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
         xcorr, len, max_pitch))
 # else
-#  define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
-  ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch, arch))
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+      opus_val32 *xcorr, int len, int max_pitch, int arch);
+
 # endif
 #endif
 
diff --git a/celt/x86/pitch_sse.h b/celt/x86/pitch_sse.h
index 837e8ae27..99d1919a2 100644
--- a/celt/x86/pitch_sse.h
+++ b/celt/x86/pitch_sse.h
@@ -43,14 +43,15 @@ void xcorr_kernel_sse4_1(
                     const opus_int16 *x,
                     const opus_int16 *y,
                     opus_val32       sum[4],
-                    int              len );
+                    int              len);
 
 extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
                     const opus_int16 *x,
                     const opus_int16 *y,
                     opus_val32       sum[4],
-                    int              len );
+                    int              len);
 
+#define OVERRIDE_XCORR_KERNEL
 #define xcorr_kernel(x, y, sum, len, arch) \
     ((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
 
-- 
GitLab