From d503125101116d2b399287824d7902b6351b691d Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Fri, 9 Feb 2024 17:26:35 -0500
Subject: [PATCH] Add check-asm for fixed-point xcorr_kernel().

Compare the output of xcorr_kernel() against the results of
 xcorr_kernel_c() when configured with --enable-check-asm.
Currently this is only checked in fixed point, as a float check
 requires more sophisticated error analysis and may need to be
 customized for each vector implementation.

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
---
 celt/celt_lpc.c            | 25 ++++++++++++++++++++++---
 celt/pitch.c               | 11 ++++++++++-
 celt/x86/celt_lpc_sse4_1.c | 13 ++++++++++---
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c
index 518eac1f7..fabca65cb 100644
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -158,7 +158,17 @@ void celt_fir_c(
       sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
       sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
       sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
-      xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+      {
+         opus_val32 sum_c[4];
+         memcpy(sum_c, sum, sizeof(sum_c));
+         xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
+#endif
+         xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+      }
+#endif
       y[i  ] = SROUND16(sum[0], SIG_SHIFT);
       y[i+1] = SROUND16(sum[1], SIG_SHIFT);
       y[i+2] = SROUND16(sum[2], SIG_SHIFT);
@@ -222,8 +232,17 @@ void celt_iir(const opus_val32 *_x,
       sum[1]=_x[i+1];
       sum[2]=_x[i+2];
       sum[3]=_x[i+3];
-      xcorr_kernel(rden, y+i, sum, ord, arch);
-
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+      {
+         opus_val32 sum_c[4];
+         memcpy(sum_c, sum, sizeof(sum_c));
+         xcorr_kernel_c(rden, y+i, sum_c, ord);
+#endif
+         xcorr_kernel(rden, y+i, sum, ord, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+      }
+#endif
       /* Patch up the result to compensate for the fact that this is an IIR */
       y[i+ord  ] = -SROUND16(sum[0],SIG_SHIFT);
       _y[i  ] = sum[0];
diff --git a/celt/pitch.c b/celt/pitch.c
index 7998db416..e33c60a3b 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -262,7 +262,16 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
    for (i=0;i<max_pitch-3;i+=4)
    {
       opus_val32 sum[4]={0,0,0,0};
-      xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+      {
+         opus_val32 sum_c[4]={0,0,0,0};
+         xcorr_kernel_c(_x, _y+i, sum_c, len);
+#endif
+         xcorr_kernel(_x, _y+i, sum, len, arch);
+#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
+         celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
+      }
+#endif
       xcorr[i]=sum[0];
       xcorr[i+1]=sum[1];
       xcorr[i+2]=sum[2];
diff --git a/celt/x86/celt_lpc_sse4_1.c b/celt/x86/celt_lpc_sse4_1.c
index 547856884..fa0152c5b 100644
--- a/celt/x86/celt_lpc_sse4_1.c
+++ b/celt/x86/celt_lpc_sse4_1.c
@@ -64,9 +64,16 @@ void celt_fir_sse4_1(const opus_val16 *x,
    {
       opus_val32 sums[4] = {0};
       __m128i vecSum, vecX;
-
-      xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
-
+#if defined(OPUS_CHECK_ASM)
+      {
+         opus_val32 sums_c[4] = {0}; /* Separate buffer for the C result. */
+         xcorr_kernel_c(rnum, x+i-ord, sums_c, ord); /* Must not touch sums. */
+#endif
+         xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
+#if defined(OPUS_CHECK_ASM)
+         celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0);
+      }
+#endif
       vecSum = _mm_loadu_si128((__m128i *)sums);
       vecSum = _mm_add_epi32(vecSum, vecNoA);
       vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
-- 
GitLab