Skip to content
Snippets Groups Projects
Commit 1632152b authored by Radu Velea's avatar Radu Velea Committed by Timothy B. Terriberry
Browse files

Adding AVX config switches

parent bb0e1e0d
No related branches found
No related tags found
No related merge requests found
......@@ -48,13 +48,14 @@
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1))
#include "x86/x86cpu.h"
/* We currently support 4 x86 variants:
/* We currently support 5 x86 variants:
* arch[0] -> non-sse
* arch[1] -> sse
* arch[2] -> sse2
* arch[3] -> sse4.1
* arch[4] -> avx
*/
#define OPUS_ARCHMASK 3
#define OPUS_ARCHMASK 7
int opus_select_arch(void);
#else
......
......@@ -53,6 +53,7 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
celt_fir_c,
celt_fir_c,
MAY_HAVE_SSE4_1(celt_fir), /* sse4.1 */
MAY_HAVE_SSE4_1(celt_fir) /* avx */
};
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
......@@ -65,6 +66,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
xcorr_kernel_c,
xcorr_kernel_c,
MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1 */
MAY_HAVE_SSE4_1(xcorr_kernel) /* avx */
};
#endif
......@@ -81,6 +83,7 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
celt_inner_prod_c,
MAY_HAVE_SSE2(celt_inner_prod),
MAY_HAVE_SSE4_1(celt_inner_prod), /* sse4.1 */
MAY_HAVE_SSE4_1(celt_inner_prod) /* avx */
};
#endif
......@@ -99,6 +102,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel),
MAY_HAVE_SSE(xcorr_kernel)
};
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
......@@ -110,6 +114,7 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod),
MAY_HAVE_SSE(celt_inner_prod)
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
......@@ -124,6 +129,7 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod),
MAY_HAVE_SSE(dual_inner_prod)
};
void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
......@@ -139,6 +145,7 @@ void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const),
MAY_HAVE_SSE(comb_filter_const)
};
......
......@@ -91,6 +91,8 @@ typedef struct CPU_Feature{
int HW_SSE;
int HW_SSE2;
int HW_SSE41;
/* SIMD: 256-bit */
int HW_AVX;
} CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
......@@ -106,11 +108,13 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
}
else {
cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0;
cpu_feature->HW_AVX = 0;
}
}
......@@ -140,6 +144,12 @@ int opus_select_arch(void)
}
arch++;
if (!cpu_feature.HW_AVX)
{
return arch;
}
arch++;
return arch;
}
......
......@@ -351,10 +351,12 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
AM_CONDITIONAL([HAVE_SSE], [false])
AM_CONDITIONAL([HAVE_SSE2], [false])
AM_CONDITIONAL([HAVE_SSE4_1], [false])
AM_CONDITIONAL([HAVE_AVX], [false])
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
......@@ -371,11 +373,13 @@ AS_CASE([$host],
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
AC_DEFUN([OPUS_PATH_NE10],
......@@ -566,7 +570,24 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
AC_SUBST([OPUS_X86_SSE4_1_CFLAGS])
]
)
OPUS_CHECK_INTRINSICS(
[AVX],
[$X86_AVX_CFLAGS],
[OPUS_X86_MAY_HAVE_AVX],
[OPUS_X86_PRESUME_AVX],
[[#include <immintrin.h>
]],
[[
static __m256 mtest;
mtest = _mm256_setzero_ps();
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
[
OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
AC_SUBST([OPUS_X86_AVX_CFLAGS])
]
)
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1"],
[
......@@ -606,6 +627,19 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
])
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
[
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
intrinsics_support="$intrinsics_support AVX"
AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
[AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
[rtcd_support="$rtcd_support AVX"])
],
[
AC_MSG_WARN([Compiler does not support AVX intrinsics])
])
AS_IF([test x"$intrinsics_support" = x""],
[intrinsics_support=no],
[intrinsics_support="x86$intrinsics_support"]
......@@ -672,6 +706,8 @@ AM_CONDITIONAL([HAVE_SSE2],
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
AM_CONDITIONAL([HAVE_SSE4_1],
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
AM_CONDITIONAL([HAVE_AVX],
[test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
AS_IF([test x"$enable_rtcd" = x"yes"],[
AS_IF([test x"$rtcd_support" != x"no"],[
......
......@@ -50,6 +50,7 @@ opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_inner_prod16_aligned_64_c,
silk_inner_prod16_aligned_64_c,
MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_inner_prod16_aligned_64 ) /* avx */
};
#endif
......@@ -62,6 +63,7 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_VAD_GetSA_Q8_c,
silk_VAD_GetSA_Q8_c,
MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_VAD_GetSA_Q8 ) /* avx */
};
void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
......@@ -85,6 +87,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_NSQ_c,
silk_NSQ_c,
MAY_HAVE_SSE4_1( silk_NSQ ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_NSQ ) /* avx */
};
void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
......@@ -104,6 +107,7 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_VQ_WMat_EC_c,
silk_VQ_WMat_EC_c,
MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_VQ_WMat_EC ) /* avx */
};
void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
......@@ -127,6 +131,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_NSQ_del_dec_c,
silk_NSQ_del_dec_c,
MAY_HAVE_SSE4_1( silk_NSQ_del_dec ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_NSQ_del_dec ) /* avx */
};
#if defined(FIXED_POINT)
......@@ -144,6 +149,7 @@ void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_warped_LPC_analysis_filter_FIX_c,
silk_warped_LPC_analysis_filter_FIX_c,
MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_warped_LPC_analysis_filter_FIX ) /* avx */
};
void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
......@@ -161,6 +167,7 @@ void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_burg_modified_c,
silk_burg_modified_c,
MAY_HAVE_SSE4_1( silk_burg_modified ), /* sse4.1 */
MAY_HAVE_SSE4_1( silk_burg_modified ) /* avx */
};
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment