Skip to content
Snippets Groups Projects
Unverified Commit 71fb7078 authored by Timothy B. Terriberry
Browse files

Don't compile x86 cpu detection without RTCD.

Also #error if RTCD is enabled without a detection method, like Arm.
A number of SILK functions also still used the lookup tables, even
 when RTCD was disabled.
Fix those, too.
parent affb551e
No related branches found
No related tags found
No related merge requests found
......@@ -43,10 +43,11 @@
*/
#define OPUS_ARCHMASK 3
#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
#elif defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:
......
......@@ -35,11 +35,11 @@
#include "pitch.h"
#include "x86cpu.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
#if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#if defined(_MSC_VER)
......@@ -91,6 +91,9 @@ static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
what we want on CPUs that don't support CPUID. */
CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0;
}
#else
# error "Configured to use x86 RTCD, but no CPU detection method available. " \
"Reconfigure with --disable-rtcd (or send patches)."
#endif
}
......
......@@ -609,10 +609,12 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* the following seems faster on x86 */
/* silk_SMMUL(a32, b32): forms the product with silk_SMULL(), shifts it right
   by 32 bits with silk_RSHIFT64(), and casts the result to opus_int32.
   Both arguments are parenthesized before being passed on. */
#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
/* Fallback mapping for silk_burg_modified(): only defined when nothing has
   already defined OVERRIDE_silk_burg_modified. The call is forwarded to
   silk_burg_modified_c() with every argument passed through; arch is first
   evaluated and discarded via (void), then also handed to the callee. */
#if !defined(OVERRIDE_silk_burg_modified)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
#endif
/* Fallback mapping for silk_inner_prod16(): only defined when nothing has
   already defined OVERRIDE_silk_inner_prod16. The call is forwarded to
   silk_inner_prod16_c(inVec1, inVec2, len); arch is evaluated and discarded
   via (void) and is not passed to the callee. */
#if !defined(OVERRIDE_silk_inner_prod16)
#define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
#endif
......
......@@ -46,10 +46,12 @@ void silk_burg_modified_sse4_1(
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
opus_int32 *res_nrg, /* O Residual energy */
......@@ -62,6 +64,7 @@ extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
const opus_int D, /* I Order */
int arch /* I Run-time architecture */);
# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
......@@ -76,16 +79,18 @@ opus_int64 silk_inner_prod16_sse4_1(
# if defined(OPUS_X86_PRESUME_SSE4_1)
# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len))
# else
# elif defined(OPUS_HAVE_RTCD)
extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *inVec1,
const opus_int16 *inVec2,
const opus_int len);
# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))
......
......@@ -34,8 +34,6 @@
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
# define OVERRIDE_silk_VQ_WMat_EC
void silk_VQ_WMat_EC_sse4_1(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *res_nrg_Q15, /* O best residual energy */
......@@ -53,12 +51,13 @@ void silk_VQ_WMat_EC_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
opus_int8 *ind, /* O index of best codebook vector */
......@@ -75,6 +74,7 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
const opus_int L /* I number of vectors in codebook */
);
# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
......@@ -82,8 +82,6 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
# endif
# define OVERRIDE_silk_NSQ
void silk_NSQ_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
......@@ -104,12 +102,13 @@ void silk_NSQ_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
......@@ -129,6 +128,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
......@@ -136,8 +136,6 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
# endif
# define OVERRIDE_silk_NSQ_del_dec
void silk_NSQ_del_dec_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
......@@ -158,12 +156,13 @@ void silk_NSQ_del_dec_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
......@@ -183,6 +182,7 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
......@@ -221,25 +221,26 @@ void silk_VAD_GetNoiseLevels(
silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
);
# define OVERRIDE_silk_VAD_GetSA_Q8
opus_int silk_VAD_GetSA_Q8_sse4_1(
silk_encoder_state *psEnC,
const opus_int16 pIn[]
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
# define OVERRIDE_silk_VAD_GetSA_Q8
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
# else
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
# elif defined(OPUS_HAVE_RTCD)
extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
silk_encoder_state *psEnC,
const opus_int16 pIn[]);
# define OVERRIDE_silk_VAD_GetSA_Q8
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
# endif
# endif
......
......@@ -35,7 +35,7 @@
#include "pitch.h"
#include "main.h"
#if !defined(OPUS_X86_PRESUME_SSE4_1)
#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1)
#if defined(FIXED_POINT)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment