Commit cf0e42ae authored by Erik de Castro Lopo's avatar Erik de Castro Lopo
Browse files

Don't use intrinsics when they are slower.

More thorough en-/decoding tests show that sometimes the functions
that use intrinsics are slower (or not really faster) than old
plain C functions.

After this patch the encoder doesn't use these new functions
when their usefulness is questionable.

Patch-from: lvqcl <lvqcl.mail@gmail.com>
parent 4be8ed8e
......@@ -1289,6 +1289,10 @@ void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsig
{
int i;
FLAC__int32 sum;
if (order < 8) {
FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
return;
}
FLAC__ASSERT(order > 0);
FLAC__ASSERT(order <= 32);
......
......@@ -417,24 +417,17 @@ static FLAC__StreamDecoderInitStatus init_stream_internal_(
}
#endif
#ifdef FLAC__HAS_X86INTRIN
# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* not faster than asm MMX code */
# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT: not faster than ASM/MMX code */
if(decoder->private_->cpuinfo.ia32.sse2) {
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
}
# endif
# if defined FLAC__SSE4_1_SUPPORTED && 1 /* faster than asm */
# if defined FLAC__SSE4_1_SUPPORTED && 1 /* OPT: faster than asm; TODO: more tests */
if(decoder->private_->cpuinfo.ia32.sse41)
decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
# endif
#endif
#elif defined FLAC__CPU_X86_64
#ifdef FLAC__HAS_X86INTRIN
# if defined FLAC__SSE2_SUPPORTED
decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
decoder->private_->local_lpc_restore_signal_16bit_order8 = FLAC__lpc_restore_signal_16_intrin_sse2;
# endif
#endif
#elif defined FLAC__CPU_PPC
FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_PPC);
if(decoder->private_->cpuinfo.ppc.altivec) {
......
......@@ -957,7 +957,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
# endif
# ifdef FLAC__SSE2_SUPPORTED
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
/* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT: not faster than C; TODO: more tests on different CPUs */
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
# endif
# endif /* FLAC__HAS_X86INTRIN */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment