Commit 02591f6b authored by Erik de Castro Lopo

libFLAC : SSE optimisations.

Add new function:

    FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41()

and rewrite function:

    FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2()

Testing shows a noticeable speed increase on Intel Core i3/5/7 (up to 30%
for -8 mode), AMD Athlon64, Phenom and Bulldozer/Piledriver, but no increase,
or even a very small speed decrease (~2% for -8 mode), on Intel Core2.

Patch-from: lvqcl <lvqcl.mail@gmail.com>
parent 8ce310dd
......@@ -164,6 +164,7 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC_
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
# endif
# ifdef FLAC__SSE4_1_SUPPORTED
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
# endif
# endif
......
This diff is collapsed.
This diff is collapsed.
......@@ -935,6 +935,8 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
else if(encoder->protected_->max_lpc_order < 16)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
else
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
}
# endif
# ifdef FLAC__SSE2_SUPPORTED
......@@ -944,6 +946,7 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
}
# ifdef FLAC__SSE4_1_SUPPORTED
if(encoder->private_->cpuinfo.ia32.sse41) {
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41;
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
}
# endif
......@@ -977,6 +980,11 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
# ifdef FLAC__SSE2_SUPPORTED
/* encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2; // OPT_SSE: not faster than C; TODO: more tests on different CPUs */
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
# ifdef FLAC__SSE4_1_SUPPORTED
if(encoder->private_->cpuinfo.x86_64.sse41) {
encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41;
}
# endif
# ifdef FLAC__SSSE3_SUPPORTED
if (encoder->private_->cpuinfo.x86_64.ssse3) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment