Commit cdb030cd authored by Anton Blanchard's avatar Anton Blanchard Committed by Erik de Castro Lopo

Add VSX optimised versions of autocorrelation loops

Add a POWER8 and POWER9 version of the autocorrelation functions.

flac --best is about 3.3x faster on POWER9 with this patch.
Signed-off-by: default avatarAnton Blanchard <anton@ozlabs.org>
parent 8e1796b9
......@@ -114,6 +114,7 @@ libFLAC_sources = \
lpc_intrin_sse2.c \
lpc_intrin_sse41.c \
lpc_intrin_avx2.c \
lpc_intrin_vsx.c \
md5.c \
memory.c \
metadata_iterators.c \
......
......@@ -91,6 +91,20 @@ void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12_new(const FLAC__real da
void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16_new(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
# endif
# endif
#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
#ifdef FLAC__HAS_TARGET_POWER9
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_4(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
#endif
#ifdef FLAC__HAS_TARGET_POWER8
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_4(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, FLAC__real autoc[]);
#endif
#endif
#endif
/*
......
This diff is collapsed.
......@@ -885,6 +885,36 @@ static FLAC__StreamEncoderInitStatus init_stream_internal_(
/* now override with asm where appropriate */
#ifndef FLAC__INTEGER_ONLY_LIBRARY
# ifndef FLAC__NO_ASM
#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
#ifdef FLAC__HAS_TARGET_POWER8
#ifdef FLAC__HAS_TARGET_POWER9
if (encoder->private_->cpuinfo.ppc.arch_3_00) {
if(encoder->protected_->max_lpc_order < 4)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_4;
else if(encoder->protected_->max_lpc_order < 8)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8;
else if(encoder->protected_->max_lpc_order < 12)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_12;
else if(encoder->protected_->max_lpc_order < 16)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_16;
else
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
} else
#endif
if (encoder->private_->cpuinfo.ppc.arch_2_07) {
if(encoder->protected_->max_lpc_order < 4)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_4;
else if(encoder->protected_->max_lpc_order < 8)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8;
else if(encoder->protected_->max_lpc_order < 12)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_12;
else if(encoder->protected_->max_lpc_order < 16)
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_16;
else
encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
}
#endif
#endif
if(encoder->private_->cpuinfo.use_asm) {
# ifdef FLAC__CPU_IA32
FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment