diff --git a/aom_dsp/aom_dsp.mk b/aom_dsp/aom_dsp.mk index 227436923507b81393be6da7f9941243a499838a..f9d675ac033200b902094dcc9df811f619121555 100644 --- a/aom_dsp/aom_dsp.mk +++ b/aom_dsp/aom_dsp.mk @@ -290,10 +290,10 @@ DSP_SRCS-yes += quantize.c DSP_SRCS-yes += quantize.h DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c -ifeq ($(CONFIG_HIGHBITDEPTH),yes) + DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c DSP_SRCS-$(HAVE_AVX2) += x86/highbd_quantize_intrin_avx2.c -endif + ifeq ($(ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 9d8028e43bff6448320aca354fc154287f0d0581..71edf88fdd6d50ed8f8ff34b8c795a18e670ea4f 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl @@ -525,13 +525,12 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { - add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + + add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; + add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; - } # CONFIG_HIGHBITDEPTH } # CONFIG_AV1_ENCODER } else { if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { @@ -543,15 +542,14 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { - add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/aom_highbd_quantize_b sse2 avx2/; + add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/aom_highbd_quantize_b sse2 avx2/; + + add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + specialize qw/aom_highbd_quantize_b_32x32 sse2/; - add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - specialize qw/aom_highbd_quantize_b_32x32 sse2/; + add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; - } # CONFIG_HIGHBITDEPTH } # CONFIG_AV1_ENCODER } # CONFIG_AOM_QM if (aom_config("CONFIG_AV1") eq "yes") { diff --git a/aom_dsp/quantize.c b/aom_dsp/quantize.c index d5c89ebf19bc0f275e58710d853a21ce772ffc5d..fe98b6028584066653b7103a101eee65808557f1 100644 --- a/aom_dsp/quantize.c +++ b/aom_dsp/quantize.c @@ -256,7 +256,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, } #endif // CONFIG_TX64X64 -#if CONFIG_HIGHBITDEPTH void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, @@ -523,7 +522,6 @@ void aom_highbd_quantize_b_64x64_c( *eob_ptr = eob + 1; } #endif // CONFIG_TX64X64 -#endif // CONFIG_HIGHBITDEPTH #else // CONFIG_AOM_QM @@ -602,7 +600,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, } #endif // CONFIG_TX64X64 -#if CONFIG_HIGHBITDEPTH void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, @@ -825,5 +822,4 @@ void aom_highbd_quantize_b_64x64_c( *eob_ptr = eob + 1; } #endif // CONFIG_TX64X64 -#endif // CONFIG_HIGHBITDEPTH #endif // CONFIG_AOM_QM diff --git a/aom_dsp/x86/highbd_quantize_intrin_sse2.c b/aom_dsp/x86/highbd_quantize_intrin_sse2.c index 3ee24ab161ea1ffafa269ab66b510c00f56bd1db..5570ca5b76ce96774895a8cbc0dd1800716bbaaf 100644 --- a/aom_dsp/x86/highbd_quantize_intrin_sse2.c +++ b/aom_dsp/x86/highbd_quantize_intrin_sse2.c @@ -15,7 +15,6 @@ #include "aom_mem/aom_mem.h" #include "aom_ports/mem.h" -#if CONFIG_HIGHBITDEPTH void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, @@ -152,4 +151,3 @@ void aom_highbd_quantize_b_32x32_sse2( } *eob_ptr = eob + 1; } -#endif diff --git a/av1/av1_cx.mk b/av1/av1_cx.mk index 938be38b61b6043ba649c3ba4772ae7d2b05ff1e..5c8c9b8f4a2b1e4bb2db0b1cab030608c5d68946 100644 --- a/av1/av1_cx.mk +++ b/av1/av1_cx.mk @@ -125,10 +125,10 @@ endif AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/av1_quantize_sse2.c AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_quantize_avx2.c AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm -ifeq ($(CONFIG_HIGHBITDEPTH),yes) + AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_highbd_quantize_avx2.c -endif + AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm @@ -140,10 +140,10 @@ endif AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/hybrid_fwd_txfm_avx2.c -ifeq ($(CONFIG_HIGHBITDEPTH),yes) + AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/av1_highbd_quantize_sse4.c + AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c -endif ifeq ($(CONFIG_EXT_INTER),yes) AV1_CX_SRCS-yes += encoder/wedge_utils.c diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index b16f486383059a3ccfc867721da2af95064325e8..1248cd31b7776286a0f7f5d20948a70a0b3f2ef4 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl @@ -417,25 +417,23 @@ if (aom_config("CONFIG_DPCM_INTRA") eq "yes") { } } -if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { - #fwd txfm - add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_4x4 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_8x8 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_16x16 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_32x32 sse4_1/; - add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; - specialize qw/av1_fwd_txfm2d_64x64 sse4_1/; -} +#fwd txfm +add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +specialize qw/av1_fwd_txfm2d_4x4 sse4_1/; +add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +specialize qw/av1_fwd_txfm2d_8x8 sse4_1/; +add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +specialize qw/av1_fwd_txfm2d_16x16 sse4_1/; +add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +specialize qw/av1_fwd_txfm2d_32x32 sse4_1/; +add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; +specialize qw/av1_fwd_txfm2d_64x64 sse4_1/; # # Motion search @@ -480,33 +478,34 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; specialize qw/av1_highbd_block_error sse2/; - if (aom_config("CONFIG_AOM_QM") eq "yes") { - add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; + # fdct functions + if (aom_config("CONFIG_TX64X64") eq "yes") { + add_proto qw/void av1_highbd_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + } - add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; + add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; - if (aom_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - } +} - add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - } else { - add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; - specialize qw/av1_highbd_quantize_fp sse4_1 avx2/; +if (aom_config("CONFIG_AOM_QM") eq "yes") { + add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; - } + add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; - # fdct functions if (aom_config("CONFIG_TX64X64") eq "yes") { - add_proto qw/void av1_highbd_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; + add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; } - add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; - - add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; + add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; +} else { + add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; + specialize qw/av1_highbd_quantize_fp sse4_1 avx2/; + add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; } + +add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; + # End av1_high encoder functions if (aom_config("CONFIG_EXT_INTER") eq "yes") { diff --git a/av1/encoder/av1_quantize.c b/av1/encoder/av1_quantize.c index 8c56d2133de11f1ae3bf592a49bc1166ff74e652..dd53d42232ede0bbfa5b4fe81ee31626f7481772 100644 --- a/av1/encoder/av1_quantize.c +++ b/av1/encoder/av1_quantize.c @@ -845,7 +845,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } #endif // CONFIG_NEW_QUANT -#if CONFIG_HIGHBITDEPTH void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, @@ -951,7 +950,6 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, } } -#if CONFIG_HIGHBITDEPTH static INLINE void highbd_quantize_dc( const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, @@ -980,7 +978,6 @@ static INLINE void highbd_quantize_dc( } *eob_ptr = eob + 1; } -#endif // CONFIG_HIGHBITDEPTH void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, @@ -1519,9 +1516,7 @@ void av1_highbd_quantize_dc_nuq_facade( } } #endif // CONFIG_NEW_QUANT -#endif // CONFIG_HIGHBITDEPTH -#if CONFIG_HIGHBITDEPTH void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, @@ -1579,8 +1574,6 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, *eob_ptr = eob + 1; } -#endif // CONFIG_HIGHBITDEPTH - static void invert_quant(int16_t *quant, int16_t *shift, int d) { uint32_t t; int l, m; diff --git a/av1/encoder/av1_quantize.h b/av1/encoder/av1_quantize.h index 4bc9cccc21382dd02da3d218c88ea6302b3a174c..e5fc8b528eec40d9a82590728986da5b6e1eadec 100644 --- a/av1/encoder/av1_quantize.h +++ b/av1/encoder/av1_quantize.h @@ -146,7 +146,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const QUANT_PARAM *qparam); #endif // CONFIG_NEW_QUANT -#if CONFIG_HIGHBITDEPTH void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, @@ -190,7 +189,6 @@ void av1_highbd_quantize_dc_nuq_facade( tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const SCAN_ORDER *sc, const QUANT_PARAM *qparam); #endif // CONFIG_NEW_QUANT -#endif // CONFIG_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/av1/encoder/dct.c b/av1/encoder/dct.c index 8ed48dd85b3291962964c0917b9ab97c0d28a4f7..60c641f6db3f82ad74cb453f435d418bfa3a4f6c 100644 --- a/av1/encoder/dct.c +++ b/av1/encoder/dct.c @@ -2020,12 +2020,10 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, } } -#if CONFIG_HIGHBITDEPTH void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { av1_fwht4x4_c(input, output, stride); } -#endif // CONFIG_HIGHBITDEPTH void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index c441421376b780bf5df3ac1c23afe65eedaf644e..4f8e0cd620fc048c398c71a986984d2a0cb8fc96 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c @@ -494,7 +494,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, } #if !CONFIG_PVQ -#if CONFIG_HIGHBITDEPTH typedef enum QUANT_FUNC { QUANT_FUNC_LOWBD = 0, QUANT_FUNC_HIGHBD = 1, @@ -514,29 +513,12 @@ static AV1_QUANT_FACADE #endif // !CONFIG_NEW_QUANT { NULL, NULL } }; +#endif // !CONFIG_PVQ -#else - -typedef enum QUANT_FUNC { - QUANT_FUNC_LOWBD = 0, - QUANT_FUNC_TYPES = 1 -} QUANT_FUNC; - -static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES] - [QUANT_FUNC_TYPES] = { -#if !CONFIG_NEW_QUANT - { av1_quantize_fp_facade }, - { av1_quantize_b_facade }, - { av1_quantize_dc_facade }, -#else // !CONFIG_NEW_QUANT - { av1_quantize_fp_nuq_facade }, - { av1_quantize_b_nuq_facade }, - { av1_quantize_dc_nuq_facade }, -#endif // !CONFIG_NEW_QUANT - { NULL } - }; -#endif // CONFIG_HIGHBITDEPTH -#endif // CONFIG_PVQ +typedef void (*fwdTxfmFunc)(const int16_t *diff, tran_low_t *coeff, int stride, + FWD_TXFM_PARAM *param); +static const fwdTxfmFunc fwd_txfm_func[2] = { av1_fwd_txfm, + av1_highbd_fwd_txfm }; void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, @@ -668,29 +650,13 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id]; #if !CONFIG_PVQ -#if CONFIG_HIGHBITDEPTH fwd_txfm_param.bd = xd->bd; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); - if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) { - if (LIKELY(!x->skip_block)) { - quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD]( - coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam); - } else { - av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob); - } - } -#if CONFIG_LV_MAP - p->txb_entropy_ctx[block] = - (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob); -#endif // CONFIG_LV_MAP - return; - } -#endif // CONFIG_HIGHBITDEPTH - av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); + const int is_hbd = get_bitdepth_data_path_index(xd); + fwd_txfm_func[is_hbd](src_diff, coeff, diff_stride, &fwd_txfm_param); + if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) { if (LIKELY(!x->skip_block)) { - quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD]( + quant_func_list[xform_quant_idx][is_hbd]( coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam); } else { av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob); @@ -700,7 +666,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, p->txb_entropy_ctx[block] = (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob); #endif // CONFIG_LV_MAP -#else // #if !CONFIG_PVQ + return; +#else // CONFIG_PVQ (void)xform_quant_idx; #if CONFIG_HIGHBITDEPTH fwd_txfm_param.bd = xd->bd; diff --git a/av1/encoder/hybrid_fwd_txfm.c b/av1/encoder/hybrid_fwd_txfm.c index a5409c2247f221ec256e1b03dbfa77ab0e368348..0ff7524d6da3594e0f7aad7f599eb2674836f2f6 100644 --- a/av1/encoder/hybrid_fwd_txfm.c +++ b/av1/encoder/hybrid_fwd_txfm.c @@ -203,7 +203,6 @@ static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff, } #endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT -#if CONFIG_HIGHBITDEPTH #if CONFIG_CHROMA_2X2 static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, int lossless, @@ -237,6 +236,7 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, int lossless, const int bd) { + int32_t *dst_coeff = (int32_t *)coeff; if (lossless) { assert(tx_type == DCT_DCT); av1_highbd_fwht4x4(src_diff, coeff, diff_stride); @@ -248,7 +248,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case DCT_ADST: case ADST_ADST: // fallthrough intended - av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -257,7 +257,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case ADST_FLIPADST: case FLIPADST_ADST: // fallthrough intended - av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd); break; // use the c version for anything including identity for now case V_DCT: @@ -268,7 +268,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, case H_FLIPADST: case IDTX: // fallthrough intended - av1_fwd_txfm2d_4x4_c(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #endif // CONFIG_EXT_TX default: assert(0); @@ -279,47 +279,54 @@ static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { (void)fwd_txfm_opt; - av1_fwd_txfm2d_4x8_c(src_diff, coeff, diff_stride, tx_type, bd); + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd); } static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { (void)fwd_txfm_opt; - av1_fwd_txfm2d_8x4_c(src_diff, coeff, diff_stride, tx_type, bd); + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd); } static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { (void)fwd_txfm_opt; - av1_fwd_txfm2d_8x16_c(src_diff, coeff, diff_stride, tx_type, bd); + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd); } static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { (void)fwd_txfm_opt; - av1_fwd_txfm2d_16x8_c(src_diff, coeff, diff_stride, tx_type, bd); + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd); } static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { (void)fwd_txfm_opt; - av1_fwd_txfm2d_16x32_c(src_diff, coeff, diff_stride, tx_type, bd); + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd); } static void highbd_fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { (void)fwd_txfm_opt; - av1_fwd_txfm2d_32x16_c(src_diff, coeff, diff_stride, tx_type, bd); + int32_t *dst_coeff = (int32_t *)coeff; + av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd); } static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { + int32_t *dst_coeff = (int32_t *)coeff; (void)fwd_txfm_opt; switch (tx_type) { case DCT_DCT: @@ -327,7 +334,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case DCT_ADST: case ADST_ADST: // fallthrough intended - av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -336,7 +343,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case ADST_FLIPADST: case FLIPADST_ADST: // fallthrough intended - av1_fwd_txfm2d_8x8(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd); break; // use the c version for anything including identity for now case V_DCT: @@ -347,7 +354,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, case H_FLIPADST: case IDTX: // fallthrough intended - av1_fwd_txfm2d_8x8_c(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #endif // CONFIG_EXT_TX default: assert(0); @@ -357,6 +364,7 @@ static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { + int32_t *dst_coeff = (int32_t *)coeff; (void)fwd_txfm_opt; switch (tx_type) { case DCT_DCT: @@ -364,7 +372,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case DCT_ADST: case ADST_ADST: // fallthrough intended - av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -373,7 +381,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case ADST_FLIPADST: case FLIPADST_ADST: // fallthrough intended - av1_fwd_txfm2d_16x16(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd); break; // use the c version for anything including identity for now case V_DCT: @@ -384,7 +392,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, case H_FLIPADST: case IDTX: // fallthrough intended - av1_fwd_txfm2d_16x16_c(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #endif // CONFIG_EXT_TX default: assert(0); @@ -394,6 +402,7 @@ static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { + int32_t *dst_coeff = (int32_t *)coeff; (void)fwd_txfm_opt; switch (tx_type) { case DCT_DCT: @@ -401,7 +410,7 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, case DCT_ADST: case ADST_ADST: // fallthrough intended - av1_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case FLIPADST_DCT: @@ -410,7 +419,7 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, case ADST_FLIPADST: case FLIPADST_ADST: // fallthrough intended - av1_fwd_txfm2d_32x32(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd); break; // use the c version for anything including identity for now case V_DCT: @@ -421,7 +430,7 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, case H_FLIPADST: case IDTX: // fallthrough intended - av1_fwd_txfm2d_32x32_c(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #endif // CONFIG_EXT_TX default: assert(0); @@ -432,11 +441,12 @@ static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, FWD_TXFM_OPT fwd_txfm_opt, const int bd) { + int32_t *dst_coeff = (int32_t *)coeff; (void)fwd_txfm_opt; (void)bd; switch (tx_type) { case DCT_DCT: - av1_fwd_txfm2d_64x64(src_diff, coeff, diff_stride, tx_type, bd); + av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, tx_type, bd); break; #if CONFIG_EXT_TX case ADST_DCT: @@ -459,7 +469,7 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, // in a later change. This shouldn't impact performance since // DCT_DCT is the only extended type currently allowed for 64x64, // as dictated by get_ext_tx_set_type in blockd.h. - av1_fwd_txfm2d_64x64_c(src_diff, coeff, diff_stride, DCT_DCT, bd); + av1_fwd_txfm2d_64x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); break; case IDTX: av1_fwd_idtx_c(src_diff, coeff, diff_stride, 64, tx_type); break; #endif // CONFIG_EXT_TX @@ -467,7 +477,6 @@ static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, } } #endif // CONFIG_TX64X64 -#endif // CONFIG_HIGHBITDEPTH void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) { @@ -534,7 +543,6 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, } } -#if CONFIG_HIGHBITDEPTH void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param) { const int fwd_txfm_opt = FWD_TXFM_OPT_NORMAL; @@ -596,4 +604,3 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, default: assert(0); break; } } -#endif // CONFIG_HIGHBITDEPTH diff --git a/av1/encoder/hybrid_fwd_txfm.h b/av1/encoder/hybrid_fwd_txfm.h index e6fd17275e0166d9ca6b438a87d5ec46ebff7b74..88a72b542a049c47ca6efee90a2426af4b094b82 100644 --- a/av1/encoder/hybrid_fwd_txfm.h +++ b/av1/encoder/hybrid_fwd_txfm.h @@ -20,9 +20,7 @@ typedef struct FWD_TXFM_PARAM { TX_TYPE tx_type; TX_SIZE tx_size; int lossless; -#if CONFIG_HIGHBITDEPTH int bd; -#endif // CONFIG_HIGHBITDEPTH } FWD_TXFM_PARAM; #ifdef __cplusplus @@ -32,10 +30,8 @@ extern "C" { void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param); -#if CONFIG_HIGHBITDEPTH void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, FWD_TXFM_PARAM *fwd_txfm_param); -#endif // CONFIG_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" diff --git a/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/av1/encoder/x86/highbd_fwd_txfm_sse4.c index fb74068fe15702e5a8fa989036ecc099f7eadfac..cab36f2bdee24edeaaa1584162966e53c878f95e 100644 --- a/av1/encoder/x86/highbd_fwd_txfm_sse4.c +++ b/av1/encoder/x86/highbd_fwd_txfm_sse4.c @@ -113,7 +113,7 @@ static void fdct4x4_sse4_1(__m128i *in, int bit) { in[3] = _mm_unpackhi_epi64(v1, v3); } -static INLINE void write_buffer_4x4(__m128i *res, tran_low_t *output) { +static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) { _mm_store_si128((__m128i *)(output + 0 * 4), res[0]); _mm_store_si128((__m128i *)(output + 1 * 4), res[1]); _mm_store_si128((__m128i *)(output + 2 * 4), res[2]); @@ -404,7 +404,7 @@ static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) { in[15] = _mm_srai_epi32(in[15], shift); } -static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) { +static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) { _mm_store_si128((__m128i *)(output + 0 * 4), res[0]); _mm_store_si128((__m128i *)(output + 1 * 4), res[1]); _mm_store_si128((__m128i *)(output + 2 * 4), res[2]); @@ -1788,7 +1788,7 @@ static void col_txfm_16x16_rounding(__m128i *in, int shift) { col_txfm_8x8_rounding(&in[48], shift); } -static void write_buffer_16x16(const __m128i *in, tran_low_t *output) { +static void write_buffer_16x16(const __m128i *in, int32_t *output) { const int size_8x8 = 16 * 4; write_buffer_8x8(&in[0], output); output += size_8x8;