Commit 0f4195c2 authored by Yi Luo

Fwd txfm and quantizer HBD/LBD data paths co-exist

Change-Id: Iaae46d0735539b8b8daf9faac81c2a3434838020
parent 8ab5b5f6
...@@ -290,10 +290,10 @@ DSP_SRCS-yes += quantize.c ...@@ -290,10 +290,10 @@ DSP_SRCS-yes += quantize.c
DSP_SRCS-yes += quantize.h DSP_SRCS-yes += quantize.h
DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c
DSP_SRCS-$(HAVE_AVX2) += x86/highbd_quantize_intrin_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/highbd_quantize_intrin_avx2.c
endif
ifeq ($(ARCH_X86_64),yes) ifeq ($(ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3_x86_64.asm
DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx_x86_64.asm
......
...@@ -525,13 +525,12 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { ...@@ -525,13 +525,12 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr"; add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr";
} # CONFIG_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER } # CONFIG_AV1_ENCODER
} else { } else {
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
...@@ -543,15 +542,14 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") { ...@@ -543,15 +542,14 @@ if (aom_config("CONFIG_AOM_QM") eq "yes") {
add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; add_proto qw/void aom_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
add_proto qw/void aom_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; specialize qw/aom_highbd_quantize_b sse2 avx2/;
specialize qw/aom_highbd_quantize_b sse2 avx2/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_32x32 sse2/;
add_proto qw/void aom_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
specialize qw/aom_highbd_quantize_b_32x32 sse2/;
add_proto qw/void aom_highbd_quantize_b_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
} # CONFIG_HIGHBITDEPTH
} # CONFIG_AV1_ENCODER } # CONFIG_AV1_ENCODER
} # CONFIG_AOM_QM } # CONFIG_AOM_QM
if (aom_config("CONFIG_AV1") eq "yes") { if (aom_config("CONFIG_AV1") eq "yes") {
......
...@@ -256,7 +256,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, ...@@ -256,7 +256,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
} }
#endif // CONFIG_TX64X64 #endif // CONFIG_TX64X64
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr, int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
...@@ -523,7 +522,6 @@ void aom_highbd_quantize_b_64x64_c( ...@@ -523,7 +522,6 @@ void aom_highbd_quantize_b_64x64_c(
*eob_ptr = eob + 1; *eob_ptr = eob + 1;
} }
#endif // CONFIG_TX64X64 #endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
#else // CONFIG_AOM_QM #else // CONFIG_AOM_QM
...@@ -602,7 +600,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block, ...@@ -602,7 +600,6 @@ void aom_quantize_dc_64x64(const tran_low_t *coeff_ptr, int skip_block,
} }
#endif // CONFIG_TX64X64 #endif // CONFIG_TX64X64
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, void aom_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
int skip_block, const int16_t *round_ptr, int skip_block, const int16_t *round_ptr,
const int16_t quant, tran_low_t *qcoeff_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
...@@ -825,5 +822,4 @@ void aom_highbd_quantize_b_64x64_c( ...@@ -825,5 +822,4 @@ void aom_highbd_quantize_b_64x64_c(
*eob_ptr = eob + 1; *eob_ptr = eob + 1;
} }
#endif // CONFIG_TX64X64 #endif // CONFIG_TX64X64
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_AOM_QM #endif // CONFIG_AOM_QM
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include "aom_mem/aom_mem.h" #include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h" #include "aom_ports/mem.h"
#if CONFIG_HIGHBITDEPTH
void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count, void aom_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr, int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *round_ptr,
...@@ -152,4 +151,3 @@ void aom_highbd_quantize_b_32x32_sse2( ...@@ -152,4 +151,3 @@ void aom_highbd_quantize_b_32x32_sse2(
} }
*eob_ptr = eob + 1; *eob_ptr = eob + 1;
} }
#endif
...@@ -125,10 +125,10 @@ endif ...@@ -125,10 +125,10 @@ endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/av1_quantize_sse2.c AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/av1_quantize_sse2.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_quantize_avx2.c AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_quantize_avx2.c
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/highbd_block_error_intrin_sse2.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_highbd_quantize_avx2.c AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/av1_highbd_quantize_avx2.c
endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/error_sse2.asm
...@@ -140,10 +140,10 @@ endif ...@@ -140,10 +140,10 @@ endif
AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c AV1_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_intrin_sse2.c
AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/hybrid_fwd_txfm_avx2.c AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/hybrid_fwd_txfm_avx2.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/av1_highbd_quantize_sse4.c AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/av1_highbd_quantize_sse4.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
endif
ifeq ($(CONFIG_EXT_INTER),yes) ifeq ($(CONFIG_EXT_INTER),yes)
AV1_CX_SRCS-yes += encoder/wedge_utils.c AV1_CX_SRCS-yes += encoder/wedge_utils.c
......
...@@ -417,25 +417,23 @@ if (aom_config("CONFIG_DPCM_INTRA") eq "yes") { ...@@ -417,25 +417,23 @@ if (aom_config("CONFIG_DPCM_INTRA") eq "yes") {
} }
} }
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { #fwd txfm
#fwd txfm add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_4x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_32x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_4x4/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; specialize qw/av1_fwd_txfm2d_4x4 sse4_1/;
specialize qw/av1_fwd_txfm2d_4x4 sse4_1/; add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_8x8/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; specialize qw/av1_fwd_txfm2d_8x8 sse4_1/;
specialize qw/av1_fwd_txfm2d_8x8 sse4_1/; add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_16x16/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; specialize qw/av1_fwd_txfm2d_16x16 sse4_1/;
specialize qw/av1_fwd_txfm2d_16x16 sse4_1/; add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_32x32/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; specialize qw/av1_fwd_txfm2d_32x32 sse4_1/;
specialize qw/av1_fwd_txfm2d_32x32 sse4_1/; add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd";
add_proto qw/void av1_fwd_txfm2d_64x64/, "const int16_t *input, int32_t *output, int stride, int tx_type, int bd"; specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
specialize qw/av1_fwd_txfm2d_64x64 sse4_1/;
}
# #
# Motion search # Motion search
...@@ -480,33 +478,34 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { ...@@ -480,33 +478,34 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd"; add_proto qw/int64_t av1_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/av1_highbd_block_error sse2/; specialize qw/av1_highbd_block_error sse2/;
if (aom_config("CONFIG_AOM_QM") eq "yes") { # fdct functions
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_highbd_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
}
add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
if (aom_config("CONFIG_TX64X64") eq "yes") { }
add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
}
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale"; if (aom_config("CONFIG_AOM_QM") eq "yes") {
} else { add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale"; add_proto qw/void av1_highbd_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
}
# fdct functions
if (aom_config("CONFIG_TX64X64") eq "yes") { if (aom_config("CONFIG_TX64X64") eq "yes") {
add_proto qw/void av1_highbd_fht64x64/, "const int16_t *input, tran_low_t *output, int stride, int tx_type"; add_proto qw/void av1_highbd_quantize_fp_64x64/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
} }
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride"; add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, const qm_val_t * qm_ptr, const qm_val_t * iqm_ptr, int log_scale";
} else {
add_proto qw/void av1_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count"; add_proto qw/void av1_highbd_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
specialize qw/av1_highbd_quantize_fp sse4_1 avx2/;
add_proto qw/void av1_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int log_scale";
} }
add_proto qw/void av1_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
# End av1_high encoder functions # End av1_high encoder functions
if (aom_config("CONFIG_EXT_INTER") eq "yes") { if (aom_config("CONFIG_EXT_INTER") eq "yes") {
......
...@@ -845,7 +845,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, ...@@ -845,7 +845,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
} }
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ptr,
...@@ -951,7 +950,6 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, ...@@ -951,7 +950,6 @@ void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr,
} }
} }
#if CONFIG_HIGHBITDEPTH
static INLINE void highbd_quantize_dc( static INLINE void highbd_quantize_dc(
const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr,
...@@ -980,7 +978,6 @@ static INLINE void highbd_quantize_dc( ...@@ -980,7 +978,6 @@ static INLINE void highbd_quantize_dc(
} }
*eob_ptr = eob + 1; *eob_ptr = eob + 1;
} }
#endif // CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr, void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
...@@ -1519,9 +1516,7 @@ void av1_highbd_quantize_dc_nuq_facade( ...@@ -1519,9 +1516,7 @@ void av1_highbd_quantize_dc_nuq_facade(
} }
} }
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
int skip_block, const int16_t *zbin_ptr, int skip_block, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *round_ptr,
...@@ -1579,8 +1574,6 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, ...@@ -1579,8 +1574,6 @@ void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count,
*eob_ptr = eob + 1; *eob_ptr = eob + 1;
} }
#endif // CONFIG_HIGHBITDEPTH
static void invert_quant(int16_t *quant, int16_t *shift, int d) { static void invert_quant(int16_t *quant, int16_t *shift, int d) {
uint32_t t; uint32_t t;
int l, m; int l, m;
......
...@@ -146,7 +146,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, ...@@ -146,7 +146,6 @@ void av1_quantize_dc_nuq_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const QUANT_PARAM *qparam); const QUANT_PARAM *qparam);
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
#if CONFIG_HIGHBITDEPTH
void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr,
intptr_t n_coeffs, const MACROBLOCK_PLANE *p, intptr_t n_coeffs, const MACROBLOCK_PLANE *p,
tran_low_t *qcoeff_ptr, tran_low_t *qcoeff_ptr,
...@@ -190,7 +189,6 @@ void av1_highbd_quantize_dc_nuq_facade( ...@@ -190,7 +189,6 @@ void av1_highbd_quantize_dc_nuq_facade(
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const SCAN_ORDER *sc, tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, const SCAN_ORDER *sc,
const QUANT_PARAM *qparam); const QUANT_PARAM *qparam);
#endif // CONFIG_NEW_QUANT #endif // CONFIG_NEW_QUANT
#endif // CONFIG_HIGHBITDEPTH
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
......
...@@ -2020,12 +2020,10 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, ...@@ -2020,12 +2020,10 @@ void av1_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
} }
} }
#if CONFIG_HIGHBITDEPTH
void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output,
int stride) { int stride) {
av1_fwht4x4_c(input, output, stride); av1_fwht4x4_c(input, output, stride);
} }
#endif // CONFIG_HIGHBITDEPTH
void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride, void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) { int tx_type) {
......
...@@ -494,7 +494,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, ...@@ -494,7 +494,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block,
} }
#if !CONFIG_PVQ #if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
typedef enum QUANT_FUNC { typedef enum QUANT_FUNC {
QUANT_FUNC_LOWBD = 0, QUANT_FUNC_LOWBD = 0,
QUANT_FUNC_HIGHBD = 1, QUANT_FUNC_HIGHBD = 1,
...@@ -514,29 +513,12 @@ static AV1_QUANT_FACADE ...@@ -514,29 +513,12 @@ static AV1_QUANT_FACADE
#endif // !CONFIG_NEW_QUANT #endif // !CONFIG_NEW_QUANT
{ NULL, NULL } { NULL, NULL }
}; };
#endif // !CONFIG_PVQ
#else typedef void (*fwdTxfmFunc)(const int16_t *diff, tran_low_t *coeff, int stride,
FWD_TXFM_PARAM *param);
typedef enum QUANT_FUNC { static const fwdTxfmFunc fwd_txfm_func[2] = { av1_fwd_txfm,
QUANT_FUNC_LOWBD = 0, av1_highbd_fwd_txfm };
QUANT_FUNC_TYPES = 1
} QUANT_FUNC;
static AV1_QUANT_FACADE quant_func_list[AV1_XFORM_QUANT_TYPES]
[QUANT_FUNC_TYPES] = {
#if !CONFIG_NEW_QUANT
{ av1_quantize_fp_facade },
{ av1_quantize_b_facade },
{ av1_quantize_dc_facade },
#else // !CONFIG_NEW_QUANT
{ av1_quantize_fp_nuq_facade },
{ av1_quantize_b_nuq_facade },
{ av1_quantize_dc_nuq_facade },
#endif // !CONFIG_NEW_QUANT
{ NULL }
};
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_PVQ
void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
int blk_row, int blk_col, BLOCK_SIZE plane_bsize, int blk_row, int blk_col, BLOCK_SIZE plane_bsize,
...@@ -668,29 +650,13 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, ...@@ -668,29 +650,13 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id]; fwd_txfm_param.lossless = xd->lossless[mbmi->segment_id];
#if !CONFIG_PVQ #if !CONFIG_PVQ
#if CONFIG_HIGHBITDEPTH
fwd_txfm_param.bd = xd->bd; fwd_txfm_param.bd = xd->bd;
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { const int is_hbd = get_bitdepth_data_path_index(xd);
av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param); fwd_txfm_func[is_hbd](src_diff, coeff, diff_stride, &fwd_txfm_param);
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_HIGHBD](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else {
av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
}
}
#if CONFIG_LV_MAP
p->txb_entropy_ctx[block] =
(uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif // CONFIG_LV_MAP
return;
}
#endif // CONFIG_HIGHBITDEPTH
av1_fwd_txfm(src_diff, coeff, diff_stride, &fwd_txfm_param);
if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) { if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) {
if (LIKELY(!x->skip_block)) { if (LIKELY(!x->skip_block)) {
quant_func_list[xform_quant_idx][QUANT_FUNC_LOWBD]( quant_func_list[xform_quant_idx][is_hbd](
coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam); coeff, tx2d_size, p, qcoeff, pd, dqcoeff, eob, scan_order, &qparam);
} else { } else {
av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob); av1_quantize_skip(tx2d_size, qcoeff, dqcoeff, eob);
...@@ -700,7 +666,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, ...@@ -700,7 +666,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
p->txb_entropy_ctx[block] = p->txb_entropy_ctx[block] =
(uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob); (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob);
#endif // CONFIG_LV_MAP #endif // CONFIG_LV_MAP
#else // #if !CONFIG_PVQ return;
#else // CONFIG_PVQ
(void)xform_quant_idx; (void)xform_quant_idx;
#if CONFIG_HIGHBITDEPTH #if CONFIG_HIGHBITDEPTH
fwd_txfm_param.bd = xd->bd; fwd_txfm_param.bd = xd->bd;
......
...@@ -203,7 +203,6 @@ static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff, ...@@ -203,7 +203,6 @@ static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff,
} }
#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT #endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT
#if CONFIG_HIGHBITDEPTH
#if CONFIG_CHROMA_2X2 #if CONFIG_CHROMA_2X2
static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless, int diff_stride, TX_TYPE tx_type, int lossless,
...@@ -237,6 +236,7 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, ...@@ -237,6 +236,7 @@ static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff,
static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int lossless, int diff_stride, TX_TYPE tx_type, int lossless,
const int bd) { const int bd) {
int32_t *dst_coeff = (int32_t *)coeff;
if (lossless) { if (lossless) {
assert(tx_type == DCT_DCT); assert(tx_type == DCT_DCT);
av1_highbd_fwht4x4(src_diff, coeff, diff_stride); av1_highbd_fwht4x4(src_diff, coeff, diff_stride);
...@@ -248,7 +248,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, ...@@ -248,7 +248,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case DCT_ADST: case DCT_ADST:
case ADST_ADST: case ADST_ADST:
// fallthrough intended // fallthrough intended
av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd); av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
break; break;
#if CONFIG_EXT_TX #if CONFIG_EXT_TX
case FLIPADST_DCT: case FLIPADST_DCT:
...@@ -257,7 +257,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, ...@@ -257,7 +257,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case ADST_FLIPADST: case ADST_FLIPADST:
case FLIPADST_ADST: case FLIPADST_ADST:
// fallthrough intended // fallthrough intended
av1_fwd_txfm2d_4x4(src_diff, coeff, diff_stride, tx_type, bd); av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd);
break; break;
// use the c version for anything including identity for now // use the c version for anything including identity for now
case V_DCT: case V_DCT:
...@@ -268,7 +268,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, ...@@ -268,7 +268,7 @@ static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff,
case H_FLIPADST: case H_FLIPADST:
case IDTX: case IDTX:
// fallthrough intended // fallthrough intended
av1_fwd_txfm2d_4x4_c(src_diff, coeff, diff_stride, tx_type, bd); av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
break; break;
#endif // CONFIG_EXT_TX #endif // CONFIG_EXT_TX
default: assert(0); default: assert(0);
...@@ -279,47 +279,54 @@ static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff, ...@@ -279,47 +279,54 @@ static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) { FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt; (void)fwd_txfm_opt;
av1_fwd_txfm2d_4x8_c(src_diff, coeff, diff_stride, tx_type, bd); int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
} }
static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) { FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt; (void)fwd_txfm_opt;
av1_fwd_txfm2d_8x4_c(src_diff, coeff, diff_stride, tx_type, bd); int32_t *dst_coeff = (int32_t *)coeff;
av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd);
} }
static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff, static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type, int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt, const int bd) { FWD_TXFM_OPT fwd_txfm_opt, const int bd) {
(void)fwd_txfm_opt; (void)fwd_txfm_opt;
av1_fwd_txfm2d_8x16_c(src_diff, coeff, diff_stride, tx_type, bd); int32_t *dst_coeff = (int32_t *)coeff