Commit 3bd83775 authored by Yi Luo

High bit depth 32x32 inverse DCT_DCT transform, AVX2

- Witness the following user-level speedup on AV1 baseline:
 Encoding time reduction: 4.26%
 Decoding time reduction: 25.35%

Change-Id: Ideaf3cd473ad45ed9256c80d5a5daed0a6e098cf
parent b2d26453
......@@ -144,6 +144,8 @@ AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/av1_fwd_txfm2d_sse4.c
endif
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_txfm_utility_sse4.h
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_inv_txfm_sse4.c
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/highbd_inv_txfm_avx2.c
endif
ifneq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
......
......@@ -140,7 +140,6 @@ AV1_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/dct_ssse3.c
AV1_CX_SRCS-$(HAVE_AVX2) += encoder/x86/hybrid_fwd_txfm_avx2.c
ifeq ($(CONFIG_AOM_HIGHBITDEPTH),yes)
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_fwd_txfm_sse4.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += common/x86/highbd_inv_txfm_sse4.c
AV1_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/av1_highbd_quantize_sse4.c
endif
......
......@@ -565,7 +565,7 @@ if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_inv_txfm2d_add_16x16/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
specialize qw/av1_inv_txfm2d_add_16x16 sse4_1/;
add_proto qw/void av1_inv_txfm2d_add_32x32/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
specialize qw/av1_inv_txfm2d_add_32x32/;
specialize qw/av1_inv_txfm2d_add_32x32 avx2/;
add_proto qw/void av1_inv_txfm2d_add_64x64/, "const int32_t *input, uint16_t *output, int stride, int tx_type, int bd";
specialize qw/av1_inv_txfm2d_add_64x64/;
}
......
This diff is collapsed.
......@@ -86,6 +86,8 @@ class AV1HighbdInvHTNxN : public ::testing::TestWithParam<IHbdHtParam> {
return 8;
} else if (256 == num_coeffs_) {
return 16;
} else if (1024 == num_coeffs_) {
return 32;
} else {
return 0;
}
......@@ -216,4 +218,19 @@ INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdInvHTNxN,
::testing::ValuesIn(kArrayIhtParam));
#endif // HAVE_SSE4_1 && CONFIG_AOM_HIGHBITDEPTH
// Registers 32x32 cases of the AV1HighbdInvHTNxN parameterized test that
// exercise the AVX2 inverse transform. Compiled only when both AVX2 and
// high-bit-depth support are enabled.
#if HAVE_AVX2 && CONFIG_AOM_HIGHBITDEPTH
// Leading tuple fields shared by every 32x32 case: the C forward transform,
// the AVX2 inverse transform, the C inverse transform, and 1024 (= 32 * 32).
// NOTE(review): presumably the AVX2 function is the one under test, the C
// inverse is the reference, and 1024 is the coefficient count -- confirm
// against the IHbdHtParam definition.
#define PARAM_LIST_32X32 \
&av1_fwd_txfm2d_32x32_c, &av1_inv_txfm2d_add_32x32_avx2, \
&av1_inv_txfm2d_add_32x32_c, 1024
// Only DCT_DCT is listed here, with trailing values 10 and 12.
// NOTE(review): the trailing value looks like the bit depth -- confirm.
const IHbdHtParam kArrayIhtParam32x32[] = {
// 32x32
make_tuple(PARAM_LIST_32X32, DCT_DCT, 10),
make_tuple(PARAM_LIST_32X32, DCT_DCT, 12),
};
// Instantiate the test suite under the "AVX2" prefix, one test per entry above.
INSTANTIATE_TEST_CASE_P(AVX2, AV1HighbdInvHTNxN,
::testing::ValuesIn(kArrayIhtParam32x32));
#endif // HAVE_AVX2 && CONFIG_AOM_HIGHBITDEPTH
} // namespace
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment