Commit d6db46b4 authored by Maxym Dmytrychenko

AVX2 implementation for convolve_2d

Can be up to 40% faster, with bit-exact results.

Change-Id: Ia67ba154222fdfb6173bf8942275649e511abe43
parent 771a80ab
......@@ -301,6 +301,10 @@ if (CONFIG_CONVOLVE_ROUND)
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_sse2.c")
set(AOM_AV1_COMMON_INTRIN_AVX2
${AOM_AV1_COMMON_INTRIN_AVX2}
"${AOM_ROOT}/av1/common/x86/convolve_2d_avx2.c")
if (CONFIG_HIGHBITDEPTH)
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
......
......@@ -162,6 +162,7 @@ endif
ifeq ($(CONFIG_CONVOLVE_ROUND),yes)
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/convolve_2d_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/convolve_2d_sse4.c
AV1_COMMON_SRCS-$(HAVE_AVX2) += common/x86/convolve_2d_avx2.c
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/highbd_convolve_2d_ssse3.c
endif
......
......@@ -566,7 +566,7 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d sse2/;
specialize qw/av1_convolve_2d sse2 avx2/;
add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
specialize qw/av1_convolve_rounding avx2/;
......
This diff is collapsed.
......@@ -37,6 +37,9 @@ TEST_P(AV1Convolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(2)); }
INSTANTIATE_TEST_CASE_P(
SSE2, AV1Convolve2DTest,
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sse2));
INSTANTIATE_TEST_CASE_P(
AVX2, AV1Convolve2DTest,
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_avx2));
#endif // CONFIG_JNT_COMP
#if CONFIG_HIGHBITDEPTH && HAVE_SSSE3
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment