Commit 8b909189 authored by Scott LaVarnway, committed by Yunqing Wang

Add av1_convolve_x_avx2() and av1_convolve_y_avx2()

Based on av1_convolve_2d_avx2(). A special case was added to call
the sse2 version, which is faster for widths < 16.

Change-Id: Ia03ccb1c7d30e7b0d2ba7b36c7c8d5775a6d4e8f
parent e7f3b89f
......@@ -530,9 +530,9 @@ specialize qw/av1_convolve_2d_copy sse2/;
add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy_sr c/;
# Prototypes and SIMD specializations for av1_convolve_x and av1_convolve_y.
# Both functions share the same parameter list.
# NOTE(review): each function shows two `specialize` lines here because this
# view keeps both sides of the diff: the earlier line lists only sse2, the
# later one lists sse2 and avx2 (the avx2 variants are what this commit adds).
# Presumably only the later line of each pair belongs in the final file — confirm.
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x sse2/;
specialize qw/av1_convolve_x sse2 avx2/;
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_y sse2/;
specialize qw/av1_convolve_y sse2 avx2/;
add_proto qw/void av1_convolve_x_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x_sr c/;
add_proto qw/void av1_convolve_y_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
......
This diff is collapsed.
......@@ -26,6 +26,8 @@ using libaom_test::AV1HighbdConvolve2D::AV1HighbdJntConvolve2DTest;
namespace {
// Timing benchmark for the implementation carried in test parameter 2.
// The DISABLED_ prefix means googletest skips it unless disabled tests are
// explicitly requested.
TEST_P(AV1Convolve2DTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(2)); }
// Runs RunCheckOutput() on the implementation carried in test parameter 2.
TEST_P(AV1Convolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(2)); }
INSTANTIATE_TEST_CASE_P(
......@@ -57,6 +59,14 @@ INSTANTIATE_TEST_CASE_P(
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_sse2, 1, 1, 1));
#if HAVE_AVX2
// AVX2 instantiations of AV1Convolve2DTest, one per kernel under test.
// NOTE(review): the three trailing BuildParams arguments presumably map to
// has_subx, has_suby and is_compound (RunSpeedTest reads those from
// GET_PARAM(3..5)) — confirm against BuildParams.
// av1_convolve_x_avx2: flags (1, 0, 1).
INSTANTIATE_TEST_CASE_P(
AVX2_X, AV1Convolve2DTest,
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_x_avx2, 1, 0, 1));
// av1_convolve_y_avx2: flags (0, 1, 1).
INSTANTIATE_TEST_CASE_P(
AVX2_Y, AV1Convolve2DTest,
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_y_avx2, 0, 1, 1));
// av1_convolve_2d_avx2: flags (1, 1, 1).
INSTANTIATE_TEST_CASE_P(
AVX2, AV1Convolve2DTest,
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_avx2, 1, 1, 1));
......
......@@ -11,8 +11,9 @@
#include "test/av1_convolve_2d_test_util.h"
#include "av1/common/convolve.h"
#include "aom_ports/aom_timer.h"
#include "av1/common/common_data.h"
#include "av1/common/convolve.h"
using std::tr1::tuple;
using std::tr1::make_tuple;
......@@ -107,6 +108,51 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
delete[] output2;
}
// Times |test_impl| on a randomly filled 128x128 source image.
//
// The block size (out_w x out_h) comes from test parameters 0 and 1. The
// implementation is invoked 100000 times with the EIGHTTAP_REGULAR filter in
// both directions and zero sub-pixel offsets; the total elapsed time is
// printed in milliseconds together with test parameters 3 and 4.
void AV1Convolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
  const int kSrcWidth = 128;
  const int kSrcHeight = 128;
  const int kIterations = 100000;

  const int out_w = GET_PARAM(0);
  const int out_h = GET_PARAM(1);
  const int has_subx = GET_PARAM(3);
  const int has_suby = GET_PARAM(4);
  // Read but deliberately unused in this benchmark.
  const int is_compound = GET_PARAM(5);
  (void)is_compound;

  const int src_len = kSrcHeight * kSrcWidth;
  uint8_t *input = new uint8_t[src_len];
  const int conv_len = out_h * MAX_SB_SIZE;
  CONV_BUF_TYPE *output2 = new CONV_BUF_TYPE[conv_len];

  // Row-major fill; a single flat pass visits the pixels in the same order as
  // a nested row/column loop would.
  for (int idx = 0; idx < src_len; ++idx) input[idx] = rnd_.Rand8();

  const int filter_type = EIGHTTAP_REGULAR;
  InterpFilterParams horiz_params =
      av1_get_interp_filter_params(static_cast<InterpFilter>(filter_type));
  InterpFilterParams vert_params =
      av1_get_interp_filter_params(static_cast<InterpFilter>(filter_type));

  const int subpel_x = 0;
  const int subpel_y = 0;
  const int do_average = 0;
  ConvolveParams conv_params2 =
      get_conv_params_no_round(0, do_average, 0, output2, MAX_SB_SIZE, 1);

  // The destination pointer is NULL with stride 0; |output2| is handed over
  // through |conv_params2| instead.
  // NOTE(review): |input| is passed without any border margin. If the
  // implementation reads filter taps located before the source pointer this
  // would access memory outside the buffer — confirm.
  aom_usec_timer timer;
  aom_usec_timer_start(&timer);
  for (int iter = 0; iter < kIterations; ++iter) {
    test_impl(input, kSrcWidth, NULL, 0, out_w, out_h, &horiz_params,
              &vert_params, subpel_x, subpel_y, &conv_params2);
  }
  aom_usec_timer_mark(&timer);

  const int elapsed_time = static_cast<int>(aom_usec_timer_elapsed(&timer));
  printf("%d,%d convolve w: %d h: %d time: %5d ms\n", has_subx, has_suby, out_w,
         out_h, elapsed_time / 1000);

  delete[] input;
  delete[] output2;
}
#if CONFIG_JNT_COMP
AV1JntConvolve2DTest::~AV1JntConvolve2DTest() {}
void AV1JntConvolve2DTest::SetUp() {
......
......@@ -46,6 +46,7 @@ class AV1Convolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
protected:
void RunCheckOutput(convolve_2d_func test_impl);
void RunSpeedTest(convolve_2d_func test_impl);
libaom_test::ACMRandom rnd_;
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment