Commit fbaf5135 authored by Cheng Chen

JNT_COMP: SIMD for av1_warp_affine

Add low bit-depth SIMD function for av1_warp_affine based on
existing SIMD implementation.
Unit tests are added.

Change-Id: I1b4033fa75b53a81cb20a4bb5cc60413708b568c
parent 6147b1b6
......@@ -425,6 +425,12 @@ set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/x86/warp_plane_ssse3.c")
if (CONFIG_JNT_COMP)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/warp_plane_sse4.c")
endif ()
if (CONFIG_HIGHBITDEPTH)
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
......
......@@ -147,6 +147,9 @@ endif
AV1_COMMON_SRCS-$(HAVE_SSE2) += common/x86/warp_plane_sse2.c
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/warp_plane_ssse3.c
ifeq ($(CONFIG_JNT_COMP), yes)
AV1_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/warp_plane_sse4.c
endif
ifeq ($(CONFIG_HIGHBITDEPTH),yes)
AV1_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/highbd_warp_plane_ssse3.c
endif
......
......@@ -502,7 +502,14 @@ if ($opts{config} !~ /libs-x86-win32-vs.*/) {
# WARPED_MOTION / GLOBAL_MOTION functions
add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
specialize qw/av1_warp_affine sse2 ssse3/;
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
specialize qw/av1_warp_affine sse4_1/;
}
} else {
specialize qw/av1_warp_affine sse2 ssse3/;
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
......
This diff is collapsed.
......@@ -22,6 +22,16 @@ using libaom_test::AV1HighbdWarpFilter::AV1HighbdWarpFilterTest;
namespace {
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND && HAVE_SSE4_1
TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(
SSE4_1, AV1WarpFilterTest,
libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse4_1));
// TODO(chengchen): add unit tests for high bit depth
#else // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND && HAVE_SSE4_1
TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(
......@@ -42,5 +52,6 @@ TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdWarpFilterTest,
libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
#endif
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND && HAVE_SSE4_1
} // namespace
......@@ -132,48 +132,76 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
for (int ii = 0; ii < 2; ++ii) {
for (int jj = 0; jj < 5; ++jj) {
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
// Prepare two copies of the destination
for (j = 0; j < out_w * out_h; ++j) {
int32_t v = rnd_.Rand16();
dsta[j] = v;
dstb[j] = v;
}
conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
} else {
conv_params = get_conv_params(0, 0, 0);
}
if (use_no_round) {
// Prepare two copies of the destination
for (j = 0; j < out_w * out_h; ++j) {
int32_t v = rnd_.Rand16();
dsta[j] = v;
dstb[j] = v;
}
conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
} else {
conv_params = get_conv_params(0, 0, 0);
}
#endif
av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
out_h, out_w, sub_x, sub_y, &conv_params, alpha, beta,
gamma, delta);
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
if (jj >= 4) {
conv_params.fwd_offset = -1;
conv_params.bck_offset = -1;
} else {
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
av1_warp_affine_c(mat, input, w, h, stride, output, 32, 32, out_w,
out_h, out_w, sub_x, sub_y, &conv_params, alpha,
beta, gamma, delta);
#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
}
if (use_no_round) {
conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
}
#endif
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma, delta);
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
if (jj >= 4) {
conv_params.fwd_offset = -1;
conv_params.bck_offset = -1;
} else {
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, &conv_params, alpha, beta, gamma,
delta);
#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(dsta[j], dstb[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
} else {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
if (use_no_round) {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(dsta[j], dstb[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
} else {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
#else
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
#endif
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
}
}
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
}
}
delete[] input_;
......@@ -313,51 +341,77 @@ void AV1HighbdWarpFilterTest::RunCheckOutput(
for (sub_x = 0; sub_x < 2; ++sub_x)
for (sub_y = 0; sub_y < 2; ++sub_y) {
generate_model(mat, &alpha, &beta, &gamma, &delta);
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
for (int ii = 0; ii < 2; ++ii) {
for (int jj = 0; jj < 5; ++jj) {
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
// Prepare two copies of the destination
for (j = 0; j < out_w * out_h; ++j) {
int32_t v = rnd_.Rand16();
dsta[j] = v;
dstb[j] = v;
}
conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
} else {
conv_params = get_conv_params(0, 0, 0);
}
if (use_no_round) {
// Prepare two copies of the destination
for (j = 0; j < out_w * out_h; ++j) {
int32_t v = rnd_.Rand16();
dsta[j] = v;
dstb[j] = v;
}
conv_params = get_conv_params_no_round(0, 0, 0, dsta, out_w);
} else {
conv_params = get_conv_params(0, 0, 0);
}
#endif
av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
out_w, out_h, out_w, sub_x, sub_y, bd,
&conv_params, alpha, beta, gamma, delta);
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
if (jj >= 4) {
conv_params.fwd_offset = -1;
conv_params.bck_offset = -1;
} else {
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
av1_highbd_warp_affine_c(mat, input, w, h, stride, output, 32, 32,
out_w, out_h, out_w, sub_x, sub_y, bd,
&conv_params, alpha, beta, gamma, delta);
#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
// TODO(angiebird): Change this to test_impl once we have SIMD
// implementation
conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
}
if (use_no_round) {
// TODO(angiebird): Change this to test_impl once we have SIMD
// implementation
conv_params = get_conv_params_no_round(0, 0, 0, dstb, out_w);
}
#endif
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma,
delta);
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
if (jj >= 4) {
conv_params.fwd_offset = -1;
conv_params.bck_offset = -1;
} else {
conv_params.fwd_offset = quant_dist_lookup_table[ii][jj][0];
conv_params.bck_offset = quant_dist_lookup_table[ii][jj][1];
}
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
test_impl(mat, input, w, h, stride, output2, 32, 32, out_w, out_h,
out_w, sub_x, sub_y, bd, &conv_params, alpha, beta, gamma,
delta);
#if CONFIG_CONVOLVE_ROUND
if (use_no_round) {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(dsta[j], dstb[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
} else {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
if (use_no_round) {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(dsta[j], dstb[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
} else {
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
}
#else
for (j = 0; j < out_w * out_h; ++j)
ASSERT_EQ(output[j], output2[j])
<< "Pixel mismatch at index " << j << " = (" << (j % out_w)
<< ", " << (j / out_w) << ") on iteration " << i;
#endif
#if CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
}
}
#endif // CONFIG_JNT_COMP && CONFIG_CONVOLVE_ROUND
}
}
......
......@@ -21,6 +21,7 @@
#include "test/register_state_check.h"
#include "av1/common/mv.h"
#include "av1/common/common_data.h"
namespace libaom_test {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment