Commit fa904a09 authored by Imdad Sardharwalla

Fix and clean up warp filter SIMD code

- Fixed SSE4.1 implementation of the highbd warp filter.
- Removed the SSE2 and SSSE3 implementations of both the lowbd and highbd
  versions of the warp filter, as these will not work with CONFIG_JNT_COMP=1.
  (There are now only SSE4.1 implementations)
- The warp filter tests have been correspondingly updated.

Change-Id: I2bc1ebb69f2ed876803902908586c3848c03c11e
parent aec60dd8
......@@ -427,29 +427,13 @@ set(AOM_AV1_COMMON_SOURCES
"${AOM_ROOT}/av1/common/warped_motion.c"
"${AOM_ROOT}/av1/common/warped_motion.h")
set(AOM_AV1_COMMON_INTRIN_SSE2
${AOM_AV1_COMMON_INTRIN_SSE2}
"${AOM_ROOT}/av1/common/x86/warp_plane_sse2.c")
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/x86/warp_plane_ssse3.c")
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/warp_plane_sse4.c")
if (CONFIG_JNT_COMP)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/warp_plane_sse4.c")
endif ()
set(AOM_AV1_COMMON_INTRIN_SSSE3
${AOM_AV1_COMMON_INTRIN_SSSE3}
"${AOM_ROOT}/av1/common/x86/highbd_warp_plane_ssse3.c")
if (CONFIG_JNT_COMP)
set(AOM_AV1_COMMON_INTRIN_SSE4_1
${AOM_AV1_COMMON_INTRIN_SSE4_1}
"${AOM_ROOT}/av1/common/x86/highbd_warp_plane_sse4.c")
endif ()
"${AOM_ROOT}/av1/common/x86/highbd_warp_plane_sse4.c")
if (CONFIG_HASH_ME)
set(AOM_AV1_ENCODER_SOURCES
......
......@@ -450,22 +450,10 @@ if ($opts{config} !~ /libs-x86-win32-vs.*/) {
# WARPED_MOTION / GLOBAL_MOTION functions
add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
specialize qw/av1_warp_affine sse4_1/;
} else {
specialize qw/av1_warp_affine sse2 ssse3/;
}
specialize qw/av1_warp_affine sse4_1/;
add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, ConvolveParams *conv_params, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta";
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
specialize qw/av1_highbd_warp_affine sse4_1/;
} else {
specialize qw/av1_highbd_warp_affine ssse3/;
}
if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/double compute_cross_correlation/, "unsigned char *im1, int stride1, int x1, int y1, unsigned char *im2, int stride2, int x2, int y2";
......
......@@ -39,6 +39,8 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == FILTER_BITS);
assert(!(bd == 12 && reduce_bits_horiz < 5));
#if CONFIG_JNT_COMP
const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -20,39 +20,19 @@ using std::tr1::tuple;
namespace {
#if CONFIG_JNT_COMP && HAVE_SSE4_1
#if HAVE_SSE4_1
TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(
SSE4_1, AV1WarpFilterTest,
libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse4_1));
TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
RunCheckOutput(av1_highbd_warp_affine_sse4_1);
}
TEST_P(AV1HighbdWarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(4)); }
INSTANTIATE_TEST_CASE_P(SSE4_1, AV1HighbdWarpFilterTest,
libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
libaom_test::AV1HighbdWarpFilter::BuildParams(
av1_highbd_warp_affine_sse4_1));
#else // CONFIG_JNT_COMP && HAVE_SSE4_1
TEST_P(AV1WarpFilterTest, CheckOutput) { RunCheckOutput(GET_PARAM(3)); }
INSTANTIATE_TEST_CASE_P(
SSE2, AV1WarpFilterTest,
libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_sse2));
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
SSSE3, AV1WarpFilterTest,
libaom_test::AV1WarpFilter::BuildParams(av1_warp_affine_ssse3));
TEST_P(AV1HighbdWarpFilterTest, CheckOutput) {
RunCheckOutput(av1_highbd_warp_affine_ssse3);
}
INSTANTIATE_TEST_CASE_P(SSSE3, AV1HighbdWarpFilterTest,
libaom_test::AV1HighbdWarpFilter::GetDefaultParams());
#endif
#endif // CONFIG_JNT_COMP && CONFIG_CONVOVLE_ROUND && HAVE_SSE4_1
#endif // HAVE_SSE4_1
} // namespace
......@@ -203,18 +203,19 @@ void AV1WarpFilterTest::RunCheckOutput(warp_affine_func test_impl) {
namespace AV1HighbdWarpFilter {
::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams() {
const HighbdWarpTestParam defaultParams[] = {
make_tuple(4, 4, 100, 8), make_tuple(8, 8, 100, 8),
make_tuple(64, 64, 100, 8), make_tuple(4, 16, 100, 8),
make_tuple(32, 8, 100, 8), make_tuple(4, 4, 100, 10),
make_tuple(8, 8, 100, 10), make_tuple(64, 64, 100, 10),
make_tuple(4, 16, 100, 10), make_tuple(32, 8, 100, 10),
make_tuple(4, 4, 100, 12), make_tuple(8, 8, 100, 12),
make_tuple(64, 64, 100, 12), make_tuple(4, 16, 100, 12),
make_tuple(32, 8, 100, 12),
::testing::internal::ParamGenerator<HighbdWarpTestParam> BuildParams(
highbd_warp_affine_func filter) {
const HighbdWarpTestParam params[] = {
make_tuple(4, 4, 100, 8, filter), make_tuple(8, 8, 100, 8, filter),
make_tuple(64, 64, 100, 8, filter), make_tuple(4, 16, 100, 8, filter),
make_tuple(32, 8, 100, 8, filter), make_tuple(4, 4, 100, 10, filter),
make_tuple(8, 8, 100, 10, filter), make_tuple(64, 64, 100, 10, filter),
make_tuple(4, 16, 100, 10, filter), make_tuple(32, 8, 100, 10, filter),
make_tuple(4, 4, 100, 12, filter), make_tuple(8, 8, 100, 12, filter),
make_tuple(64, 64, 100, 12, filter), make_tuple(4, 16, 100, 12, filter),
make_tuple(32, 8, 100, 12, filter),
};
return ::testing::ValuesIn(defaultParams);
return ::testing::ValuesIn(params);
}
AV1HighbdWarpFilterTest::~AV1HighbdWarpFilterTest() {}
......
......@@ -69,9 +69,11 @@ typedef void (*highbd_warp_affine_func)(const int32_t *mat, const uint16_t *ref,
int16_t alpha, int16_t beta,
int16_t gamma, int16_t delta);
typedef std::tr1::tuple<int, int, int, int> HighbdWarpTestParam;
typedef std::tr1::tuple<int, int, int, int, highbd_warp_affine_func>
HighbdWarpTestParam;
::testing::internal::ParamGenerator<HighbdWarpTestParam> GetDefaultParams();
::testing::internal::ParamGenerator<HighbdWarpTestParam> BuildParams(
highbd_warp_affine_func filter);
class AV1HighbdWarpFilterTest
: public ::testing::TestWithParam<HighbdWarpTestParam> {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment