Commit 5f0485b4 authored by Linfeng Zhang
Browse files

Add av1_convolve_2d_copy_sr_sse2()

Change-Id: I7776ccaecb6933af47253a15fa8ed8a53346fac1
parent e07a675f
......@@ -526,7 +526,7 @@ specialize qw/av1_convolve_rounding avx2/;
add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy sse2/;
# Prototype and specialization list for av1_convolve_2d_copy_sr. It takes the
# same argument list as av1_convolve_2d_copy above.
# NOTE(review): both a `c` and an `sse2` specialize line appear here; this looks
# like the removed and added sides of the diff shown together. Confirm that
# only the sse2 line is meant to remain.
add_proto qw/void av1_convolve_2d_copy_sr/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy_sr c/;
specialize qw/av1_convolve_2d_copy_sr sse2/;
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x sse2 avx2/;
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
......
......@@ -320,6 +320,148 @@ void av1_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
}
}
#if CONFIG_EXT_PARTITION
// Copies one 128-byte row (eight 16-byte lanes) from src to dst.
// Every lane is read with an unaligned load before any lane is written, and
// the writes use aligned stores, so dst has to be 16-byte aligned while src
// does not.
static INLINE void copy_128(const uint8_t *src, uint8_t *dst) {
  __m128i lanes[8];
  int k;

  // Load phase: pull the whole row into registers first.
  for (k = 0; k < 8; ++k) {
    lanes[k] = _mm_loadu_si128((const __m128i *)(src + k * 16));
  }

  // Store phase: write the row out lane by lane.
  for (k = 0; k < 8; ++k) {
    _mm_store_si128((__m128i *)(dst + k * 16), lanes[k]);
  }
}
#endif
// SSE2 block copy exposed through the 2-D convolve interface.
//
// Copies a w x h block of 8-bit pixels from src to dst, two rows per loop
// iteration. The filter, sub-pixel and conv_params arguments are accepted only
// to match the shared convolve signature and are ignored.
//
//   src, src_stride  source block and its row pitch in bytes; src may be
//                    unaligned (unaligned loads are used throughout).
//   dst, dst_stride  destination block and its row pitch in bytes. For
//                    w >= 16 both dst and dst_stride must be multiples of 16
//                    because aligned stores are used.
//   w                block width: 2, 4, 8, 16, 32 or 64, plus 128 when
//                    CONFIG_EXT_PARTITION is enabled.
//   h                block height: must be positive and even.
void av1_convolve_2d_copy_sr_sse2(const uint8_t *src, int src_stride,
                                  uint8_t *dst, int dst_stride, int w, int h,
                                  InterpFilterParams *filter_params_x,
                                  InterpFilterParams *filter_params_y,
                                  const int subpel_x_q4, const int subpel_y_q4,
                                  ConvolveParams *conv_params) {
  (void)filter_params_x;
  (void)filter_params_y;
  (void)subpel_x_q4;
  (void)subpel_y_q4;
  (void)conv_params;

  // Each loop below consumes two rows and stops only when h reaches exactly
  // zero, so a zero or odd height would never terminate and would run past
  // both buffers.
  assert(h > 0 && !(h % 2));

  if (w >= 16) {
    // The wide paths use aligned 16-byte stores.
    assert(!((intptr_t)dst % 16));
    assert(!(dst_stride % 16));
  }

  if (w == 2) {
    // NOTE(review): the 2- and 4-pixel paths use type-punned, possibly
    // unaligned scalar accesses; a memcpy-based copy would be the strictly
    // conforming form if <string.h> is available in this file.
    do {
      *(uint16_t *)dst = *(const uint16_t *)src;
      src += src_stride;
      dst += dst_stride;
      *(uint16_t *)dst = *(const uint16_t *)src;
      src += src_stride;
      dst += dst_stride;
      h -= 2;
    } while (h);
  } else if (w == 4) {
    do {
      *(uint32_t *)dst = *(const uint32_t *)src;
      src += src_stride;
      dst += dst_stride;
      *(uint32_t *)dst = *(const uint32_t *)src;
      src += src_stride;
      dst += dst_stride;
      h -= 2;
    } while (h);
  } else if (w == 8) {
    do {
      // Both rows are loaded before either is stored.
      __m128i s[2];
      s[0] = _mm_loadl_epi64((const __m128i *)src);
      src += src_stride;
      s[1] = _mm_loadl_epi64((const __m128i *)src);
      src += src_stride;
      _mm_storel_epi64((__m128i *)dst, s[0]);
      dst += dst_stride;
      _mm_storel_epi64((__m128i *)dst, s[1]);
      dst += dst_stride;
      h -= 2;
    } while (h);
  } else if (w == 16) {
    do {
      __m128i s[2];
      s[0] = _mm_loadu_si128((const __m128i *)src);
      src += src_stride;
      s[1] = _mm_loadu_si128((const __m128i *)src);
      src += src_stride;
      _mm_store_si128((__m128i *)dst, s[0]);
      dst += dst_stride;
      _mm_store_si128((__m128i *)dst, s[1]);
      dst += dst_stride;
      h -= 2;
    } while (h);
  } else if (w == 32) {
    do {
      __m128i s[4];
      s[0] = _mm_loadu_si128((const __m128i *)(src + 0 * 16));
      s[1] = _mm_loadu_si128((const __m128i *)(src + 1 * 16));
      src += src_stride;
      s[2] = _mm_loadu_si128((const __m128i *)(src + 0 * 16));
      s[3] = _mm_loadu_si128((const __m128i *)(src + 1 * 16));
      src += src_stride;
      _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
      _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
      dst += dst_stride;
      _mm_store_si128((__m128i *)(dst + 0 * 16), s[2]);
      _mm_store_si128((__m128i *)(dst + 1 * 16), s[3]);
      dst += dst_stride;
      h -= 2;
    } while (h);
  } else if (w == 64) {
    do {
      __m128i s[8];
      s[0] = _mm_loadu_si128((const __m128i *)(src + 0 * 16));
      s[1] = _mm_loadu_si128((const __m128i *)(src + 1 * 16));
      s[2] = _mm_loadu_si128((const __m128i *)(src + 2 * 16));
      s[3] = _mm_loadu_si128((const __m128i *)(src + 3 * 16));
      src += src_stride;
      s[4] = _mm_loadu_si128((const __m128i *)(src + 0 * 16));
      s[5] = _mm_loadu_si128((const __m128i *)(src + 1 * 16));
      s[6] = _mm_loadu_si128((const __m128i *)(src + 2 * 16));
      s[7] = _mm_loadu_si128((const __m128i *)(src + 3 * 16));
      src += src_stride;
      _mm_store_si128((__m128i *)(dst + 0 * 16), s[0]);
      _mm_store_si128((__m128i *)(dst + 1 * 16), s[1]);
      _mm_store_si128((__m128i *)(dst + 2 * 16), s[2]);
      _mm_store_si128((__m128i *)(dst + 3 * 16), s[3]);
      dst += dst_stride;
      _mm_store_si128((__m128i *)(dst + 0 * 16), s[4]);
      _mm_store_si128((__m128i *)(dst + 1 * 16), s[5]);
      _mm_store_si128((__m128i *)(dst + 2 * 16), s[6]);
      _mm_store_si128((__m128i *)(dst + 3 * 16), s[7]);
      dst += dst_stride;
      h -= 2;
    } while (h);
#if CONFIG_EXT_PARTITION
  } else {
    // The fallback copies 128 bytes per row regardless of w, so any other
    // width reaching this point would be silently mishandled.
    assert(w == 128);
    do {
      copy_128(src, dst);
      src += src_stride;
      dst += dst_stride;
      copy_128(src, dst);
      src += src_stride;
      dst += dst_stride;
      h -= 2;
    } while (h);
#endif  // CONFIG_EXT_PARTITION
  }
}
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
uint8_t *dst0, int dst_stride0, int w, int h,
......
......@@ -16,6 +16,7 @@ using std::tr1::tuple;
using std::tr1::make_tuple;
using libaom_test::ACMRandom;
using libaom_test::AV1Convolve2D::AV1Convolve2DTest;
using libaom_test::AV1Convolve2D::AV1Convolve2DSrTest;
#if CONFIG_JNT_COMP
using libaom_test::AV1Convolve2D::AV1JntConvolve2DTest;
#endif
......@@ -72,6 +73,14 @@ INSTANTIATE_TEST_CASE_P(
libaom_test::AV1Convolve2D::BuildParams(av1_convolve_2d_avx2, 1, 1, 1));
#endif
// Single-reference convolve tests. The only implementation registered here is
// av1_convolve_2d_copy_sr_sse2, which exists only in SSE2-enabled builds, so
// the whole group is guarded like the other per-ISA instantiations in this
// file.
// NOTE(review): assumes HAVE_SSE2 comes from the same config header that
// provides HAVE_SSE4_1 used below - confirm.
#if HAVE_SSE2
TEST_P(AV1Convolve2DSrTest, DISABLED_Speed) { RunSpeedTest(GET_PARAM(0)); }

TEST_P(AV1Convolve2DSrTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }

// The trailing 0, 0, 1 presumably map to the has_subx, has_suby and
// is_compound parameters the fixture reads via GET_PARAM(1..3).
INSTANTIATE_TEST_CASE_P(SSE2_COPY, AV1Convolve2DSrTest,
                        libaom_test::AV1Convolve2D::BuildParams(
                            av1_convolve_2d_copy_sr_sse2, 0, 0, 1));
#endif  // HAVE_SSE2
#if CONFIG_JNT_COMP && HAVE_SSE4_1
TEST_P(AV1JntConvolve2DTest, CheckOutput) { RunCheckOutput(GET_PARAM(0)); }
......
......@@ -39,7 +39,6 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
const int has_subx = GET_PARAM(1);
const int has_suby = GET_PARAM(2);
const int is_compound = GET_PARAM(3);
int i, j;
int hfilter, vfilter, subx, suby;
uint8_t input[kMaxSize * kMaxSize];
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
......@@ -47,9 +46,10 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
(void)is_compound;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
for (i = 0; i < MAX_SB_SQUARE; ++i) output[i] = output2[i] = rnd_.Rand31();
for (int i = 0; i < h; ++i)
for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
for (int i = 0; i < MAX_SB_SQUARE; ++i)
output[i] = output2[i] = rnd_.Rand31();
for (int block_idx = BLOCK_4X4; block_idx < BLOCK_SIZES_ALL; ++block_idx) {
const int out_w = block_size_wide[block_idx];
......@@ -81,8 +81,8 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
out_h, &filter_params_x, &filter_params_y, subx, suby,
&conv_params2);
for (i = 0; i < out_h; ++i) {
for (j = 0; j < out_w; ++j) {
for (int i = 0; i < out_h; ++i) {
for (int j = 0; j < out_w; ++j) {
int idx = i * MAX_SB_SIZE + j;
ASSERT_EQ(output[idx], output2[idx])
<< out_w << "x" << out_h << " Pixel mismatch at index "
......@@ -101,7 +101,6 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
void AV1Convolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
const int w = kMaxSize, h = kMaxSize;
int i, j;
const int has_subx = GET_PARAM(1);
const int has_suby = GET_PARAM(2);
const int is_compound = GET_PARAM(3);
......@@ -110,8 +109,8 @@ void AV1Convolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
uint8_t input[kMaxSize * kMaxSize];
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
for (int i = 0; i < h; ++i)
for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
int hfilter = EIGHTTAP_REGULAR, vfilter = EIGHTTAP_REGULAR;
int subx = 0, suby = 0;
......@@ -131,7 +130,7 @@ void AV1Convolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
aom_usec_timer timer;
aom_usec_timer_start(&timer);
for (i = 0; i < num_loops; ++i)
for (int i = 0; i < num_loops; ++i)
test_impl(input, w, NULL, 0, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params2);
......@@ -142,6 +141,138 @@ void AV1Convolve2DTest::RunSpeedTest(convolve_2d_func test_impl) {
}
}
// Out-of-line destructor; the fixture has nothing to release explicitly.
AV1Convolve2DSrTest::~AV1Convolve2DSrTest() {}
// Re-seeds the fixture's random generator with ACMRandom's deterministic seed
// before each test.
void AV1Convolve2DSrTest::SetUp() {
rnd_.Reset(ACMRandom::DeterministicSeed());
}
// Calls libaom_test::ClearSystemState() after each test.
void AV1Convolve2DSrTest::TearDown() { libaom_test::ClearSystemState(); }
// Checks test_impl against av1_convolve_2d_sr_c.
//
// For every block size (plus the half-size variant when either dimension is
// 4), every horizontal/vertical filter pair, both do_average settings and
// every enabled sub-pixel offset, both functions are run on the same randomly
// positioned source window. The two complete output buffers must then be
// byte-identical, so writes outside the block are caught as well.
void AV1Convolve2DSrTest::RunCheckOutput(convolve_2d_func test_impl) {
  const int w = kMaxSize, h = kMaxSize;
  const int has_subx = GET_PARAM(1);
  const int has_suby = GET_PARAM(2);
  const int is_compound = GET_PARAM(3);
  (void)is_compound;

  uint8_t input[kMaxSize * kMaxSize];
  DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);
  DECLARE_ALIGNED(32, uint8_t, output2[MAX_SB_SQUARE]);

  // Random source pixels, filled in raster order.
  for (int pos = 0; pos < h * w; ++pos) input[pos] = rnd_.Rand8();

  // Seed both outputs with the same random bytes so untouched pixels compare
  // equal.
  for (int pos = 0; pos < MAX_SB_SQUARE; ++pos) {
    const uint8_t fill = static_cast<uint8_t>(rnd_.Rand31());
    output2[pos] = fill;
    output[pos] = fill;
  }

  // Sub-pixel positions are swept only on the axes the parameters enable.
  const int subx_range = has_subx ? 16 : 1;
  const int suby_range = has_suby ? 16 : 1;

  for (int block_idx = BLOCK_4X4; block_idx < BLOCK_SIZES_ALL; ++block_idx) {
    const int block_w = block_size_wide[block_idx];
    const int block_h = block_size_high[block_idx];
    // Make sure that sizes 2xN and Nx2 are also tested for chroma.
    const int num_sizes = (block_w == 4 || block_h == 4) ? 2 : 1;

    for (int shift = 0; shift < num_sizes; ++shift) {  // luma and chroma
      const int out_w = block_w >> shift;
      const int out_h = block_h >> shift;

      for (int hfilter = EIGHTTAP_REGULAR; hfilter < INTERP_FILTERS_ALL;
           ++hfilter) {
        for (int vfilter = EIGHTTAP_REGULAR; vfilter < INTERP_FILTERS_ALL;
             ++vfilter) {
          InterpFilterParams filter_params_x =
              av1_get_interp_filter_params((InterpFilter)hfilter);
          InterpFilterParams filter_params_y =
              av1_get_interp_filter_params((InterpFilter)vfilter);

          for (int do_average = 0; do_average <= 1; ++do_average) {
            ConvolveParams conv_params1 =
                get_conv_params_no_round(0, do_average, 0, NULL, 0, 1);
            ConvolveParams conv_params2 =
                get_conv_params_no_round(0, do_average, 0, NULL, 0, 1);

            for (int subx = 0; subx < subx_range; ++subx) {
              for (int suby = 0; suby < suby_range; ++suby) {
                // Choose random locations within the source block
                const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
                const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
                const uint8_t *const window = input + offset_r * w + offset_c;

                av1_convolve_2d_sr_c(window, w, output, MAX_SB_SIZE, out_w,
                                     out_h, &filter_params_x, &filter_params_y,
                                     subx, suby, &conv_params1);
                test_impl(window, w, output2, MAX_SB_SIZE, out_w, out_h,
                          &filter_params_x, &filter_params_y, subx, suby,
                          &conv_params2);

                // Fast path: identical buffers need no per-pixel scan.
                if (memcmp(output, output2, sizeof(output)) == 0) continue;

                // Locate and report the first differing pixel.
                for (int row = 0; row < MAX_SB_SIZE; ++row) {
                  for (int col = 0; col < MAX_SB_SIZE; ++col) {
                    const int idx = row * MAX_SB_SIZE + col;
                    ASSERT_EQ(output[idx], output2[idx])
                        << out_w << "x" << out_h
                        << " Pixel mismatch at index " << idx << " = (" << row
                        << ", " << col << "), sub pixel offset = (" << suby
                        << ", " << subx << ")";
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
// Times test_impl for every block size (plus the half-size variant when either
// dimension is 4) and prints the average time per call in nanoseconds.
// The regular eight-tap filter is used on both axes with zero sub-pixel
// offsets and do_average disabled.
void AV1Convolve2DSrTest::RunSpeedTest(convolve_2d_func test_impl) {
  const int w = kMaxSize, h = kMaxSize;
  const int has_subx = GET_PARAM(1);
  const int has_suby = GET_PARAM(2);
  const int is_compound = GET_PARAM(3);
  (void)is_compound;

  uint8_t input[kMaxSize * kMaxSize];
  DECLARE_ALIGNED(32, uint8_t, output[MAX_SB_SQUARE]);

  // Random source pixels, filled in raster order.
  for (int pos = 0; pos < h * w; ++pos) input[pos] = rnd_.Rand8();

  const int hfilter = EIGHTTAP_REGULAR;
  const int vfilter = EIGHTTAP_REGULAR;
  const int subx = 0;
  const int suby = 0;
  InterpFilterParams filter_params_x =
      av1_get_interp_filter_params((InterpFilter)hfilter);
  InterpFilterParams filter_params_y =
      av1_get_interp_filter_params((InterpFilter)vfilter);
  const int do_average = 0;
  ConvolveParams conv_params2 =
      get_conv_params_no_round(0, do_average, 0, NULL, 0, 1);

  for (int block_idx = BLOCK_4X4; block_idx < BLOCK_SIZES_ALL; ++block_idx) {
    const int block_w = block_size_wide[block_idx];
    const int block_h = block_size_high[block_idx];
    // Make sure that sizes 2xN and Nx2 are also tested for chroma.
    const int num_sizes = (block_w == 4 || block_h == 4) ? 2 : 1;

    for (int shift = 0; shift < num_sizes; ++shift) {  // luma and chroma
      const int out_w = block_w >> shift;
      const int out_h = block_h >> shift;
      // Fewer iterations for larger blocks.
      const int num_loops = 1000000000 / (out_w + out_h);

      aom_usec_timer timer;
      aom_usec_timer_start(&timer);
      for (int iter = 0; iter < num_loops; ++iter) {
        test_impl(input, w, output, MAX_SB_SIZE, out_w, out_h,
                  &filter_params_x, &filter_params_y, subx, suby,
                  &conv_params2);
      }
      aom_usec_timer_mark(&timer);

      const int elapsed_time =
          static_cast<int>(aom_usec_timer_elapsed(&timer));
      // Microseconds in total -> nanoseconds per call.
      const double ns_per_call = 1000.0 * elapsed_time / num_loops;
      printf("%d,%d convolve %3dx%-3d: %7.2f ns\n", has_subx, has_suby, out_w,
             out_h, ns_per_call);
    }
  }
}
#if CONFIG_JNT_COMP
AV1JntConvolve2DTest::~AV1JntConvolve2DTest() {}
void AV1JntConvolve2DTest::SetUp() {
......@@ -155,7 +286,6 @@ void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
const int has_subx = GET_PARAM(1);
const int has_suby = GET_PARAM(2);
const int is_compound = GET_PARAM(3);
int i, j, k, l;
int hfilter, vfilter, subx, suby;
uint8_t input[kMaxSize * kMaxSize];
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
......@@ -163,9 +293,10 @@ void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
(void)is_compound;
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
for (i = 0; i < MAX_SB_SQUARE; ++i) output[i] = output2[i] = rnd_.Rand31();
for (int i = 0; i < h; ++i)
for (int j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand8();
for (int i = 0; i < MAX_SB_SQUARE; ++i)
output[i] = output2[i] = rnd_.Rand31();
for (int block_idx = BLOCK_4X4; block_idx < BLOCK_SIZES_ALL; ++block_idx) {
const int out_w = block_size_wide[block_idx];
......@@ -202,8 +333,8 @@ void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
out_h, &filter_params_x, &filter_params_y, subx, suby,
&conv_params2);
for (i = 0; i < out_h; ++i) {
for (j = 0; j < out_w; ++j) {
for (int i = 0; i < out_h; ++i) {
for (int j = 0; j < out_w; ++j) {
int idx = i * MAX_SB_SIZE + j;
ASSERT_EQ(output[idx], output2[idx])
<< "Mismatch at unit tests for av1_jnt_convolve_2d\n"
......@@ -217,8 +348,8 @@ void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
}
// Test different combination of fwd and bck offset weights
for (k = 0; k < 2; ++k) {
for (l = 0; l < 4; ++l) {
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 4; ++l) {
conv_params1.use_jnt_comp_avg = 1;
conv_params2.use_jnt_comp_avg = 1;
conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
......@@ -239,8 +370,8 @@ void AV1JntConvolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
out_h, &filter_params_x, &filter_params_y, subx,
suby, &conv_params2);
for (i = 0; i < out_h; ++i) {
for (j = 0; j < out_w; ++j) {
for (int i = 0; i < out_h; ++i) {
for (int j = 0; j < out_w; ++j) {
int idx = i * MAX_SB_SIZE + j;
ASSERT_EQ(output[idx], output2[idx])
<< "Mismatch at unit tests for "
......@@ -282,15 +413,16 @@ void AV1HighbdConvolve2DTest::RunCheckOutput(
highbd_convolve_2d_func test_impl) {
const int w = kMaxSize, h = kMaxSize;
const int bd = GET_PARAM(0);
int i, j;
int hfilter, vfilter, subx, suby;
uint16_t input[kMaxSize * kMaxSize];
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
for (i = 0; i < MAX_SB_SQUARE; ++i) output[i] = output2[i] = rnd_.Rand31();
for (int i = 0; i < h; ++i)
for (int j = 0; j < w; ++j)
input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
for (int i = 0; i < MAX_SB_SQUARE; ++i)
output[i] = output2[i] = rnd_.Rand31();
for (int block_idx = BLOCK_4X4; block_idx < BLOCK_SIZES_ALL; ++block_idx) {
const int out_w = block_size_wide[block_idx];
......@@ -321,8 +453,8 @@ void AV1HighbdConvolve2DTest::RunCheckOutput(
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params2, bd);
for (i = 0; i < out_h; ++i) {
for (j = 0; j < out_w; ++j) {
for (int i = 0; i < out_h; ++i) {
for (int j = 0; j < out_w; ++j) {
int idx = i * MAX_SB_SIZE + j;
ASSERT_EQ(output[idx], output2[idx])
<< out_w << "x" << out_h << " Pixel mismatch at index "
......@@ -351,15 +483,16 @@ void AV1HighbdJntConvolve2DTest::RunCheckOutput(
highbd_convolve_2d_func test_impl) {
const int w = kMaxSize, h = kMaxSize;
const int bd = GET_PARAM(0);
int i, j, k, l;
int hfilter, vfilter, subx, suby;
uint16_t input[kMaxSize * kMaxSize];
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output[MAX_SB_SQUARE]);
DECLARE_ALIGNED(32, CONV_BUF_TYPE, output2[MAX_SB_SQUARE]);
for (i = 0; i < h; ++i)
for (j = 0; j < w; ++j) input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
for (i = 0; i < MAX_SB_SQUARE; ++i) output[i] = output2[i] = rnd_.Rand31();
for (int i = 0; i < h; ++i)
for (int j = 0; j < w; ++j)
input[i * w + j] = rnd_.Rand16() & ((1 << bd) - 1);
for (int i = 0; i < MAX_SB_SQUARE; ++i)
output[i] = output2[i] = rnd_.Rand31();
for (int block_idx = BLOCK_4X4; block_idx < BLOCK_SIZES_ALL; ++block_idx) {
const int out_w = block_size_wide[block_idx];
......@@ -394,8 +527,8 @@ void AV1HighbdJntConvolve2DTest::RunCheckOutput(
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params2, bd);
for (i = 0; i < out_h; ++i) {
for (j = 0; j < out_w; ++j) {
for (int i = 0; i < out_h; ++i) {
for (int j = 0; j < out_w; ++j) {
int idx = i * MAX_SB_SIZE + j;
ASSERT_EQ(output[idx], output2[idx])
<< out_w << "x" << out_h << " Pixel mismatch at index "
......@@ -408,8 +541,8 @@ void AV1HighbdJntConvolve2DTest::RunCheckOutput(
}
// Test different combination of fwd and bck offset weights
for (k = 0; k < 2; ++k) {
for (l = 0; l < 4; ++l) {
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 4; ++l) {
conv_params1.use_jnt_comp_avg = 1;
conv_params2.use_jnt_comp_avg = 1;
conv_params1.fwd_offset = quant_dist_lookup_table[k][l][0];
......@@ -430,8 +563,8 @@ void AV1HighbdJntConvolve2DTest::RunCheckOutput(
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params2, bd);
for (i = 0; i < out_h; ++i) {
for (j = 0; j < out_w; ++j) {
for (int i = 0; i < out_h; ++i) {
for (int j = 0; j < out_w; ++j) {
int idx = i * MAX_SB_SIZE + j;
ASSERT_EQ(output[idx], output2[idx])
<< out_w << "x" << out_h
......
......@@ -50,6 +50,20 @@ class AV1Convolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
libaom_test::ACMRandom rnd_;
};
// Value-parameterized gtest fixture for the single-reference 2-D convolve
// functions. The test parameter (Convolve2DParam) carries the function under
// test together with the has_subx / has_suby / is_compound settings that the
// Run* helpers read via GET_PARAM.
class AV1Convolve2DSrTest : public ::testing::TestWithParam<Convolve2DParam> {
public:
virtual ~AV1Convolve2DSrTest();
// Re-seeds rnd_ before each test.
virtual void SetUp();
// Clears system state after each test.
virtual void TearDown();
protected:
// Compares test_impl's output against the C reference implementation.
void RunCheckOutput(convolve_2d_func test_impl);
// Times test_impl per block size and prints the results.
void RunSpeedTest(convolve_2d_func test_impl);
// Random source used to generate inputs and source offsets.
libaom_test::ACMRandom rnd_;
};
#if CONFIG_JNT_COMP
class AV1JntConvolve2DTest : public ::testing::TestWithParam<Convolve2DParam> {
public:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment