Commit f8daa92d authored by Peng Bin, committed by Bin Peng

Remove aom_comp_mask_upsampled_pred from rtcd

Since aom_comp_mask_upsampled_pred just calls aom_upsampled_pred
and aom_comp_mask_pred, there is no need to keep separate C and
SIMD versions any more.

Change-Id: I1ff8bcae87d501c68a80708fd2dc6b74c6952f88
parent a64c05b5
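
For readers skimming the diff, the consolidated wrapper added in aom_dsp/variance.c is reproduced here (it matches the hunk below). Because aom_comp_mask_pred is dispatched through rtcd and specialized for SSSE3, this single C wrapper transparently uses the SIMD blend when it is available, which is why the separate aom_comp_mask_upsampled_pred_ssse3 can be deleted.

// Consolidated wrapper, as added by this commit in aom_dsp/variance.c.
// aom_comp_mask_pred is an rtcd symbol (specialized for ssse3), so this
// plain C wrapper picks up the SIMD blend automatically when available.
void aom_comp_mask_upsampled_pred(uint8_t *comp_pred, const uint8_t *pred,
                                  int width, int height, int subpel_x_q3,
                                  int subpel_y_q3, const uint8_t *ref,
                                  int ref_stride, const uint8_t *mask,
                                  int mask_stride, int invert_mask) {
  if (subpel_x_q3 | subpel_y_q3) {
    // Sub-pel offsets present: upsample the reference into comp_pred first,
    // then blend from that buffer.
    aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
                       ref_stride);
    ref = comp_pred;
    ref_stride = width;
  }
  aom_comp_mask_pred(comp_pred, pred, width, height, ref, ref_stride, mask,
                     mask_stride, invert_mask);
}
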
......@@ -1784,8 +1784,6 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") {
add_proto qw/void aom_comp_mask_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
specialize qw/aom_comp_mask_pred ssse3/;
add_proto qw/void aom_comp_mask_upsampled_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
specialize qw/aom_comp_mask_upsampled_pred ssse3/;
add_proto qw/void aom_highbd_comp_mask_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask";
add_proto qw/void aom_highbd_comp_mask_upsampled_pred/, "uint16_t *comp_pred, const uint8_t *pred8, int width, int height, int subsample_x_q3, int subsample_y_q3, const uint8_t *ref8, int ref_stride, const uint8_t *mask, int mask_stride, int invert_mask, int bd";
......
......@@ -1044,25 +1044,19 @@ void aom_comp_mask_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width,
}
}
void aom_comp_mask_upsampled_pred_c(uint8_t *comp_pred, const uint8_t *pred,
int width, int height, int subpel_x_q3,
int subpel_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask) {
int i, j;
const uint8_t *src0 = invert_mask ? pred : comp_pred;
const uint8_t *src1 = invert_mask ? comp_pred : pred;
aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
ref_stride);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
comp_pred[j] = AOM_BLEND_A64(mask[j], src0[j], src1[j]);
}
comp_pred += width;
src0 += width;
src1 += width;
mask += mask_stride;
void aom_comp_mask_upsampled_pred(uint8_t *comp_pred, const uint8_t *pred,
int width, int height, int subpel_x_q3,
int subpel_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask) {
if (subpel_x_q3 | subpel_y_q3) {
aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
ref_stride);
ref = comp_pred;
ref_stride = width;
}
aom_comp_mask_pred(comp_pred, pred, width, height, ref, ref_stride, mask,
mask_stride, invert_mask);
}
#define MASK_SUBPIX_VAR(W, H) \
......
......@@ -76,6 +76,12 @@ typedef unsigned int (*aom_masked_subpixvariance_fn_t)(
const uint8_t *src, int src_stride, int xoffset, int yoffset,
const uint8_t *ref, int ref_stride, const uint8_t *second_pred,
const uint8_t *msk, int msk_stride, int invert_mask, unsigned int *sse);
void aom_comp_mask_upsampled_pred(uint8_t *comp_pred, const uint8_t *pred,
int width, int height, int subsample_x_q3,
int subsample_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask);
#endif // CONFIG_AV1
#if CONFIG_AV1
......
......@@ -1095,18 +1095,3 @@ INLINE void aom_comp_mask_pred_ssse3(uint8_t *comp_pred, const uint8_t *pred,
i += 2;
} while (i < height);
}
void aom_comp_mask_upsampled_pred_ssse3(uint8_t *comp_pred, const uint8_t *pred,
int width, int height, int subpel_x_q3,
int subpel_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask) {
if (subpel_x_q3 || subpel_y_q3) {
aom_upsampled_pred(comp_pred, width, height, subpel_x_q3, subpel_y_q3, ref,
ref_stride);
ref = comp_pred;
ref_stride = width;
}
aom_comp_mask_pred_ssse3(comp_pred, pred, width, height, ref, ref_stride,
mask, mask_stride, invert_mask);
}
......@@ -11,11 +11,13 @@
#include <cstdlib>
#include <new>
#include <vector>
#include "./aom_config.h"
#include "./aom_dsp_rtcd.h"
#include "aom/aom_codec.h"
#include "aom/aom_integer.h"
#include "aom_dsp/variance.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/aom_timer.h"
#include "aom_ports/mem.h"
......@@ -26,18 +28,19 @@
#include "test/util.h"
#include "third_party/googletest/src/googletest/include/gtest/gtest.h"
using std::vector;
namespace AV1CompMaskVariance {
typedef void (*comp_mask_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
int width, int height, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask);
typedef void (*comp_mask_up_pred_func)(uint8_t *comp_pred, const uint8_t *pred,
int width, int height, int subpel_x_q3,
int subpel_y_q3, const uint8_t *ref,
int ref_stride, const uint8_t *mask,
int mask_stride, int invert_mask);
typedef std::tr1::tuple<intptr_t, BLOCK_SIZE> CompMaskPredParam;
const BLOCK_SIZE valid_bsize[] = {
BLOCK_8X8, BLOCK_8X16, BLOCK_8X32, BLOCK_16X8, BLOCK_16X16,
BLOCK_16X32, BLOCK_32X8, BLOCK_32X16, BLOCK_32X32,
};
typedef std::tr1::tuple<comp_mask_pred_func, BLOCK_SIZE> CompMaskPredParam;
class AV1CompMaskVarianceTest
: public ::testing::TestWithParam<CompMaskPredParam> {
......@@ -48,8 +51,20 @@ class AV1CompMaskVarianceTest
void TearDown();
protected:
void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize);
bool CheckResult(int w, int h) {
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
int idx = i * w + j;
if (comp_pred1_[idx] != comp_pred2_[idx]) {
printf("%dx%d mismatch @%d(%d,%d) ", w, h, idx, i, j);
return false;
}
}
}
return true;
}
libaom_test::ACMRandom rnd_;
uint8_t *comp_pred1_;
......@@ -86,27 +101,20 @@ void AV1CompMaskVarianceTest::TearDown() {
}
void AV1CompMaskVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
BLOCK_SIZE bsize) {
BLOCK_SIZE bsize, int inv) {
const int w = block_size_wide[bsize];
const int h = block_size_high[bsize];
int wedge_types = (1 << get_wedge_bits_lookup(bsize));
for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
for (int inv = 0; inv < 2; ++inv) {
aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
inv);
test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
// check result
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
int idx = i * w + j;
ASSERT_EQ(comp_pred1_[idx], comp_pred2_[idx])
<< w << "x" << h << " Pixel mismatch at index " << idx << " = ("
<< i << ", " << j << "), wedge " << wedge_index << " inv " << inv;
}
}
}
aom_comp_mask_pred_c(comp_pred1_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w,
inv);
test_impl(comp_pred2_, pred_, w, h, ref_, MAX_SB_SIZE, mask, w, inv);
ASSERT_EQ(CheckResult(w, h), true)
<< " wedge " << wedge_index << " inv " << inv;
}
}
......@@ -133,52 +141,44 @@ void AV1CompMaskVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
elapsed_time[i] = 1000.0 * time / num_loops;
}
printf("comp_mask_pred %3dx%-3d: %7.2f/%7.2f ns", w, h, elapsed_time[0],
printf("compMask %3dx%-3d: %7.2f/%7.2fns", w, h, elapsed_time[0],
elapsed_time[1]);
printf(" (%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
}
TEST_P(AV1CompMaskVarianceTest, CheckOutput) {
RunCheckOutput((comp_mask_pred_func)GET_PARAM(0), GET_PARAM(1));
// inv = 0, 1
RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
}
TEST_P(AV1CompMaskVarianceTest, DISABLED_Speed) {
RunSpeedTest((comp_mask_pred_func)GET_PARAM(0), GET_PARAM(1));
RunSpeedTest(GET_PARAM(0), GET_PARAM(1));
}
#if HAVE_SSSE3
const intptr_t comp_mask_pred_ssse3_f = (intptr_t)(&aom_comp_mask_pred_ssse3);
const CompMaskPredParam kArrayCompMaskPred_ssse3[] = {
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_8X8),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_8X16),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_16X8),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_16X16),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_16X32),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_32X16),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_32X32),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_8X32),
testing::make_tuple(comp_mask_pred_ssse3_f, BLOCK_32X8),
};
INSTANTIATE_TEST_CASE_P(SSSE3, AV1CompMaskVarianceTest,
::testing::ValuesIn(kArrayCompMaskPred_ssse3));
INSTANTIATE_TEST_CASE_P(
SSSE3, AV1CompMaskVarianceTest,
::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
::testing::ValuesIn(valid_bsize)));
#endif
#ifndef aom_comp_mask_pred
// can't run this test if aom_comp_mask_pred is defined to aom_comp_mask_pred_c
class AV1CompMaskUpVarianceTest : public AV1CompMaskVarianceTest {
public:
~AV1CompMaskUpVarianceTest();
protected:
void RunCheckOutput(comp_mask_up_pred_func test_impl, BLOCK_SIZE bsize);
void RunSpeedTest(comp_mask_up_pred_func test_impl, BLOCK_SIZE bsize);
void RunSpeedTestSub(comp_mask_up_pred_func test_impl, BLOCK_SIZE bsize,
int havSub);
void RunCheckOutput(comp_mask_pred_func test_impl, BLOCK_SIZE bsize, int inv);
void RunSpeedTest(comp_mask_pred_func test_impl, BLOCK_SIZE bsize,
int havSub);
};
AV1CompMaskUpVarianceTest::~AV1CompMaskUpVarianceTest() { ; }
void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_up_pred_func test_impl,
BLOCK_SIZE bsize) {
void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_pred_func test_impl,
BLOCK_SIZE bsize, int inv) {
const int w = block_size_wide[bsize];
const int h = block_size_high[bsize];
int wedge_types = (1 << get_wedge_bits_lookup(bsize));
......@@ -190,28 +190,22 @@ void AV1CompMaskUpVarianceTest::RunCheckOutput(comp_mask_up_pred_func test_impl,
for (int wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
for (int inv = 0; inv < 2; ++inv) {
aom_comp_mask_upsampled_pred_c(comp_pred1_, pred_, w, h, subx, suby,
ref_, MAX_SB_SIZE, mask, w, inv);
test_impl(comp_pred2_, pred_, w, h, subx, suby, ref_, MAX_SB_SIZE, mask,
w, inv);
// check result
for (int i = 0; i < h; ++i) {
for (int j = 0; j < w; ++j) {
int idx = i * w + j;
ASSERT_EQ(comp_pred1_[idx], comp_pred2_[idx])
<< w << "x" << h << " Pixel mismatch at index " << idx << " = ("
<< i << ", " << j << "), wedge " << wedge_index << " inv "
<< inv << "sub (" << subx << "," << suby << ")";
}
}
}
aom_comp_mask_pred = aom_comp_mask_pred_c; // ref
aom_comp_mask_upsampled_pred(comp_pred1_, pred_, w, h, subx, suby, ref_,
MAX_SB_SIZE, mask, w, inv);
aom_comp_mask_pred = test_impl; // test
aom_comp_mask_upsampled_pred(comp_pred2_, pred_, w, h, subx, suby, ref_,
MAX_SB_SIZE, mask, w, inv);
ASSERT_EQ(CheckResult(w, h), true)
<< " wedge " << wedge_index << " inv " << inv << "sub (" << subx
<< "," << suby << ")";
}
}
}
void AV1CompMaskUpVarianceTest::RunSpeedTestSub(
comp_mask_up_pred_func test_impl, BLOCK_SIZE bsize, int havSub) {
void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_pred_func test_impl,
BLOCK_SIZE bsize, int havSub) {
const int w = block_size_wide[bsize];
const int h = block_size_high[bsize];
const int subx = havSub ? 3 : 0;
......@@ -222,57 +216,40 @@ void AV1CompMaskUpVarianceTest::RunSpeedTestSub(
const uint8_t *mask = av1_get_contiguous_soft_mask(wedge_index, 1, bsize);
const int num_loops = 1000000000 / (w + h);
comp_mask_up_pred_func funcs[2] = { &aom_comp_mask_upsampled_pred_c,
test_impl };
comp_mask_pred_func funcs[2] = { &aom_comp_mask_pred_c, test_impl };
double elapsed_time[2] = { 0 };
for (int i = 0; i < 2; ++i) {
aom_usec_timer timer;
aom_usec_timer_start(&timer);
comp_mask_up_pred_func func = funcs[i];
aom_comp_mask_pred = funcs[i];
for (int j = 0; j < num_loops; ++j) {
func(comp_pred1_, pred_, w, h, subx, suby, ref_, MAX_SB_SIZE, mask, w, 0);
aom_comp_mask_upsampled_pred(comp_pred1_, pred_, w, h, subx, suby, ref_,
MAX_SB_SIZE, mask, w, 0);
}
aom_usec_timer_mark(&timer);
double time = static_cast<double>(aom_usec_timer_elapsed(&timer));
elapsed_time[i] = 1000.0 * time / num_loops;
}
printf("CompMask[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
printf("CompMaskUp[%d] %3dx%-3d:%7.2f/%7.2fns", havSub, w, h, elapsed_time[0],
elapsed_time[1]);
printf("(%3.2f)\n", elapsed_time[0] / elapsed_time[1]);
}
void AV1CompMaskUpVarianceTest::RunSpeedTest(comp_mask_up_pred_func test_impl,
BLOCK_SIZE bsize) {
RunSpeedTestSub(test_impl, bsize, 0); // could skip upsample
RunSpeedTestSub(test_impl, bsize, 1);
}
TEST_P(AV1CompMaskUpVarianceTest, CheckOutput) {
RunCheckOutput((comp_mask_up_pred_func)GET_PARAM(0), GET_PARAM(1));
// inv mask = 0, 1
RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 0);
RunCheckOutput(GET_PARAM(0), GET_PARAM(1), 1);
}
TEST_P(AV1CompMaskUpVarianceTest, DISABLED_Speed) {
RunSpeedTest((comp_mask_up_pred_func)GET_PARAM(0), GET_PARAM(1));
RunSpeedTest(GET_PARAM(0), GET_PARAM(1), 1);
}
#if HAVE_SSSE3
const intptr_t comp_mask_up_pred_ssse3_f =
(intptr_t)(&aom_comp_mask_upsampled_pred_ssse3);
const CompMaskPredParam kArrayCompMaskUpPred_ssse3[] = {
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_8X8),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_8X16),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_16X8),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_16X16),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_16X32),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_32X16),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_32X32),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_8X32),
testing::make_tuple(comp_mask_up_pred_ssse3_f, BLOCK_32X8),
};
INSTANTIATE_TEST_CASE_P(SSSE3, AV1CompMaskUpVarianceTest,
::testing::ValuesIn(kArrayCompMaskUpPred_ssse3));
INSTANTIATE_TEST_CASE_P(
SSSE3, AV1CompMaskUpVarianceTest,
::testing::Combine(::testing::Values(&aom_comp_mask_pred_ssse3),
::testing::ValuesIn(valid_bsize)));
#endif
#endif
} // namespace AV1CompMaskVariance