Commit 7e40a55e authored by Peter de Rivaz's avatar Peter de Rivaz Committed by Deb Mukherjee

Added high bitdepth sse2 transform functions

Also removes some spurious changes in common/vp9_blockd.h which
was introduced by a rebase issue between nextgen and master branches.

Change-Id: If359f0e9a71bca9c2ba685a87a355873536bb282
(cherry picked from commit 005d80cd05269a299cd2f7ddbc3d4d8b791aebba)
(cherry picked from commit 08d2f548007fd8d6fd41da8ef7fdb488b6485af3)
(cherry picked from commit 4230c2306c194c058f56433a5275aa02a2e71d56)
parent 03e26715
This diff is collapsed.
......@@ -79,6 +79,10 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
Trans32x32Param;
#if CONFIG_VP9_HIGHBITDEPTH
void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
}
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
}
......@@ -86,7 +90,7 @@ void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
}
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
public:
......@@ -114,7 +118,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
uint32_t max_error = 0;
int64_t total_error = 0;
const int count_test_block = 1000;
const int count_test_block = 10000;
DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
......@@ -127,7 +131,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < kNumCoeffs; ++j) {
if (bit_depth_ == 8) {
if (bit_depth_ == VPX_BITS_8) {
src[j] = rnd.Rand8();
dst[j] = rnd.Rand8();
test_input_block[j] = src[j] - dst[j];
......@@ -282,7 +286,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
reference_32x32_dct_2d(in, out_r);
for (int j = 0; j < kNumCoeffs; ++j)
coeff[j] = round(out_r[j]);
coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
if (bit_depth_ == VPX_BITS_8) {
ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -331,7 +335,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
make_tuple(&vp9_fdct32x32_rd_c,
&vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
......@@ -341,7 +345,7 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
make_tuple(&vp9_fdct32x32_rd_c,
&vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
#endif
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
......@@ -351,7 +355,23 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vp9_fdct32x32_rd_sse2,
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans32x32Test,
::testing::Values(
make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
VPX_BITS_10),
make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
VPX_BITS_12),
make_tuple(&vp9_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
VPX_BITS_8),
make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
......@@ -361,5 +381,5 @@ INSTANTIATE_TEST_CASE_P(
&vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
make_tuple(&vp9_fdct32x32_rd_avx2,
&vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
#endif
#endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
......@@ -75,7 +75,17 @@ void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
}
#endif
#if HAVE_SSE2
void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
}
void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
class Trans4x4TestBase {
public:
......@@ -416,7 +426,7 @@ INSTANTIATE_TEST_CASE_P(
C, Trans4x4DCT,
::testing::Values(
make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
......@@ -442,7 +452,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
......@@ -456,7 +466,7 @@ INSTANTIATE_TEST_CASE_P(
C, Trans4x4WHT,
::testing::Values(
make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
......@@ -471,7 +481,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
#endif
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
......@@ -494,6 +504,33 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
#endif
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT,
::testing::Values(
make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, VPX_BITS_10),
make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
make_tuple(&vp9_fdct4x4_sse2, &vp9_idct4x4_16_add_c, 0,
VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
} // namespace
......@@ -71,6 +71,7 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
vp9_fdct8x8_c(in, out, stride);
......@@ -96,7 +97,33 @@ void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
}
#endif
void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
}
void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
}
#if HAVE_SSE2
void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
}
void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
}
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
}
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_VP9_HIGHBITDEPTH
class FwdTrans8x8TestBase {
public:
......@@ -146,9 +173,10 @@ class FwdTrans8x8TestBase {
memset(count_sign_block, 0, sizeof(count_sign_block));
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-15, 15].
// Initialize a test block with input range [-mask_ / 16, mask_ / 16].
for (int j = 0; j < 64; ++j)
test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
((rnd.Rand16() & mask_) >> 4);
ASM_REGISTER_STATE_CHECK(
RunFwdTxfm(test_input_block, test_output_block, pitch_));
......@@ -188,7 +216,7 @@ class FwdTrans8x8TestBase {
#endif
for (int i = 0; i < count_test_block; ++i) {
// Initialize a test block with input range [-255, 255].
// Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < 64; ++j) {
if (bit_depth_ == VPX_BITS_8) {
src[j] = rnd.Rand8();
......@@ -427,6 +455,63 @@ class FwdTrans8x8TestBase {
}
}
}
void CompareInvReference(IdctFunc ref_txfm, int thresh) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 10000;
const int eob = 12;
DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
#endif
const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
for (int i = 0; i < count_test_block; ++i) {
for (int j = 0; j < kNumCoeffs; ++j) {
if (j < eob) {
// Random values less than the threshold, either positive or negative
coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
} else {
coeff[scan[j]] = 0;
}
if (bit_depth_ == VPX_BITS_8) {
dst[j] = 0;
ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
} else {
dst16[j] = 0;
ref16[j] = 0;
#endif
}
}
if (bit_depth_ == VPX_BITS_8) {
ref_txfm(coeff, ref, pitch_);
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
#if CONFIG_VP9_HIGHBITDEPTH
} else {
ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
pitch_));
#endif
}
for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
const uint32_t diff =
bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
const uint32_t diff = dst[j] - ref[j];
#endif
const uint32_t error = diff * diff;
EXPECT_EQ(0u, error)
<< "Error: 8x8 IDCT has error " << error
<< " at index " << j;
}
}
}
int pitch_;
int tx_type_;
FhtFunc fwd_txfm_ref;
......@@ -526,6 +611,38 @@ TEST_P(FwdTrans8x8HT, ExtremalCheck) {
RunExtremalCheck();
}
class InvTrans8x8DCT
: public FwdTrans8x8TestBase,
public ::testing::TestWithParam<Idct8x8Param> {
public:
virtual ~InvTrans8x8DCT() {}
virtual void SetUp() {
ref_txfm_ = GET_PARAM(0);
inv_txfm_ = GET_PARAM(1);
thresh_ = GET_PARAM(2);
pitch_ = 8;
bit_depth_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
}
virtual void TearDown() { libvpx_test::ClearSystemState(); }
protected:
void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
inv_txfm_(out, dst, stride);
}
void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}
IdctFunc ref_txfm_;
IdctFunc inv_txfm_;
int thresh_;
};
TEST_P(InvTrans8x8DCT, CompareReference) {
CompareInvReference(ref_txfm_, thresh_);
}
using std::tr1::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -540,7 +657,7 @@ INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8DCT,
::testing::Values(
make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
......@@ -566,7 +683,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
#endif
#endif // CONFIG_VP9_HIGHBITDEPTH
#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
......@@ -581,7 +698,7 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
#endif
#endif // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
......@@ -596,7 +713,45 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
#endif
#endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
::testing::Values(
make_tuple(&vp9_highbd_fdct8x8_c,
&idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
make_tuple(&vp9_highbd_fdct8x8_sse2,
&idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
make_tuple(&vp9_highbd_fdct8x8_c,
&idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
make_tuple(&vp9_highbd_fdct8x8_sse2,
&idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
::testing::Values(
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
SSE2, InvTrans8x8DCT,
::testing::Values(
make_tuple(&idct8x8_10_add_10_c,
&idct8x8_10_add_10_sse2, 6225, VPX_BITS_10),
make_tuple(&idct8x8_10,
&idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
make_tuple(&idct8x8_10_add_12_c,
&idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
make_tuple(&idct8x8_12,
&idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
......
......@@ -112,9 +112,6 @@ typedef struct {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
#if CONFIG_FILTERINTRA
int filterbit, uv_filterbit;
#endif
TX_SIZE tx_size;
int8_t skip;
int8_t segment_id;
......@@ -130,17 +127,11 @@ typedef struct {
uint8_t mode_context[MAX_REF_FRAMES];
INTERP_FILTER interp_filter;
#if CONFIG_EXT_TX
EXT_TX_TYPE ext_txfrm;
#endif
} MB_MODE_INFO;
typedef struct MODE_INFO {
struct MODE_INFO *src_mi;
MB_MODE_INFO mbmi;
#if CONFIG_FILTERINTRA
int b_filter_info[4];
#endif
b_mode_info bmi[4];
} MODE_INFO;
......@@ -149,17 +140,6 @@ static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) {
: mi->mbmi.mode;
}
#if CONFIG_FILTERINTRA
static INLINE int is_filter_allowed(PREDICTION_MODE mode) {
(void)mode;
return 1;
}
static INLINE int is_filter_enabled(TX_SIZE txsize) {
return (txsize < TX_SIZES);
}
#endif
static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
return mbmi->ref_frame[0] > INTRA_FRAME;
}
......@@ -257,33 +237,13 @@ static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize,
extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES];
#if CONFIG_EXT_TX
static TX_TYPE ext_tx_to_txtype(EXT_TX_TYPE ext_tx) {
switch (ext_tx) {
case NORM:
default:
return DCT_DCT;
case ALT:
return ADST_ADST;
}
}
#endif
static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type,
const MACROBLOCKD *xd) {
const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
#if CONFIG_EXT_TX
if (plane_type != PLANE_TYPE_Y || xd->lossless)
return DCT_DCT;
if (is_inter_block(mbmi)) {
return ext_tx_to_txtype(mbmi->ext_txfrm);
}
#else
if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mbmi))
return DCT_DCT;
#endif
return intra_mode_to_tx_type_lookup[mbmi->mode];
}
......@@ -291,17 +251,8 @@ static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int ib) {
const MODE_INFO *const mi = xd->mi[0].src_mi;
#if CONFIG_EXT_TX
if (plane_type != PLANE_TYPE_Y || xd->lossless)
return DCT_DCT;
if (is_inter_block(&mi->mbmi)) {
return ext_tx_to_txtype(mi->mbmi.ext_txfrm);
}
#else
if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(&mi->mbmi))
return DCT_DCT;
#endif
return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)];
}
......
......@@ -750,27 +750,12 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct4x4_1_add/;
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct4x4_16_add/;
add_proto qw/void vp9_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_1_add/;
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_64_add/;
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_10_add/;
add_proto qw/void vp9_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_1_add/;
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_256_add/;
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_10_add/;
add_proto qw/void vp9_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct32x32_1024_add/;
......@@ -796,6 +781,42 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_iwht4x4_16_add/;
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
if (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes") {
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct4x4_16_add/;
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_64_add/;
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_10_add/;
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_256_add/;
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_10_add/;
} else {
add_proto qw/void vp9_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct4x4_16_add sse2/;
add_proto qw/void vp9_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_64_add sse2/;
add_proto qw/void vp9_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct8x8_10_add sse2/;
add_proto qw/void vp9_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_256_add sse2/;
add_proto qw/void vp9_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/vp9_highbd_idct16x16_10_add sse2/;
}
}
#
......@@ -1184,43 +1205,43 @@ if (vpx_config("CONFIG_INTERNAL_STATS") eq "yes") {
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4/;
specialize qw/vp9_fht4x4 sse2/;
add_proto qw/void vp9_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht8x8/;
specialize qw/vp9_fht8x8 sse2/;
add_proto qw/void vp9_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht16x16/;
specialize qw/vp9_fht16x16 sse2/;
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fwht4x4/;
specialize qw/vp9_fwht4x4/, "$mmx_x86inc";
add_proto qw/void vp9_fdct4x4_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct4x4_1/;
specialize qw/vp9_fdct4x4_1 sse2/;
add_proto qw/void vp9_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct4x4/;
specialize qw/vp9_fdct4x4 sse2/;
add_proto qw/void vp9_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct8x8_1/;
specialize qw/vp9_fdct8x8_1 sse2/;
add_proto qw/void vp9_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct8x8/;
specialize qw/vp9_fdct8x8 sse2/;
add_proto qw/void vp9_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16_1/;
specialize qw/vp9_fdct16x16_1 sse2/;
add_proto qw/void vp9_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct16x16/;
specialize qw/vp9_fdct16x16 sse2/;
add_proto qw/void vp9_fdct32x32_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_1/;
specialize qw/vp9_fdct32x32_1 sse2/;
add_proto qw/void vp9_fdct32x32/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32/;
specialize qw/vp9_fdct32x32 sse2/;
add_proto qw/void vp9_fdct32x32_rd/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fdct32x32_rd/;
specialize qw/vp9_fdct32x32_rd sse2/;
} else {
add_proto qw/void vp9_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_fht4x4 sse2/;
......@@ -1882,40 +1903,40 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# fdct functions
add_proto qw/void vp9_highbd_fht4x4/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_highbd_fht4x4/;
specialize qw/vp9_highbd_fht4x4 sse2/;
add_proto qw/void vp9_highbd_fht8x8/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_highbd_fht8x8/;
specialize qw/vp9_highbd_fht8x8 sse2/;
add_proto qw/void vp9_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp9_highbd_fht16x16/;
specialize qw/vp9_highbd_fht16x16 sse2/;
add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fwht4x4/;
add_proto qw/void vp9_highbd_fdct4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct4x4/;
specialize qw/vp9_highbd_fdct4x4 sse2/;
add_proto qw/void vp9_highbd_fdct8x8_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct8x8_1/;
add_proto qw/void vp9_highbd_fdct8x8/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct8x8/;
specialize qw/vp9_highbd_fdct8x8 sse2/;
add_proto qw/void vp9_highbd_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct16x16_1/;
add_proto qw/void vp9_highbd_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_highbd_fdct16x16/;
specialize qw/vp9_highbd_fdct16x16 sse2/;