Commit 4fc8df67 authored by Frederic Barbier's avatar Frederic Barbier Committed by Fred BARBIER
Browse files

Cleanup dead high-bitdepth inverse-tx functions

This patch removes dead code and prevents future implementations
from relying on obsolete transforms. Future optimizations and tests
should be based on the latest C functions (av1/common/av1_inv_txfm1d.c).

Clean up the last related unit-test callers.
BUG=aomedia:442

Change-Id: I24953cc1baf30dd7b720df8a72dd91b356b74cad
parent e3980281
......@@ -460,15 +460,6 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
# High-bitdepth inverse-transform prototypes; each add_proto declares the C
# reference and each specialize binds the SSE2-optimized implementation.
add_proto qw/void aom_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct4x4_16_add sse2/;
add_proto qw/void aom_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct8x8_10_add sse2/;
add_proto qw/void aom_highbd_idct16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct16x16_256_add sse2/;
add_proto qw/void aom_highbd_idct16x16_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct16x16_10_add sse2/;
}
} else {
{
......
......@@ -1602,32 +1602,6 @@ void aom_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) {
output[7] = HIGHBD_WRAPLOW(-x1, bd);
}
// 8x8 high-bitdepth inverse DCT for blocks whose non-zero coefficients lie
// entirely in the first 4 rows; the rounded result is added (with clipping
// to the bit depth bd) into the destination pointed to by dest8.
void aom_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
                                 int stride, int bd) {
  tran_low_t row_buf[8 * 8] = { 0 };  // zero-filled: rows 4..7 stay zero
  tran_low_t col_in[8], col_out[8];
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  int r, c;

  // Row pass: only the first 4 rows carry non-zero coefficients.
  for (r = 0; r < 4; ++r) {
    aom_highbd_idct8_c(input + 8 * r, row_buf + 8 * r, bd);
  }

  // Column pass over all 8 columns; final rounding shift is 5 bits.
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r) col_in[r] = row_buf[r * 8 + c];
    aom_highbd_idct8_c(col_in, col_out, bd);
    for (r = 0; r < 8; ++r) {
      dest[r * stride + c] = highbd_clip_pixel_add(
          dest[r * stride + c], ROUND_POWER_OF_TWO(col_out[r], 5), bd);
    }
  }
}
void aom_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[16], step2[16];
tran_high_t temp1, temp2;
......@@ -1794,32 +1768,6 @@ void aom_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) {
output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd);
}
// Full 16x16 high-bitdepth inverse DCT (up to 256 non-zero coefficients).
// The rounded residual is added into the high-bitdepth destination, with
// each pixel clipped to the range implied by bd.
void aom_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
                                    int stride, int bd) {
  tran_low_t row_buf[16 * 16];
  tran_low_t col_in[16], col_out[16];
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  int r, c;

  // Row pass: one 1-D idct16 per row.
  for (r = 0; r < 16; ++r) {
    aom_highbd_idct16_c(input + 16 * r, row_buf + 16 * r, bd);
  }

  // Column pass; final rounding shift is 6 bits before the add-and-clip.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r) col_in[r] = row_buf[r * 16 + c];
    aom_highbd_idct16_c(col_in, col_out, bd);
    for (r = 0; r < 16; ++r) {
      dest[r * stride + c] = highbd_clip_pixel_add(
          dest[r * stride + c], ROUND_POWER_OF_TWO(col_out[r], 6), bd);
    }
  }
}
void aom_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
tran_high_t s9, s10, s11, s12, s13, s14, s15;
......@@ -1990,33 +1938,6 @@ void aom_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) {
output[15] = HIGHBD_WRAPLOW(-x1, bd);
}
// 16x16 high-bitdepth inverse DCT specialized for at most 10 non-zero
// coefficients, which all fall in the upper-left 4x4 area; only the first
// 4 rows need a row transform. Result is added into dest with clipping.
void aom_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
                                   int stride, int bd) {
  tran_low_t row_buf[16 * 16] = { 0 };  // rows 4..15 remain zero
  tran_low_t col_in[16], col_out[16];
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  int r, c;

  // Row pass: all non-zero dct coefficients are in the upper-left 4x4
  // area, so only the first 4 rows have to be transformed.
  for (r = 0; r < 4; ++r) {
    aom_highbd_idct16_c(input + 16 * r, row_buf + 16 * r, bd);
  }

  // Column pass over all 16 columns; final rounding shift is 6 bits.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r) col_in[r] = row_buf[r * 16 + c];
    aom_highbd_idct16_c(col_in, col_out, bd);
    for (r = 0; r < 16; ++r) {
      dest[r * stride + c] = highbd_clip_pixel_add(
          dest[r * stride + c], ROUND_POWER_OF_TWO(col_out[r], 6), bd);
    }
  }
}
void aom_highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) {
tran_low_t step1[32], step2[32];
tran_high_t temp1, temp2;
......
......@@ -3731,234 +3731,4 @@ void aom_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
}
}
// 16x16 high-bitdepth inverse DCT + add, SSE2. Strategy: pack the 32-bit
// coefficients to 16 bits and, if every value fits the safe range for the
// 16-bit SIMD idct (|coef| <= 3155), run the fast aom_idct16_sse2 path;
// otherwise fall back to the 32-bit C transform for the offending pass.
void aom_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[16 * 16];
tran_low_t *outptr = out;
int i, j, test;
__m128i inptr[32];
__m128i min_input, max_input, temp1, temp2, sign_bits;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i rounding = _mm_set1_epi16(32);
// 3155 is the largest coefficient magnitude the 16-bit SIMD transform can
// process without risking overflow (see the matching unit test threshold).
const __m128i max = _mm_set1_epi16(3155);
const __m128i min = _mm_set1_epi16(-3155);
int optimised_cols = 0;
// Load input into __m128i & pack to 16 bits.
// Each row is 16 coefficients: inptr[i] holds the left 8, inptr[i+16] the
// right 8, as saturated 16-bit values.
for (i = 0; i < 16; i++) {
temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i));
temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4));
inptr[i] = _mm_packs_epi32(temp1, temp2);
temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8));
temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12));
inptr[i + 16] = _mm_packs_epi32(temp1, temp2);
}
// Find the min & max for the row transform
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 32; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
// test != 0 means some lane exceeded the safe range.
test = _mm_movemask_epi8(temp1);
if (!test) {
// Do the row transform
aom_idct16_sse2(inptr, inptr + 16);
// Find the min & max for the column transform
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 32; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp1);
if (test) {
// Column pass would overflow in 16 bits: transpose the row-pass output
// back and widen it to 32-bit tran_low_t in 'out' for the C fallback.
array_transpose_16x16(inptr, inptr + 16);
for (i = 0; i < 16; i++) {
// Sign-extend each 16-bit lane to 32 bits before storing.
sign_bits = _mm_cmplt_epi16(inptr[i], zero);
temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits);
temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2);
sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero);
temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits);
temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2);
}
} else {
// Set to use the optimised transform for the column
optimised_cols = 1;
}
} else {
// Run the un-optimised row transform
for (i = 0; i < 16; ++i) {
aom_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
}
if (optimised_cols) {
aom_idct16_sse2(inptr, inptr + 16);
// Final round & shift and Reconstruction and Store
{
__m128i d[2];
for (i = 0; i < 16; i++) {
// Round (add 32) then arithmetic shift right by 6: ROUND_POWER_OF_TWO.
inptr[i] = _mm_add_epi16(inptr[i], rounding);
inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding);
d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8));
inptr[i] = _mm_srai_epi16(inptr[i], 6);
inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6);
d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd);
d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd);
// Store
_mm_storeu_si128((__m128i *)(dest + stride * i), d[0]);
_mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]);
}
}
} else {
// Run the un-optimised column transform
tran_low_t temp_in[16], temp_out[16];
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
aom_highbd_idct16_c(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
}
}
}
}
// 16x16 high-bitdepth inverse DCT + add, SSE2, for blocks with at most 10
// non-zero coefficients (all in the upper-left 4x4). Same fast-path /
// fallback structure as aom_highbd_idct16x16_256_add_sse2, but the range
// check and the fallback row pass only touch the first 4 rows.
void aom_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[16 * 16] = { 0 };
tran_low_t *outptr = out;
int i, j, test;
__m128i inptr[32];
__m128i min_input, max_input, temp1, temp2, sign_bits;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i rounding = _mm_set1_epi16(32);
// Safe-range bound for the 16-bit SIMD transform (same as the 256 variant).
const __m128i max = _mm_set1_epi16(3155);
const __m128i min = _mm_set1_epi16(-3155);
int optimised_cols = 0;
// Load input into __m128i & pack to 16 bits.
// inptr[i] holds the left 8 coefficients of row i, inptr[i+16] the right 8.
for (i = 0; i < 16; i++) {
temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i));
temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 4));
inptr[i] = _mm_packs_epi32(temp1, temp2);
temp1 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 8));
temp2 = _mm_loadu_si128((const __m128i *)(input + 16 * i + 12));
inptr[i + 16] = _mm_packs_epi32(temp1, temp2);
}
// Find the min & max for the row transform
// Since all non-zero dct coefficients are in upper-left 4x4 area,
// we only need to consider first 4 rows here.
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 4; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
// Non-zero mask => some coefficient exceeded the 16-bit-safe range.
test = _mm_movemask_epi8(temp1);
if (!test) {
// Do the row transform (N.B. This transposes inptr)
aom_idct16_sse2(inptr, inptr + 16);
// Find the min & max for the column transform
// N.B. Only first 4 cols contain non-zero coeffs
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 16; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp1);
if (test) {
// Use fact only first 4 rows contain non-zero coeffs
array_transpose_8x8(inptr, inptr);
array_transpose_8x8(inptr + 8, inptr + 16);
// Widen the 4 meaningful rows to 32-bit tran_low_t in 'out' for the
// C column fallback (rest of 'out' is already zero-initialized).
for (i = 0; i < 4; i++) {
sign_bits = _mm_cmplt_epi16(inptr[i], zero);
temp1 = _mm_unpacklo_epi16(inptr[i], sign_bits);
temp2 = _mm_unpackhi_epi16(inptr[i], sign_bits);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4)), temp1);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 1)), temp2);
sign_bits = _mm_cmplt_epi16(inptr[i + 16], zero);
temp1 = _mm_unpacklo_epi16(inptr[i + 16], sign_bits);
temp2 = _mm_unpackhi_epi16(inptr[i + 16], sign_bits);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 2)), temp1);
_mm_storeu_si128((__m128i *)(outptr + 4 * (i * 4 + 3)), temp2);
}
} else {
// Set to use the optimised transform for the column
optimised_cols = 1;
}
} else {
// Run the un-optimised row transform
// Only the first 4 rows can hold non-zero coefficients.
for (i = 0; i < 4; ++i) {
aom_highbd_idct16_c(input, outptr, bd);
input += 16;
outptr += 16;
}
}
if (optimised_cols) {
aom_idct16_sse2(inptr, inptr + 16);
// Final round & shift and Reconstruction and Store
{
__m128i d[2];
for (i = 0; i < 16; i++) {
// Round (add 32) then shift right by 6: ROUND_POWER_OF_TWO(x, 6).
inptr[i] = _mm_add_epi16(inptr[i], rounding);
inptr[i + 16] = _mm_add_epi16(inptr[i + 16], rounding);
d[0] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
d[1] = _mm_loadu_si128((const __m128i *)(dest + stride * i + 8));
inptr[i] = _mm_srai_epi16(inptr[i], 6);
inptr[i + 16] = _mm_srai_epi16(inptr[i + 16], 6);
d[0] = clamp_high_sse2(_mm_add_epi16(d[0], inptr[i]), bd);
d[1] = clamp_high_sse2(_mm_add_epi16(d[1], inptr[i + 16]), bd);
// Store
_mm_storeu_si128((__m128i *)(dest + stride * i), d[0]);
_mm_storeu_si128((__m128i *)(dest + stride * i + 8), d[1]);
}
}
} else {
// Run the un-optimised column transform
tran_low_t temp_in[16], temp_out[16];
for (i = 0; i < 16; ++i) {
for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
aom_highbd_idct16_c(temp_in, temp_out, bd);
for (j = 0; j < 16; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
}
}
}
}
#endif // CONFIG_HIGHBITDEPTH
......@@ -255,24 +255,6 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
}
#if CONFIG_HIGHBITDEPTH
// Bit-depth-binding test wrappers: each fixes the bd argument so the
// function matches the harness's (in, out, stride[, tx_type]) pointer type.
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_256_add_c(in, out, stride, 10);
}
void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_256_add_c(in, out, stride, 12);
}
// _ref variants ignore tx_type: the reference inverse here is always DCT.
void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
int /*tx_type*/) {
idct16x16_10(in, out, stride);
}
void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
int /*tx_type*/) {
idct16x16_12(in, out, stride);
}
// Hybrid-transform wrapper: forwards tx_type, fixes bd = 10.
void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
av1_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
}
......@@ -280,32 +262,6 @@ void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
// Hybrid-transform test wrapper: forwards tx_type, fixes bd = 12.
void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
av1_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
}
#if HAVE_SSE2
// SSE2-vs-C test wrappers: bind a fixed bit depth (10 or 12) to the C
// reference and SSE2 implementations so they share one pointer signature.
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_10_add_c(in, out, stride, 10);
}
void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_10_add_c(in, out, stride, 12);
}
void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
}
void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
}
void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
}
void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_HIGHBITDEPTH
class Trans16x16TestBase {
......@@ -623,15 +579,7 @@ class Trans16x16DCT : public Trans16x16TestBase,
fwd_txfm_ref = fdct16x16_ref;
inv_txfm_ref = idct16x16_ref;
mask_ = (1 << bit_depth_) - 1;
#if CONFIG_HIGHBITDEPTH
switch (bit_depth_) {
case AOM_BITS_10: inv_txfm_ref = idct16x16_10_ref; break;
case AOM_BITS_12: inv_txfm_ref = idct16x16_12_ref; break;
default: inv_txfm_ref = idct16x16_ref; break;
}
#else
inv_txfm_ref = idct16x16_ref;
#endif
}
// Per-test teardown: clear libaom's system state between test cases.
virtual void TearDown() { libaom_test::ClearSystemState(); }
......@@ -804,12 +752,10 @@ TEST_P(PartialTrans16x16Test, Random) {
using std::tr1::make_tuple;
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, Trans16x16DCT,
::testing::Values(
make_tuple(&aom_highbd_fdct16x16_c, &idct16x16_10, 0, AOM_BITS_10),
make_tuple(&aom_highbd_fdct16x16_c, &idct16x16_12, 0, AOM_BITS_12),
make_tuple(&aom_fdct16x16_c, &aom_idct16x16_256_add_c, 0, AOM_BITS_8)));
INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_c,
&aom_idct16x16_256_add_c,
0, AOM_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_c,
......@@ -885,17 +831,10 @@ INSTANTIATE_TEST_CASE_P(AVX2, PartialTrans16x16Test,
#endif // HAVE_AVX2 && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16DCT,
::testing::Values(
make_tuple(&aom_highbd_fdct16x16_sse2, &idct16x16_10, 0, AOM_BITS_10),
make_tuple(&aom_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0,
AOM_BITS_10),
make_tuple(&aom_highbd_fdct16x16_sse2, &idct16x16_12, 0, AOM_BITS_12),
make_tuple(&aom_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0,
AOM_BITS_12),
make_tuple(&aom_fdct16x16_sse2, &aom_idct16x16_256_add_c, 0,
AOM_BITS_8)));
INSTANTIATE_TEST_CASE_P(SSE2, Trans16x16DCT,
::testing::Values(make_tuple(&aom_fdct16x16_sse2,
&aom_idct16x16_256_add_c,
0, AOM_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans16x16HT,
::testing::Values(
......@@ -904,18 +843,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 2, AOM_BITS_8),
make_tuple(&av1_fht16x16_sse2, &av1_iht16x16_256_add_c, 3,
AOM_BITS_8)));
// Optimizations take effect at a threshold of 3155, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
SSE2, InvTrans16x16DCT,
::testing::Values(make_tuple(&idct16x16_10_add_10_c,
&idct16x16_10_add_10_sse2, 3167, AOM_BITS_10),
make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2,
3167, AOM_BITS_10),
make_tuple(&idct16x16_10_add_12_c,
&idct16x16_10_add_12_sse2, 3167, AOM_BITS_12),
make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
3167, AOM_BITS_12)));
// TODO(luoyi):
// For this test case, we should test function: aom_highbd_fdct16x16_1_sse2.
// However this function is not available yet. if we mistakely test
......
......@@ -95,23 +95,6 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
av1_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
}
#if HAVE_SSE2
// 8x8 SSE2-vs-C test wrappers: bind bd = 10 or 12 to the C reference and
// the SSE2 implementation so both match one function-pointer signature.
void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_10_add_c(in, out, stride, 10);
}
void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_10_add_c(in, out, stride, 12);
}
void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
}
void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_HIGHBITDEPTH
class FwdTrans8x8TestBase {
......@@ -689,14 +672,6 @@ INSTANTIATE_TEST_CASE_P(
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 2, AOM_BITS_8),
make_tuple(&av1_fht8x8_sse2, &av1_iht8x8_64_add_c, 3, AOM_BITS_8)));
// Optimizations take effect at a threshold of 6201, so we use a value close to
// that to test both branches.
INSTANTIATE_TEST_CASE_P(
SSE2, InvTrans8x8DCT,
::testing::Values(make_tuple(&idct8x8_10_add_10_c, &idct8x8_10_add_10_sse2,
6225, AOM_BITS_10),
make_tuple(&idct8x8_10_add_12_c, &idct8x8_10_add_12_sse2,
6225, AOM_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_HIGHBITDEPTH
#if HAVE_SSSE3 && ARCH_X86_64
......
......@@ -286,15 +286,6 @@ using std::tr1::make_tuple;
const PartialInvTxfmParam c_partial_idct_tests[] = {
#if CONFIG_HIGHBITDEPTH
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_10_add_c>, TX_16X16, 10, 8, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_10_add_c>, TX_16X16, 10, 10, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_10_add_c>, TX_16X16, 10, 12, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
......@@ -368,24 +359,6 @@ INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
#if HAVE_SSE2
const PartialInvTxfmParam sse2_partial_idct_tests[] = {
#if CONFIG_HIGHBITDEPTH
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 10, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 10, 2),
make_tuple(
&aom_highbd_fdct16x16_c, &highbd_wrapper<aom_highbd_idct16x16_256_add_c>,
&highbd_wrapper<aom_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 12, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment