Commit 24d565b4, authored by Frederic Barbier, committed by Sebastien Alaiwan

Clean up dead inv-txfm functions in the HBD data path

Clean up the related unit tests.

Change-Id: Ic756e6bbad80f5b9947ca1cdd55cdef77b985f81
parent c8b38b0b
......@@ -395,18 +395,6 @@ if (aom_config("CONFIG_AV1") eq "yes") {
# Prototype without a bit-depth argument; an SSE2 specialization is registered
# for it on the next line.
add_proto qw/void aom_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_iwht4x4_16_add sse2/;
# aom_highbd_* prototypes: same leading arguments as above plus a trailing
# "int bd" argument. No specialize lines accompany them in this span.
add_proto qw/void aom_highbd_idct4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_idct8x8_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_idct16x16_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_idct32x32_1024_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_idct32x32_34_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_iwht4x4_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
add_proto qw/void aom_highbd_iwht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
......@@ -452,9 +440,6 @@ if (aom_config("CONFIG_AV1") eq "yes") {
# Prototype without a bit-depth argument; SSE2 and AVX2 specializations are
# registered for it.
add_proto qw/void aom_idct32x32_1_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride";
specialize qw/aom_idct32x32_1_add sse2 avx2/;
# Prototype with a trailing "int bd" argument; an SSE2 specialization is
# registered for it.
add_proto qw/void aom_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct4x4_16_add sse2/;
}
if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
} else {
......
......@@ -1404,72 +1404,3 @@ void aom_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
dest++;
}
}
// TODO(sarahparker) this one still needs to be removed but will be done in
// a followup because of its use in encoder/encodemb.c
//
// One-dimensional 4-point inverse transform: reads input[0..3] and writes
// output[0..3]. Every intermediate and final value is passed through
// HIGHBD_WRAPLOW together with bd.
void aom_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) {
  tran_low_t even_sum, even_diff, odd_minus, odd_plus;
  tran_high_t product;

  // Explicitly mark bd as used; otherwise it is only handed to HIGHBD_WRAPLOW.
  (void)bd;

  // Stage 1, even half: combines inputs 0 and 2 with cospi_16_64.
  product = (input[0] + input[2]) * cospi_16_64;
  even_sum = HIGHBD_WRAPLOW(dct_const_round_shift(product), bd);
  product = (input[0] - input[2]) * cospi_16_64;
  even_diff = HIGHBD_WRAPLOW(dct_const_round_shift(product), bd);

  // Stage 1, odd half: rotates inputs 1 and 3 with cospi_24_64 / cospi_8_64.
  product = input[1] * cospi_24_64 - input[3] * cospi_8_64;
  odd_minus = HIGHBD_WRAPLOW(dct_const_round_shift(product), bd);
  product = input[1] * cospi_8_64 + input[3] * cospi_24_64;
  odd_plus = HIGHBD_WRAPLOW(dct_const_round_shift(product), bd);

  // Stage 2: butterfly of the even and odd halves.
  output[0] = HIGHBD_WRAPLOW(even_sum + odd_plus, bd);
  output[1] = HIGHBD_WRAPLOW(even_diff + odd_minus, bd);
  output[2] = HIGHBD_WRAPLOW(even_diff - odd_minus, bd);
  output[3] = HIGHBD_WRAPLOW(even_sum - odd_plus, bd);
}
// Full 4x4 inverse transform followed by reconstruction.
//   input  - 16 coefficients, consumed four at a time (one row per pass).
//   dest8  - destination handle; converted with CONVERT_TO_SHORTPTR and then
//            accessed as uint16_t samples.
//   stride - distance between destination rows, in uint16_t elements.
//   bd     - forwarded to aom_highbd_idct4_c and highbd_clip_pixel_add.
void aom_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
                                 int stride, int bd) {
  tran_low_t row_pass[4 * 4];
  tran_low_t col_in[4], col_out[4];
  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
  int r, c;

  // Row pass: transform each group of four coefficients into row_pass.
  for (r = 0; r < 4; ++r) {
    aom_highbd_idct4_c(input + 4 * r, row_pass + 4 * r, bd);
  }

  // Column pass: gather one column, transform it, then add the result
  // (rounded and shifted right by 4) onto the destination with clipping.
  for (c = 0; c < 4; ++c) {
    for (r = 0; r < 4; ++r) col_in[r] = row_pass[r * 4 + c];
    aom_highbd_idct4_c(col_in, col_out, bd);
    for (r = 0; r < 4; ++r) {
      uint16_t *const px = &dest[r * stride + c];
      *px = highbd_clip_pixel_add(*px, ROUND_POWER_OF_TWO(col_out[r], 4), bd);
    }
  }
}
// 4x4 inverse transform that only reads input[0]: the value is scaled twice
// by cospi_16_64 (with round-shift and HIGHBD_WRAPLOW each time), rounded and
// shifted right by 4, and the same result is added to all 16 destination
// samples through highbd_clip_pixel_add.
//   dest8       - destination handle; converted with CONVERT_TO_SHORTPTR.
//   dest_stride - distance between destination rows, in uint16_t elements.
void aom_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
                                int dest_stride, int bd) {
  tran_low_t dc =
      HIGHBD_WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
  uint16_t *row = CONVERT_TO_SHORTPTR(dest8);
  tran_high_t delta;
  int r, c;

  dc = HIGHBD_WRAPLOW(dct_const_round_shift(dc * cospi_16_64), bd);
  delta = ROUND_POWER_OF_TWO(dc, 4);

  for (r = 0; r < 4; r++) {
    for (c = 0; c < 4; c++) {
      row[c] = highbd_clip_pixel_add(row[c], delta, bd);
    }
    row += dest_stride;
  }
}
......@@ -3498,131 +3498,3 @@ void idct32_8col(__m128i *in0, __m128i *in1) {
in1[14] = _mm_sub_epi16(stp1_1, stp1_30);
in1[15] = _mm_sub_epi16(stp1_0, stp1_31);
}
// Clamps each signed 16-bit lane of `value` to the range [0, (1 << bd) - 1].
// Lanes above the maximum are replaced by the maximum; lanes that are not
// strictly positive are zeroed.
static INLINE __m128i clamp_high_sse2(__m128i value, int bd) {
  const __m128i ones = _mm_set1_epi16(1);
  const __m128i zeros = _mm_set1_epi16(0);
  const __m128i pixel_max = _mm_subs_epi16(_mm_slli_epi16(ones, bd), ones);

  // All-ones in every lane where value exceeds pixel_max.
  const __m128i over_mask = _mm_cmpgt_epi16(value, pixel_max);

  // Select pixel_max in the over-range lanes and the input everywhere else.
  const __m128i kept = _mm_andnot_si128(over_mask, value);
  const __m128i capped = _mm_and_si128(over_mask, pixel_max);
  const __m128i upper_clamped = _mm_or_si128(kept, capped);

  // Keep only lanes that are > 0; the rest become 0.
  return _mm_and_si128(upper_clamped, _mm_cmpgt_epi16(upper_clamped, zeros));
}
// SSE2 4x4 inverse transform with reconstruction.
//
// The coefficients are packed to 16 bits and range-checked against +/-12043.
// While every value stays inside that range the 16-bit aom_idct4_sse2 kernel
// is used for the row pass and, after a second range check, for the column
// pass. Whenever a check fails, the corresponding pass falls back to
// aom_highbd_idct4_c operating on the 32-bit `out` buffer.
//
//   input  - 16 coefficients, loaded four per 128-bit register.
//   dest8  - destination handle; converted with CONVERT_TO_SHORTPTR and then
//            accessed as uint16_t samples.
//   stride - distance between destination rows, in uint16_t elements.
//   bd     - passed to clamp_high_sse2 / aom_highbd_idct4_c /
//            highbd_clip_pixel_add.
void aom_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[4 * 4];
tran_low_t *outptr = out;
int i, j;
__m128i inptr[4];
__m128i sign_bits[2];
__m128i temp_mm, min_input, max_input;
int test;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
int optimised_cols = 0;
const __m128i zero = _mm_set1_epi16(0);
const __m128i eight = _mm_set1_epi16(8);
// NOTE(review): +/-12043 is the range limit for taking the 16-bit path;
// presumably chosen so aom_idct4_sse2 cannot overflow 16 bits - confirm.
const __m128i max = _mm_set1_epi16(12043);
const __m128i min = _mm_set1_epi16(-12043);
// Load input into __m128i
inptr[0] = _mm_loadu_si128((const __m128i *)input);
inptr[1] = _mm_loadu_si128((const __m128i *)(input + 4));
inptr[2] = _mm_loadu_si128((const __m128i *)(input + 8));
inptr[3] = _mm_loadu_si128((const __m128i *)(input + 12));
// Pack to 16 bits
// (signed saturation: out-of-range coefficients saturate and therefore fail
// the range check below)
inptr[0] = _mm_packs_epi32(inptr[0], inptr[1]);
inptr[1] = _mm_packs_epi32(inptr[2], inptr[3]);
// test becomes non-zero if any packed coefficient is > max or < min.
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp_mm = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp_mm);
if (!test) {
// Do the row transform
aom_idct4_sse2(inptr);
// Check the min & max values
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp_mm = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp_mm);
if (test) {
// Row results are out of range for the 16-bit column pass: transpose them,
// widen each 16-bit lane to 32 bits by interleaving it with its sign mask,
// and store into out[] for the C column transform below.
// NOTE(review): the transpose presumably restores the row-major layout
// expected by out[] - confirm against aom_idct4_sse2.
array_transpose_4x4(inptr);
sign_bits[0] = _mm_cmplt_epi16(inptr[0], zero);
sign_bits[1] = _mm_cmplt_epi16(inptr[1], zero);
inptr[3] = _mm_unpackhi_epi16(inptr[1], sign_bits[1]);
inptr[2] = _mm_unpacklo_epi16(inptr[1], sign_bits[1]);
inptr[1] = _mm_unpackhi_epi16(inptr[0], sign_bits[0]);
inptr[0] = _mm_unpacklo_epi16(inptr[0], sign_bits[0]);
_mm_storeu_si128((__m128i *)outptr, inptr[0]);
_mm_storeu_si128((__m128i *)(outptr + 4), inptr[1]);
_mm_storeu_si128((__m128i *)(outptr + 8), inptr[2]);
_mm_storeu_si128((__m128i *)(outptr + 12), inptr[3]);
} else {
// Set to use the optimised transform for the column
optimised_cols = 1;
}
} else {
// Run the un-optimised row transform
for (i = 0; i < 4; ++i) {
aom_highbd_idct4_c(input, outptr, bd);
input += 4;
outptr += 4;
}
}
if (optimised_cols) {
// Column pass in 16-bit SSE2.
aom_idct4_sse2(inptr);
// Final round and shift
// (add 8, then arithmetic shift right by 4)
inptr[0] = _mm_add_epi16(inptr[0], eight);
inptr[1] = _mm_add_epi16(inptr[1], eight);
inptr[0] = _mm_srai_epi16(inptr[0], 4);
inptr[1] = _mm_srai_epi16(inptr[1], 4);
// Reconstruction and Store
{
// d0 holds destination rows 0 (low 64 bits) and 1 (high 64 bits);
// d2 holds rows 2 and 3. Each row is four uint16_t samples.
__m128i d0 = _mm_loadl_epi64((const __m128i *)dest);
__m128i d2 = _mm_loadl_epi64((const __m128i *)(dest + stride * 2));
d0 = _mm_unpacklo_epi64(
d0, _mm_loadl_epi64((const __m128i *)(dest + stride)));
d2 = _mm_unpacklo_epi64(
d2, _mm_loadl_epi64((const __m128i *)(dest + stride * 3)));
// Saturating add of the residual, then clamp via clamp_high_sse2.
d0 = clamp_high_sse2(_mm_adds_epi16(d0, inptr[0]), bd);
d2 = clamp_high_sse2(_mm_adds_epi16(d2, inptr[1]), bd);
// store input0
_mm_storel_epi64((__m128i *)dest, d0);
// store input1
d0 = _mm_srli_si128(d0, 8);
_mm_storel_epi64((__m128i *)(dest + stride), d0);
// store input2
_mm_storel_epi64((__m128i *)(dest + stride * 2), d2);
// store input3
d2 = _mm_srli_si128(d2, 8);
_mm_storel_epi64((__m128i *)(dest + stride * 3), d2);
}
} else {
// Run the un-optimised column transform
// (reads out[], filled either by the C row pass or by the widened SSE2 rows)
tran_low_t temp_in[4], temp_out[4];
// Columns
for (i = 0; i < 4; ++i) {
for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
aom_highbd_idct4_c(temp_in, temp_out, bd);
for (j = 0; j < 4; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
}
}
}
}
......@@ -1456,15 +1456,6 @@ static void inv_txfm_add_64x64(const tran_low_t *input, uint8_t *dest,
}
#endif // CONFIG_TX64X64
// idct
// Dispatches the 4x4 inverse transform on eob: values of 1 or less take
// aom_highbd_idct4x4_1_add, anything larger takes aom_highbd_idct4x4_16_add.
// All other arguments are forwarded unchanged.
void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
                            int eob, int bd) {
  if (eob <= 1) {
    aom_highbd_idct4x4_1_add(input, dest, stride, bd);
    return;
  }
  aom_highbd_idct4x4_16_add(input, dest, stride, bd);
}
void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd) {
if (eob > 1)
......
......@@ -70,8 +70,6 @@ void av1_inverse_transform_block_facade(MACROBLOCKD *xd, int plane, int block,
// Inverse-transform-and-add entry points taking a bit-depth argument. Each
// takes the coefficient buffer, the destination, its stride, eob and bd.
void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
void av1_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd);
// 4x4 variant that additionally receives the transform type and a lossless
// flag.
void av1_highbd_inv_txfm_add_4x4(const tran_low_t *input, uint8_t *dest,
int stride, int eob, int bd, TX_TYPE tx_type,
int lossless);
......
......@@ -63,14 +63,6 @@ void fht4x4_12(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
av1_fwd_txfm2d_4x4_c(in, out, stride, tx_type, 12);
}
// Test adapter: exposes aom_highbd_idct4x4_16_add_c through a three-argument
// (in, out, stride) signature by fixing the bit depth to 10.
void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct4x4_16_add_c(in, out, stride, 10);
}
// Test adapter: exposes aom_highbd_idct4x4_16_add_c through a three-argument
// (in, out, stride) signature by fixing the bit depth to 12.
void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct4x4_16_add_c(in, out, stride, 12);
}
// Test adapter: calls av1_inv_txfm2d_add_4x4_c with the bit depth fixed to 10.
// The output pointer is converted with CONVERT_TO_SHORTPTR before the call.
void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
av1_inv_txfm2d_add_4x4_c(in, CONVERT_TO_SHORTPTR(out), stride, tx_type, 10);
}
......@@ -86,16 +78,6 @@ void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
// Test adapter: exposes aom_highbd_iwht4x4_16_add_c through a three-argument
// (in, out, stride) signature by fixing the bit depth to 12.
void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_iwht4x4_16_add_c(in, out, stride, 12);
}
#if HAVE_SSE2
// Test adapter: exposes aom_highbd_idct4x4_16_add_sse2 through a
// three-argument (in, out, stride) signature by fixing the bit depth to 10.
void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
}
// Test adapter: exposes aom_highbd_idct4x4_16_add_sse2 through a
// three-argument (in, out, stride) signature by fixing the bit depth to 12.
void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_HIGHBITDEPTH
class Trans4x4DCT : public libaom_test::TransformTestBase,
......@@ -221,19 +203,10 @@ TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
using std::tr1::make_tuple;
// C instantiation of the Trans4x4DCT parameterized tests.
// With CONFIG_HIGHBITDEPTH the suite pairs aom_highbd_fdct4x4_c with the
// idct4x4_10 / idct4x4_12 adapters (AOM_BITS_10 / AOM_BITS_12) and also runs
// the AOM_BITS_8 pair; without it only the AOM_BITS_8 pair is run.
// NOTE(review): the third (0) and fifth (16) tuple fields are not explained
// in this span - confirm their meaning against the test fixture.
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
C, Trans4x4DCT,
::testing::Values(
make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_10, 0, AOM_BITS_10, 16),
make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_12, 0, AOM_BITS_12, 16),
make_tuple(&aom_fdct4x4_c, &aom_idct4x4_16_add_c, 0, AOM_BITS_8, 16)));
#else
INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
::testing::Values(make_tuple(&aom_fdct4x4_c,
&aom_idct4x4_16_add_c, 0,
AOM_BITS_8, 16)));
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
......@@ -322,18 +295,6 @@ INSTANTIATE_TEST_CASE_P(
#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
// SSE2 instantiation of the Trans4x4DCT parameterized tests.
// For AOM_BITS_10 and AOM_BITS_12, both the C and the SSE2 forward transforms
// are paired with the SSE2 inverse adapters. The AOM_BITS_8 entry pairs
// aom_fdct4x4_sse2 with the C inverse aom_idct4x4_16_add_c.
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4DCT,
::testing::Values(
make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, AOM_BITS_10, 16),
make_tuple(&aom_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, AOM_BITS_10,
16),
make_tuple(&aom_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, AOM_BITS_12, 16),
make_tuple(&aom_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, AOM_BITS_12,
16),
make_tuple(&aom_fdct4x4_sse2, &aom_idct4x4_16_add_c, 0, AOM_BITS_8,
16)));
INSTANTIATE_TEST_CASE_P(
SSE2, Trans4x4HT,
::testing::Values(
......
......@@ -41,13 +41,6 @@ void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
fn(in, out, stride);
}
#if CONFIG_HIGHBITDEPTH
// Adapter template: forwards (in, stride, bd) to the compile-time function
// `fn`, passing the output pointer through CONVERT_TO_BYTEPTR first.
template <InvTxfmWithBdFunc fn>
void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);
}
#endif
// Parameter tuple for the partial inverse-transform tests: one forward
// transform, two inverse transforms, a transform size and three ints.
// NOTE(review): judging by the instantiations, the two inverse functions look
// like a reference and the function under test, and the ints look like a
// coefficient count, a bit depth and a bytes-per-pixel value - confirm against
// the fixture's SetUp.
typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
TX_SIZE, int, int, int>
PartialInvTxfmParam;
......@@ -285,26 +278,6 @@ TEST_P(PartialIDctTest, DISABLED_Speed) {
using std::tr1::make_tuple;
const PartialInvTxfmParam c_partial_idct_tests[] = {
#if CONFIG_HIGHBITDEPTH
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2),
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2),
#endif // CONFIG_HIGHBITDEPTH
make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
&wrapper<aom_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
......@@ -358,17 +331,6 @@ INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
#if HAVE_SSE2
const PartialInvTxfmParam sse2_partial_idct_tests[] = {
#if CONFIG_HIGHBITDEPTH
make_tuple(&aom_highbd_fdct4x4_c,
&highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2),
make_tuple(
&aom_highbd_fdct4x4_c, &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 10, 2),
make_tuple(
&aom_highbd_fdct4x4_c, &highbd_wrapper<aom_highbd_idct4x4_16_add_c>,
&highbd_wrapper<aom_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 12, 2),
#endif // CONFIG_HIGHBITDEPTH
make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
&wrapper<aom_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
make_tuple(&aom_fdct32x32_c, &wrapper<aom_idct32x32_1024_add_c>,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment