Commit 4a2e3b2d authored by James Zern's avatar James Zern

remove remaining refs to aom_highbd_idct8x8_64_add

fixes high-bitdepth build:
./libaom.a(aom_dsp_rtcd.c.o): In function `setup_rtcd_internal':
  ./aom_dsp_rtcd.h:2614: undefined reference to
  `aom_highbd_idct8x8_64_add_c'

missed in:
c756e4d0 Cleanup dead high-bitdepth inverse-tx functions

BUG=aomedia:442

Change-Id: I63ee6fc5dbf85fd48efd9ff721868df6fb05eb09
parent aa0c34b0
......@@ -459,9 +459,6 @@ if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") {
add_proto qw/void aom_highbd_idct4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct4x4_16_add sse2/;
add_proto qw/void aom_highbd_idct8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct8x8_64_add sse2/;
add_proto qw/void aom_highbd_idct8x8_10_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int bd";
specialize qw/aom_highbd_idct8x8_10_add sse2/;
......
......@@ -3685,106 +3685,6 @@ void aom_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
}
}
void aom_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8];
tran_low_t *outptr = out;
int i, j, test;
__m128i inptr[8];
__m128i min_input, max_input, temp1, temp2, sign_bits;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i sixteen = _mm_set1_epi16(16);
const __m128i max = _mm_set1_epi16(6201);
const __m128i min = _mm_set1_epi16(-6201);
int optimised_cols = 0;
// Load input into __m128i & pack to 16 bits
for (i = 0; i < 8; i++) {
temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i));
temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4));
inptr[i] = _mm_packs_epi32(temp1, temp2);
}
// Find the min & max for the row transform
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 8; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp1);
if (!test) {
// Do the row transform
aom_idct8_sse2(inptr);
// Find the min & max for the column transform
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 8; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp1);
if (test) {
array_transpose_8x8(inptr, inptr);
for (i = 0; i < 8; i++) {
sign_bits = _mm_cmplt_epi16(inptr[i], zero);
temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits);
temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits);
_mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1);
_mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2);
}
} else {
// Set to use the optimised transform for the column
optimised_cols = 1;
}
} else {
// Run the un-optimised row transform
for (i = 0; i < 8; ++i) {
aom_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
}
if (optimised_cols) {
aom_idct8_sse2(inptr);
// Final round & shift and Reconstruction and Store
{
__m128i d[8];
for (i = 0; i < 8; i++) {
inptr[i] = _mm_add_epi16(inptr[i], sixteen);
d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
inptr[i] = _mm_srai_epi16(inptr[i], 5);
d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd);
// Store
_mm_storeu_si128((__m128i *)(dest + stride * i), d[i]);
}
}
} else {
// Run the un-optimised column transform
tran_low_t temp_in[8], temp_out[8];
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
aom_highbd_idct8_c(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
}
}
}
}
void aom_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8] = { 0 };
......
......@@ -111,14 +111,6 @@ void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
}
void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
}
void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
aom_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
}
#endif // HAVE_SSE2
#endif // CONFIG_HIGHBITDEPTH
......@@ -684,18 +676,10 @@ INSTANTIATE_TEST_CASE_P(
#endif // HAVE_SSE2 && !CONFIG_HIGHBITDEPTH
#if HAVE_SSE2 && CONFIG_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_sse2, &aom_idct8x8_64_add_c, 0,
AOM_BITS_8),
make_tuple(&aom_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2,
12, AOM_BITS_10),
make_tuple(&aom_highbd_fdct8x8_sse2,
&idct8x8_64_add_10_sse2, 12, AOM_BITS_10),
make_tuple(&aom_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2,
12, AOM_BITS_12),
make_tuple(&aom_highbd_fdct8x8_sse2,
&idct8x8_64_add_12_sse2, 12, AOM_BITS_12)));
INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
::testing::Values(make_tuple(&aom_fdct8x8_sse2,
&aom_idct8x8_64_add_c, 0,
AOM_BITS_8)));
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment