Commit 34c06df4, authored by Frank Galligan, committed by Yaowu Xu

Revert "Avoid use of deprecated high-bitdepth functions"

This reverts commit f9d77bd5.

Reason for revert: 8x8 transform failures

BUG=https://bugs.chromium.org/p/aomedia/issues/detail?id=502

Change-Id: I2f6c10bc576a966bd5a878b7ee8389074bf45014
parent 1b6e3948
This diff is collapsed.
......@@ -3628,4 +3628,107 @@ void aom_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest8,
}
}
void aom_highbd_idct8x8_10_add_sse2(const tran_low_t *input, uint8_t *dest8,
int stride, int bd) {
tran_low_t out[8 * 8] = { 0 };
tran_low_t *outptr = out;
int i, j, test;
__m128i inptr[8];
__m128i min_input, max_input, temp1, temp2, sign_bits;
uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
const __m128i zero = _mm_set1_epi16(0);
const __m128i sixteen = _mm_set1_epi16(16);
const __m128i max = _mm_set1_epi16(6201);
const __m128i min = _mm_set1_epi16(-6201);
int optimised_cols = 0;
// Load input into __m128i & pack to 16 bits
for (i = 0; i < 8; i++) {
temp1 = _mm_loadu_si128((const __m128i *)(input + 8 * i));
temp2 = _mm_loadu_si128((const __m128i *)(input + 8 * i + 4));
inptr[i] = _mm_packs_epi32(temp1, temp2);
}
// Find the min & max for the row transform
// only first 4 row has non-zero coefs
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 4; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp1);
if (!test) {
// Do the row transform
aom_idct8_sse2(inptr);
// Find the min & max for the column transform
// N.B. Only first 4 cols contain non-zero coeffs
max_input = _mm_max_epi16(inptr[0], inptr[1]);
min_input = _mm_min_epi16(inptr[0], inptr[1]);
for (i = 2; i < 8; i++) {
max_input = _mm_max_epi16(max_input, inptr[i]);
min_input = _mm_min_epi16(min_input, inptr[i]);
}
max_input = _mm_cmpgt_epi16(max_input, max);
min_input = _mm_cmplt_epi16(min_input, min);
temp1 = _mm_or_si128(max_input, min_input);
test = _mm_movemask_epi8(temp1);
if (test) {
// Use fact only first 4 rows contain non-zero coeffs
array_transpose_4X8(inptr, inptr);
for (i = 0; i < 4; i++) {
sign_bits = _mm_cmplt_epi16(inptr[i], zero);
temp1 = _mm_unpackhi_epi16(inptr[i], sign_bits);
temp2 = _mm_unpacklo_epi16(inptr[i], sign_bits);
_mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i + 1)), temp1);
_mm_storeu_si128((__m128i *)(outptr + 4 * (2 * i)), temp2);
}
} else {
// Set to use the optimised transform for the column
optimised_cols = 1;
}
} else {
// Run the un-optimised row transform
for (i = 0; i < 4; ++i) {
aom_highbd_idct8_c(input, outptr, bd);
input += 8;
outptr += 8;
}
}
if (optimised_cols) {
aom_idct8_sse2(inptr);
// Final round & shift and Reconstruction and Store
{
__m128i d[8];
for (i = 0; i < 8; i++) {
inptr[i] = _mm_add_epi16(inptr[i], sixteen);
d[i] = _mm_loadu_si128((const __m128i *)(dest + stride * i));
inptr[i] = _mm_srai_epi16(inptr[i], 5);
d[i] = clamp_high_sse2(_mm_adds_epi16(d[i], inptr[i]), bd);
// Store
_mm_storeu_si128((__m128i *)(dest + stride * i), d[i]);
}
}
} else {
// Run the un-optimised column transform
tran_low_t temp_in[8], temp_out[8];
for (i = 0; i < 8; ++i) {
for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
aom_highbd_idct8_c(temp_in, temp_out, bd);
for (j = 0; j < 8; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
}
}
}
}
#endif // CONFIG_HIGHBITDEPTH
......@@ -30,115 +30,64 @@ static INLINE TxfmFunc inv_txfm_type_to_func(TXFM_TYPE txfm_type) {
}
#if CONFIG_EXT_TX
const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
// DCT_DCT
static const TXFM_2D_CFG *inv_txfm_cfg_ls[FLIPADST_ADST + 1][TX_SIZES] = {
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_dct_4, &inv_txfm_2d_cfg_dct_dct_8,
&inv_txfm_2d_cfg_dct_dct_16, &inv_txfm_2d_cfg_dct_dct_32 },
// ADST_DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
// DCT_ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
// ADST_ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
// FLIPADST_DCT
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
// DCT_FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
// FLIPADST_FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
// ADST_FLIPADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
// FLIPADST_ADST
{
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
{ // IDTX
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
{ // V_DCT
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_dct_adst_4, &inv_txfm_2d_cfg_dct_adst_8,
&inv_txfm_2d_cfg_dct_adst_16, &inv_txfm_2d_cfg_dct_adst_32 },
{ // H_DCT
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_dct_4, &inv_txfm_2d_cfg_adst_dct_8,
&inv_txfm_2d_cfg_adst_dct_16, &inv_txfm_2d_cfg_adst_dct_32 },
{ // V_ADST
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
{ // H_ADST
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
{ // V_FLIP_ADST
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
{ // H_FLIP_ADST
#if CONFIG_CB4X4
NULL,
#endif
&inv_txfm_2d_cfg_adst_adst_4, &inv_txfm_2d_cfg_adst_adst_8,
&inv_txfm_2d_cfg_adst_adst_16, &inv_txfm_2d_cfg_adst_adst_32 },
};
#else
const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
static const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES] = {
{
#if CONFIG_CB4X4
NULL,
......
......@@ -442,6 +442,4 @@ static const TXFM_2D_CFG inv_txfm_2d_cfg_adst_dct_32 = {
TXFM_TYPE_DCT32
}; // .txfm_type_row
extern const TXFM_2D_CFG *inv_txfm_cfg_ls[TX_TYPES][TX_SIZES];
#endif // AV1_INV_TXFM2D_CFG_H_
This diff is collapsed.
......@@ -46,9 +46,7 @@ typedef struct {
} transform_2d;
#if CONFIG_HIGHBITDEPTH
typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *,
const int8_t *cos_bit,
const int8_t *stage_range, int bd);
typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd);
typedef struct {
highbd_transform_1d cols, rows; // vertical and horizontal
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment