Commit 06250276 authored by Angie Chiang

Tune the inv_shift

Let the second stage of the 10-bit inverse transforms fit within 16 bits.

Change-Id: Ia087d65484cd410651190dcd9d3292cce6594d34
parent a8b45c37
......@@ -246,14 +246,14 @@ static const TXFM_1D_CFG *inv_txfm_row_cfg_ls[TX_TYPES_1D][TX_SIZES_ALL] = {
};
static const int8_t inv_shift_4x4[2] = { 0, -4 };
static const int8_t inv_shift_8x8[2] = { 0, -5 };
static const int8_t inv_shift_16x16[2] = { -1, -5 };
static const int8_t inv_shift_32x32[2] = { -1, -5 };
static const int8_t inv_shift_8x8[2] = { -1, -4 };
static const int8_t inv_shift_16x16[2] = { -2, -4 };
static const int8_t inv_shift_32x32[2] = { -2, -4 };
#if CONFIG_TX64X64
static const int8_t inv_shift_64x64[2] = { -1, -5 };
static const int8_t inv_shift_64x64[2] = { -2, -4 };
#endif
static const int8_t inv_shift_4x8[2] = { 0, -4 };
static const int8_t inv_shift_8x4[2] = { 0, -4 };
static const int8_t inv_shift_4x8[2] = { -1, -3 };
static const int8_t inv_shift_8x4[2] = { -1, -3 };
static const int8_t inv_shift_8x16[2] = { -1, -4 };
static const int8_t inv_shift_16x8[2] = { -1, -4 };
static const int8_t inv_shift_16x32[2] = { -1, -4 };
......@@ -264,11 +264,11 @@ static const int8_t inv_shift_64x32[2] = { -1, -4 };
#endif
static const int8_t inv_shift_4x16[2] = { -1, -4 };
static const int8_t inv_shift_16x4[2] = { -1, -4 };
static const int8_t inv_shift_8x32[2] = { -1, -5 };
static const int8_t inv_shift_32x8[2] = { -1, -5 };
static const int8_t inv_shift_8x32[2] = { -2, -4 };
static const int8_t inv_shift_32x8[2] = { -2, -4 };
#if CONFIG_TX64X64
static const int8_t inv_shift_16x64[2] = { -1, -5 };
static const int8_t inv_shift_64x16[2] = { -1, -5 };
static const int8_t inv_shift_16x64[2] = { -2, -4 };
static const int8_t inv_shift_64x16[2] = { -2, -4 };
#endif // CONFIG_TX64X64
const int8_t *inv_txfm_shift_ls[TX_SIZES_ALL] = {
......
......@@ -831,6 +831,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
......@@ -841,6 +842,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
......@@ -851,6 +853,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
......@@ -861,6 +864,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 0, -shift[1], bd);
break;
......@@ -871,6 +875,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
idct8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
break;
......@@ -881,6 +886,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
idct8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
break;
......@@ -891,6 +897,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 1, 0, -shift[1], bd);
break;
......@@ -901,6 +908,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 1, 1, -shift[1], bd);
break;
......@@ -911,6 +919,7 @@ void av1_inv_txfm2d_add_8x8_sse4_1(const int32_t *coeff, uint16_t *output,
transpose_8x8(in, out);
iadst8x8_sse4_1(out, in, row_cfg->cos_bit[2]);
transpose_8x8(in, out);
round_shift_8x8(out, -shift[0]);
iadst8x8_sse4_1(out, in, col_cfg->cos_bit[2]);
write_buffer_8x8(in, output, stride, 0, 1, -shift[1], bd);
break;
......
......@@ -145,19 +145,19 @@ vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() {
for (int t = 0; t < TX_TYPES; ++t) {
const TX_TYPE tx_type = static_cast<TX_TYPE>(t);
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X4, 2, 0.002));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X8, 2, 0.025));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X8, 2, 0.05));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X16, 2, 0.04));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X32, 4, 0.4));
#if CONFIG_TX64X64
if (tx_type == DCT_DCT) { // Other types not supported by these tx sizes.
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X64, 3, 0.2));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X64, 3, 0.3));
}
#endif // CONFIG_TX64X64
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X8, 2, 0.016));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X4, 2, 0.045));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X16, 2, 0.2));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X8, 2, 0.2));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X8, 2, 0.09));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X4, 2, 0.11));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X16, 2, 0.03));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X8, 2, 0.06));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X32, 3, 0.4));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X16, 3, 0.5));
......
......@@ -244,9 +244,9 @@ class FwdTrans8x8TestBase {
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
<< " roundtrip error > 1";
EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error)
EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 4, total_error)
<< "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
<< "error > 1/5 per block";
<< "error > 1/4 per block";
}
void RunExtremalCheck() {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment