Commit 7580f25e authored by Yue Chen's avatar Yue Chen

Account for sqrt(2) in range computation of 1:2 tx

The cos bits and 2d tx algorithms are also adjusted accordingly to
meet the 32-bit limit.

Change-Id: I9048f3d3689ff1ef1bb84888ed9f43cdc4371411
parent da139a81
......@@ -261,21 +261,25 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
const int txfm_size_row = cfg->col_cfg->txfm_size;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
if (txfm_size_col == txfm_size_row) assert(rect_type == 0);
int rect_type2_shift = 0;
if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 16x4 / 4x16 shift 1 bit, for 32x8 / 8x32 / 64x16 / 16x64 no need
// for any additional shift.
rect_type2_shift = (txfm_size_max == 16 ? 1 : 0);
}
int rect_type_shift = 0;
// Take the shift from the larger dimension in the rectangular case.
const int8_t *shift = (txfm_size_col > txfm_size_row) ? cfg->row_cfg->shift
: cfg->col_cfg->shift;
int shift1 = shift[1];
while (rect_type2_shift > 0 && shift1 < 0) {
if (rect_type == 1 || rect_type == -1) {
rect_type_shift = 1;
} else if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 16x4 / 4x16 shift 1 bit, for 32x8 / 8x32 / 64x16 / 16x64 no need
// for any additional shift.
rect_type_shift = (txfm_size_max == 16 ? 1 : 0);
}
while (rect_type_shift > 0 && shift1 < 0) {
shift1++;
rect_type2_shift--;
rect_type_shift--;
}
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
stage_range_row[i] = cfg->row_cfg->stage_range[i] + fwd_shift + bd + 1;
......@@ -283,7 +287,7 @@ void av1_gen_inv_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
for (int i = 0; i < cfg->col_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
stage_range_col[i] = cfg->col_cfg->stage_range[i] + fwd_shift + shift[0] +
bd + 1 + rect_type2_shift;
bd + 1 + rect_type_shift;
}
}
......@@ -304,8 +308,15 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
: cfg->col_cfg->shift;
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int rect_type2_shift = 0;
int rect_type1_shift = 0;
int shift1 = shift[1];
if (rect_type == 2 || rect_type == -2) {
if (rect_type == 1 || rect_type == -1) {
rect_type1_shift = 1;
while (rect_type1_shift > 0 && shift1 < 0) {
shift1++;
rect_type1_shift--;
}
} else if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 16x4 / 4x16 shift 1 bit, for 32x8 / 8x32 / 64x16 / 16x64 no need
// for any additional shift.
......@@ -341,7 +352,7 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
// Multiply everything by Sqrt2 if the transform is rectangular with
// log ratio being 1 or -1, if the log ratio is 2 or -2, multiply by
// 2^rect_type2_shift.
if (abs(rect_type) == 1) {
if (rect_type1_shift == 1) {
for (c = 0; c < txfm_size_col; ++c)
buf_ptr[c] = (int32_t)dct_const_round_shift(buf_ptr[c] * Sqrt2);
} else if (rect_type2_shift) {
......@@ -362,6 +373,11 @@ static INLINE void inv_txfm2d_add_c(const int32_t *input, uint16_t *output,
temp_in[r] = buf[r * txfm_size_col + (txfm_size_col - c - 1)];
}
txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col);
if (abs(rect_type) == 1 && rect_type1_shift == 0) {
for (r = 0; r < txfm_size_row; ++r) {
temp_out[r] = (int32_t)dct_const_round_shift(temp_out[r] * InvSqrt2);
}
}
av1_round_shift_array(temp_out, txfm_size_row, -shift1);
if (cfg->ud_flip == 0) {
for (r = 0; r < txfm_size_row; ++r) {
......
......@@ -134,8 +134,8 @@ static const int8_t fwd_stage_range_row_dct_4x8[4] =
static const int8_t fwd_stage_range_row_adst_4x8[6] =
ARRAYOFFSET6(3, 0, 0, 1, 2, 2, 2);
static const int8_t fwd_stage_range_row_idx_4x8[1] = { 4 };
static const int8_t fwd_cos_bit_row_dct_4x8[6] = { 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_adst_4x8[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_4x8[6] = { 13, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_4x8[6] = { 13, 13, 12, 12, 12, 12 };
// ---------------- 8x4 1D constants -----------------------
#define fwd_shift_8x4 fwd_shift_8
......@@ -144,9 +144,9 @@ static const int8_t fwd_stage_range_row_dct_8x4[6] =
static const int8_t fwd_stage_range_row_adst_8x4[8] =
ARRAYOFFSET8(2, 0, 0, 1, 2, 2, 3, 3, 3);
static const int8_t fwd_stage_range_row_idx_8x4[1] = { 3 };
static const int8_t fwd_cos_bit_row_dct_8x4[6] = { 13, 13, 13, 13, 13, 13 };
static const int8_t fwd_cos_bit_row_dct_8x4[6] = { 13, 13, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_8x4[8] = { 13, 13, 13, 13,
13, 13, 13, 13 };
12, 12, 12, 12 };
// ---------------- 8x16 1D constants -----------------------
#define fwd_shift_8x16 fwd_shift_16
......@@ -155,9 +155,9 @@ static const int8_t fwd_stage_range_row_dct_8x16[6] =
static const int8_t fwd_stage_range_row_adst_8x16[8] =
ARRAYOFFSET8(4, 0, 0, 1, 2, 2, 3, 3, 3);
static const int8_t fwd_stage_range_row_idx_8x16[1] = { 5 };
static const int8_t fwd_cos_bit_row_dct_8x16[6] = { 13, 13, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_8x16[8] = { 13, 13, 13, 13,
12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_8x16[6] = { 12, 12, 11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_adst_8x16[8] = { 12, 12, 12, 12,
11, 11, 11, 11 };
// ---------------- 16x8 1D constants -----------------------
#define fwd_shift_16x8 fwd_shift_16
......@@ -166,10 +166,10 @@ static const int8_t fwd_stage_range_row_dct_16x8[8] =
static const int8_t fwd_stage_range_row_adst_16x8[10] =
ARRAYOFFSET10(3, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4);
static const int8_t fwd_stage_range_row_idx_16x8[1] = { 5 };
static const int8_t fwd_cos_bit_row_dct_16x8[8] = { 13, 13, 13, 12,
12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_adst_16x8[10] = { 13, 13, 13, 13, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_16x8[8] = { 12, 12, 12, 11,
11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_adst_16x8[10] = { 12, 12, 12, 12, 12,
12, 11, 11, 11, 11 };
// ---------------- 16x32 1D constants -----------------------
#define fwd_shift_16x32 fwd_shift_32
......@@ -178,10 +178,10 @@ static const int8_t fwd_stage_range_row_dct_16x32[8] =
static const int8_t fwd_stage_range_row_adst_16x32[10] =
ARRAYOFFSET10(5, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4);
static const int8_t fwd_stage_range_row_idx_16x32[1] = { 7 };
static const int8_t fwd_cos_bit_row_dct_16x32[8] = { 13, 13, 13, 12,
12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_16x32[8] = { 12, 12, 12, 11,
11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_adst_16x32[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
12, 11, 11, 11, 11 };
// ---------------- 32x16 1D constants -----------------------
#define fwd_shift_32x16 fwd_shift_32
......@@ -190,10 +190,10 @@ static const int8_t fwd_stage_range_row_dct_32x16[10] =
static const int8_t fwd_stage_range_row_adst_32x16[12] =
ARRAYOFFSET12(4, 0, 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5);
static const int8_t fwd_stage_range_row_idx_32x16[1] = { 6 };
static const int8_t fwd_cos_bit_row_dct_32x16[10] = { 12, 12, 12, 12, 12,
12, 12, 12, 12, 12 };
static const int8_t fwd_cos_bit_row_dct_32x16[10] = { 12, 12, 12, 12, 11,
11, 11, 11, 11, 11 };
static const int8_t fwd_cos_bit_row_adst_32x16[12] = { 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12 };
12, 12, 11, 11, 11, 11 };
// ---------------- 32x64 1D constants -----------------------
#define fwd_shift_32x64 fwd_shift_64
......
......@@ -64,22 +64,28 @@ void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row,
}
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int rect_shift = 0;
int rect_type_shift = 0;
int shift2 = shift[2];
if (rect_type == 2 || rect_type == -2) {
if (rect_type == 1 || rect_type == -1) {
rect_type_shift = 1;
} else if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 64x16 / 16x64 / 32x8 / 8x32 shift 2 bits, and
// For 16x4 / 4x16 shift by 1 bit.
rect_shift = (txfm_size_max >= 32) ? 2 : 1;
rect_type_shift = (txfm_size_max >= 32) ? 2 : 1;
}
while (rect_shift > 0 && shift2 < 0) {
while (rect_type_shift > 0 && shift2 < 0) {
shift2++;
rect_shift--;
rect_type_shift--;
}
// i < MAX_TXFM_STAGE_NUM will mute above array bounds warning
for (int i = 0; i < cfg->row_cfg->stage_num && i < MAX_TXFM_STAGE_NUM; ++i) {
stage_range_row[i] = cfg->row_cfg->stage_range[i] + shift[0] + shift[1] +
bd + 1 + rect_shift;
bd + 1 + rect_type_shift;
}
}
......@@ -100,8 +106,15 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
: cfg->col_cfg->shift;
int shift2 = shift[2];
const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row);
int rect_type1_shift = 0;
int rect_type2_shift = 0;
if (rect_type == 2 || rect_type == -2) {
if (rect_type == 1 || rect_type == -1) {
rect_type1_shift = 1;
while (rect_type1_shift > 0 && shift2 < 0) {
shift2++;
rect_type1_shift--;
}
} else if (rect_type == 2 || rect_type == -2) {
const int txfm_size_max = AOMMAX(txfm_size_col, txfm_size_row);
// For 64x16 / 16x64 / 32x8 / 8x32 shift 2 bits, and
// For 16x4 / 4x16 shift by 1 bit.
......@@ -141,10 +154,10 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
// transform is rectangular and the size difference is a factor of 2.
// If the size difference is a factor of 4, multiply by
// 2^rect_type_2_extra_shift.
if (rect_type == 1) {
if (abs(rect_type) == 1 && rect_type1_shift == 1) {
for (r = 0; r < txfm_size_row; ++r)
temp_out[r] = (int32_t)fdct_round_shift(temp_out[r] * Sqrt2);
} else if (rect_type == 2) {
} else if (abs(rect_type) == 2) {
av1_round_shift_array(temp_out, txfm_size_row, -rect_type2_shift);
}
av1_round_shift_array(temp_out, txfm_size_row, -shift[1]);
......@@ -160,20 +173,13 @@ static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output,
// Rows
for (r = 0; r < txfm_size_row; ++r) {
// Multiply everything by Sqrt2 on the larger dimension if the
// transform is rectangular and the size difference is a factor of 2.
// If the size difference is a factor of 4, multiply by 2.
if (rect_type == -1) {
for (c = 0; c < txfm_size_col; ++c)
buf[r * txfm_size_col + c] =
(int32_t)fdct_round_shift(buf[r * txfm_size_col + c] * Sqrt2);
} else if (rect_type == -2) {
for (c = 0; c < txfm_size_col; ++c)
buf[r * txfm_size_col + c] =
buf[r * txfm_size_col + c] * (1 << rect_type2_shift);
}
txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col,
cos_bit_row, stage_range_row);
if (abs(rect_type) == 1 && rect_type1_shift == 0) {
for (c = 0; c < txfm_size_col; ++c)
output[r * txfm_size_col + c] =
(int32_t)fdct_round_shift(output[r * txfm_size_col + c] * InvSqrt2);
}
av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift2);
}
}
......
......@@ -150,9 +150,9 @@ vector<AV1FwdTxfm2dParam> GetTxfm2dParamList() {
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X8, 3.2, 0.58));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X4, 3.2, 0.58));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X16, 6.5, 1));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X8, 6.5, 1));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X32, 50, 7));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_8X16, 15, 1.5));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X8, 15, 1.5));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_16X32, 55, 7));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_32X16, 30, 7));
param_list.push_back(AV1FwdTxfm2dParam(tx_type, TX_4X16, 5, 0.7));
......
......@@ -156,8 +156,8 @@ vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() {
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_4X8, 2, 0.016));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X4, 2, 0.016));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X16, 2, 0.033));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X8, 2, 0.033));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_8X16, 2, 0.2));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X8, 2, 0.2));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X32, 3, 0.4));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X16, 3, 0.4));
......@@ -168,8 +168,8 @@ vector<AV1InvTxfm2dParam> GetInvTxfm2dParamList() {
#if CONFIG_TX64X64
if (tx_type == DCT_DCT) { // Other types not supported by these tx sizes.
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X64, 4, 0.38));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X32, 4, 0.38));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_32X64, 5, 0.38));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X32, 5, 0.38));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_16X64, 3, 0.38));
param_list.push_back(AV1InvTxfm2dParam(tx_type, TX_64X16, 3, 0.38));
}
......
......@@ -330,6 +330,14 @@ void txfm_stage_range_check(const int8_t *stage_range, int stage_num,
// make sure there is no overflow while doing half_btf()
EXPECT_LE(stage_range[i] + cos_bit[i], high_range);
EXPECT_LE(stage_range[i + 1] + cos_bit[i], high_range);
if (stage_range[i] + cos_bit[i] > high_range) {
std::cout << i;
assert(0);
}
if (stage_range[i + 1] + cos_bit[i] > high_range) {
std::cout << i;
assert(0);
}
}
}
} // namespace libaom_test
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment