Commit 8e1d0f70 authored by Angie Chiang

Change scales of fht 32x16 16x32 32x32 functions

Performance drop with ext_tx and rect_tx on
       BDRate
lowres -0.028
midres -0.075
hdres  -0.054

Change-Id: I50f89b9e9785d82ab05c3276a3c8b22b4dcfd408
parent 705ce47f
......@@ -1654,15 +1654,14 @@ void av1_fht16x32_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[i * stride + j] * 4 * Sqrt2);
ht.rows(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
}
// Columns
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.cols(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
for (j = 0; j < n2; ++j) output[i + j * n] = temp_out[j];
}
// Note: overall scale factor of transform is 4 times unitary
}
......@@ -1707,15 +1706,14 @@ void av1_fht32x16_c(const int16_t *input, tran_low_t *output, int stride,
(tran_low_t)fdct_round_shift(input[j * stride + i] * 4 * Sqrt2);
ht.cols(temp_in, temp_out);
for (j = 0; j < n; ++j)
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
out[j * n2 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
}
// Rows
for (i = 0; i < n; ++i) {
for (j = 0; j < n2; ++j) temp_in[j] = out[j + i * n2];
ht.rows(temp_in, temp_out);
for (j = 0; j < n2; ++j)
output[j + i * n2] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
for (j = 0; j < n2; ++j) output[j + i * n2] = temp_out[j];
}
// Note: overall scale factor of transform is 4 times unitary
}
......@@ -2074,17 +2072,6 @@ void av1_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride,
}
#endif // CONFIG_AOM_HIGHBITDEPTH
// TODO(luoyi): Adding this function to avoid DCT_DCT overflow.
// Remove this function after we scale the column txfm output correctly.
//
// Scans the first `size` entries of `input` (read contiguously, starting at
// index 0) and reports whether any of them has an absolute value strictly
// greater than `bound`.
//
// Returns 1 if at least one such entry exists, 0 otherwise (including when
// `size` is zero or negative, in which case nothing is read).
static INLINE int range_check_dct32x32(const int16_t *input, int16_t bound,
                                       int size) {
  int idx = 0;
  int exceeded = 0;
  // Stop at the first out-of-range sample; later entries are never examined.
  while (idx < size && !exceeded) {
    exceeded = abs(input[idx]) > bound;
    ++idx;
  }
  return exceeded;
}
void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
int tx_type) {
static const transform_2d FHT[] = {
......@@ -2117,27 +2104,19 @@ void av1_fht32x32_c(const int16_t *input, tran_low_t *output, int stride,
maybe_flip_input(&input, &stride, 32, 32, flipped_input, tx_type);
#endif
if (DCT_DCT == tx_type) {
if (range_check_dct32x32(input, (1 << 6) - 1, 1 << 10)) {
aom_fdct32x32_c(input, output, stride);
return;
}
}
// Columns
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
ht.cols(temp_in, temp_out);
for (j = 0; j < 32; ++j)
out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
out[j * 32 + i] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 4);
}
// Rows
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
ht.rows(temp_in, temp_out);
for (j = 0; j < 32; ++j)
output[j + i * 32] =
(tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2);
for (j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j];
}
}
......
......@@ -92,14 +92,14 @@ static void fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
(void)fwd_txfm_opt;
av1_fht16x32(src_diff, coeff, diff_stride, tx_type);
av1_fht16x32_c(src_diff, coeff, diff_stride, tx_type);
}
// Forward-transform wrapper: passes src_diff, coeff, diff_stride and tx_type
// straight through to the 32x16 hybrid transform routine. fwd_txfm_opt is
// accepted but not used by this wrapper.
static void fwd_txfm_32x16(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TX_TYPE tx_type,
FWD_TXFM_OPT fwd_txfm_opt) {
// Explicitly discard the parameter to mark it as intentionally unused.
(void)fwd_txfm_opt;
// NOTE(review): this listing appears to be a rendered diff without +/-
// markers, so the two calls below are presumably the before (av1_fht32x16)
// and after (av1_fht32x16_c) versions of a single call rather than two
// consecutive calls -- confirm against the actual source file.
av1_fht32x16(src_diff, coeff, diff_stride, tx_type);
av1_fht32x16_c(src_diff, coeff, diff_stride, tx_type);
}
static void fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff,
......@@ -135,7 +135,7 @@ static void fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff,
av1_fwd_idtx_c(src_diff, coeff, diff_stride, 32, tx_type);
else
#endif
av1_fht32x32(src_diff, coeff, diff_stride, tx_type);
av1_fht32x32_c(src_diff, coeff, diff_stride, tx_type);
}
#if CONFIG_TX64X64
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment