Commit 60586676, authored by Debargha Mukherjee, committed by Urvang Joshi

Zero out half of 16x64 and 64x16 transforms

Constrain the 16x64 transform so that the bottom 16x32 region is zero,
and constrain the 64x16 transform so that the right 32x16 region is zero.
Also reimplement the 32x64 transform to reduce the intermediate
coefficient range.

Change-Id: Ia9050ee741ed1d5b02a42616635b496d637d932f
parent 5a88172c
......@@ -377,10 +377,27 @@ void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride,
void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride,
TX_TYPE tx_type, int bd) {
#if CONFIG_TXMG
int32_t txfm_buf[32 * 64];
int16_t rinput[64 * 32];
TX_SIZE tx_size = TX_32X64;
TX_SIZE rtx_size = av1_rotate_tx_size(tx_size);
TX_TYPE rtx_type = av1_rotate_tx_type(tx_type);
int w = tx_size_wide[tx_size];
int h = tx_size_high[tx_size];
int rw = h;
int rh = w;
transpose_int16(rinput, rw, input, stride, w, h);
TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(rtx_type, rtx_size, &cfg);
fwd_txfm2d_c(rinput, txfm_buf, rw, &cfg, output, bd);
transpose_int32(output, w, txfm_buf, rw, rw, rh);
#else
int32_t txfm_buf[32 * 64];
TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif // CONFIG_TXMG
// Zero out the bottom 32x32 area.
memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output));
......@@ -427,6 +444,8 @@ void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride,
av1_get_fwd_txfm_cfg(tx_type, TX_16X64, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
#endif
// Zero out the bottom 16x32 area.
memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output));
}
void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
......@@ -435,6 +454,10 @@ void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride,
TXFM_2D_FLIP_CFG cfg;
av1_get_fwd_txfm_cfg(tx_type, TX_64X16, &cfg);
fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd);
// Zero out right 32x16 area.
for (int row = 0; row < 16; ++row) {
memset(output + row * 64 + 32, 0, 32 * sizeof(*output));
}
}
#endif // CONFIG_TX64X64
......
......@@ -2664,6 +2664,8 @@ void av1_fht16x64_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n4; ++j)
output[i + j * n] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
// Zero out the bottom 16x32 area.
memset(output + 2 * n * n, 0, 2 * n * n * sizeof(*output));
// Note: overall scale factor of transform is 4 times unitary
}
......@@ -2714,6 +2716,10 @@ void av1_fht64x16_c(const int16_t *input, tran_low_t *output, int stride,
for (j = 0; j < n4; ++j)
output[j + i * n4] = ROUND_POWER_OF_TWO_SIGNED(temp_out[j], 2);
}
// Zero out right 32x16 area.
for (int row = 0; row < n; ++row) {
memset(output + row * n4 + 2 * n, 0, 2 * n * sizeof(*output));
}
// Note: overall scale factor of transform is 4 times unitary
}
#endif // CONFIG_TX64X64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment