Commit 9ca05b7e authored by Jingning Han

Refactor var-tx pipeline to support cb4x4 mode

Replace the hard-coded 4x4 transform block step size assumption with
scalable table access.

Change-Id: Ib1cc555c2641e5634acdd91ca33217f00aeb0b89
parent a43a2d98
......@@ -237,11 +237,10 @@ static INLINE void inter_block_tx_count_update(AV1_COMMON *cm, MACROBLOCKD *xd,
MB_MODE_INFO *mbmi,
BLOCK_SIZE plane_bsize,
int ctx) {
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
int bh = num_4x4_blocks_wide_lookup[txb_size];
int bh = tx_size_wide_unit[max_tx_size];
int idx, idy;
for (idy = 0; idy < mi_height; idy += bh)
......
......@@ -533,10 +533,10 @@ static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd,
const int eob =
av1_decode_block_tokens(xd, plane, sc, blk_col, blk_row, plane_tx_size,
tx_type, &max_scan_line, r, mbmi->segment_id);
inverse_transform_block(
xd, plane, tx_type, plane_tx_size,
&pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
pd->dst.stride, max_scan_line, eob);
inverse_transform_block(xd, plane, tx_type, plane_tx_size,
&pd->dst.buf[(blk_row * pd->dst.stride + blk_col)
<< tx_size_wide_log2[0]],
pd->dst.stride, max_scan_line, eob);
*eob_total += eob;
} else {
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
......@@ -1673,7 +1673,6 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh_var_tx = tx_size_high_unit[max_tx_size];
const int bw_var_tx = tx_size_wide_unit[max_tx_size];
for (row = 0; row < max_blocks_high; row += bh_var_tx)
for (col = 0; col < max_blocks_wide; col += bw_var_tx)
decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, row, col,
......
......@@ -346,7 +346,11 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
if (counts) ++counts->txfm_partition[ctx][1];
if (tx_size == TX_8X8) {
int idx, idy;
inter_tx_size[0][0] = TX_4X4;
for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy)
for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx)
inter_tx_size[idy][idx] = tx_size;
mbmi->tx_size = TX_4X4;
mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
txfm_partition_update(xd->above_txfm_context + tx_col,
......@@ -1969,8 +1973,8 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
mbmi->tx_size = read_tx_size_intra(cm, xd, r);
if (inter_block) {
const int width = num_4x4_blocks_wide_lookup[bsize];
const int height = num_4x4_blocks_high_lookup[bsize];
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_high_log2[0];
int idx, idy;
for (idy = 0; idy < height; ++idy)
for (idx = 0; idx < width; ++idx)
......
......@@ -1327,8 +1327,8 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const MODE_INFO *mi,
const TX_SIZE max_tx_size = max_txsize_rect_lookup[bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
const int width = num_4x4_blocks_wide_lookup[bsize];
const int height = num_4x4_blocks_high_lookup[bsize];
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_wide_log2[0];
int idx, idy;
for (idy = 0; idy < height; idy += bh)
for (idx = 0; idx < width; idx += bw)
......@@ -1999,8 +1999,12 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
#if CONFIG_VAR_TX
const struct macroblockd_plane *const pd = &xd->plane[plane];
BLOCK_SIZE bsize = mbmi->sb_type;
#if CONFIG_CB4X4
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
#else
const BLOCK_SIZE plane_bsize =
get_plane_block_size(AOMMAX(bsize, BLOCK_8X8), pd);
#endif
const int num_4x4_w =
block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
......
......@@ -158,9 +158,9 @@ struct macroblock {
int mv_row_max;
#if CONFIG_VAR_TX
uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
#if CONFIG_REF_MV
uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
#endif
#endif
......
......@@ -5261,8 +5261,8 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
BLOCK_SIZE plane_bsize, int mi_row,
int mi_col, FRAME_COUNTS *td_counts) {
MACROBLOCKD *xd = &x->e_mbd;
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_wide_log2[0];
TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
......@@ -5319,8 +5319,8 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
static void tx_partition_set_contexts(const AV1_COMMON *const cm,
MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
int mi_row, int mi_col) {
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
......@@ -5482,8 +5482,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
av1_encode_sb((AV1_COMMON *)cm, x, block_size);
#if CONFIG_VAR_TX
if (mbmi->skip) mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size);
av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col,
AOMMAX(bsize, BLOCK_8X8), rate);
av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, block_size,
rate);
#else
av1_tokenize_sb(cpi, td, t, dry_run, block_size, rate);
#endif
......
......@@ -632,7 +632,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
#endif
#if CONFIG_VAR_TX
int i;
const int bwl = b_width_log2_lookup[plane_bsize];
int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
#endif
dst = &pd->dst
.buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
......@@ -646,9 +646,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
#if CONFIG_VAR_TX
// Assert not magic number (uninitialized).
assert(x->blk_skip[plane][(blk_row << bwl) + blk_col] != 234);
assert(x->blk_skip[plane][blk_row * bw + blk_col] != 234);
if (x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
if (x->blk_skip[plane][blk_row * bw + blk_col] == 0) {
#else
{
#endif
......
......@@ -3150,8 +3150,11 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int txb_w = tx_size_wide_unit[tx_size];
int src_stride = p->src.stride;
uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col];
uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
uint8_t *src =
&p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]];
uint8_t *dst =
&pd->dst
.buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]];
#if CONFIG_AOM_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, rec_buffer16[MAX_TX_SQUARE]);
uint8_t *rec_buffer;
......@@ -3161,7 +3164,8 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int max_blocks_high = block_size_high[plane_bsize];
int max_blocks_wide = block_size_wide[plane_bsize];
const int diff_stride = max_blocks_wide;
const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
const int16_t *diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
int txb_coeff_cost;
assert(tx_size < TX_SIZES_ALL);
......@@ -3206,10 +3210,11 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
tmp = 0;
for (idy = 0; idy < blocks_height; idy += 2) {
for (idx = 0; idx < blocks_width; idx += 2) {
const int16_t *d = diff + 4 * idy * diff_stride + 4 * idx;
tmp += aom_sum_squares_2d_i16(d, diff_stride, 8);
for (idy = 0; idy < blocks_height; ++idy) {
for (idx = 0; idx < blocks_width; ++idx) {
const int16_t *d =
diff + ((idy * diff_stride + idx) << tx_size_wide_log2[0]);
tmp += aom_sum_squares_2d_i16(d, diff_stride, 4);
}
}
} else {
......@@ -3247,11 +3252,13 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size,
int blocks_height = AOMMIN(txb_h, max_blocks_high - blk_row);
int blocks_width = AOMMIN(txb_w, max_blocks_wide - blk_col);
tmp = 0;
for (idy = 0; idy < blocks_height; idy += 2) {
for (idx = 0; idx < blocks_width; idx += 2) {
uint8_t *const s = src + 4 * idy * src_stride + 4 * idx;
uint8_t *const r = rec_buffer + 4 * idy * MAX_TX_SIZE + 4 * idx;
cpi->fn_ptr[BLOCK_8X8].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
for (idy = 0; idy < blocks_height; ++idy) {
for (idx = 0; idx < blocks_width; ++idx) {
uint8_t *const s =
src + ((idy * src_stride + idx) << tx_size_wide_log2[0]);
uint8_t *const r =
rec_buffer + ((idy * MAX_TX_SIZE + idx) << tx_size_wide_log2[0]);
cpi->fn_ptr[BLOCK_4X4].vf(s, src_stride, r, MAX_TX_SIZE, &this_dist);
tmp += this_dist;
}
}
......@@ -3428,8 +3435,8 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
if (is_cost_valid) {
const struct macroblockd_plane *const pd = &xd->plane[0];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
......@@ -3445,7 +3452,7 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS pn_rd_stats;
av1_init_rd_stats(&pn_rd_stats);
av1_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl);
av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl);
memcpy(tx_above, xd->above_txfm_context,
sizeof(TXFM_CONTEXT) * (mi_width >> 1));
memcpy(tx_left, xd->left_txfm_context,
......@@ -3552,8 +3559,8 @@ static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x,
TX_SIZE best_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
TX_SIZE best_tx = max_txsize_lookup[bsize];
TX_SIZE best_min_tx_size = TX_SIZES_ALL;
uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 4];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 4);
uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
const int n4 = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]);
int idx, idy;
int prune = 0;
const int count32 = 1 << (2 * (cpi->common.mib_size_log2 -
......@@ -3716,8 +3723,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0];
const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize];
const int bh = tx_size_high_unit[max_tx_size];
const int bw = tx_size_wide_unit[max_tx_size];
......@@ -3729,7 +3736,7 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x,
RD_STATS pn_rd_stats;
av1_init_rd_stats(&pn_rd_stats);
av1_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl);
av1_get_entropy_contexts(bsize, 0, pd, ta, tl);
for (idy = 0; idy < mi_height; idy += bh) {
for (idx = 0; idx < mi_width; idx += bw) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment