Commit 7167d952 authored by Hui Su

Reduce memory usage of inter_tx_size[] in MB_MODE_INFO

Reduce the length of inter_tx_size[] from 1024 to 16.

On a cif test sequence,
encoder memory consumption decreases by 18% (380MB -> 312MB);
decoder memory consumption decreases by 56% (21.4MB -> 9.4MB).

Change-Id: I42928eb9312748f96f4393c8d8040791f38f98b6
parent e5d166ef
......@@ -1419,11 +1419,8 @@ static void get_filter_level_and_masks_non420(
const int col_mask = 1 << c_step;
if (is_inter_block(mbmi) && !mbmi->skip) {
const int tx_row_idx =
(blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
const int tx_col_idx =
(blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
const TX_SIZE mb_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index(
sb_type, blk_row, blk_col)];
tx_size = (plane->plane_type == PLANE_TYPE_UV)
? av1_get_uv_tx_size(mbmi, ss_x, ss_y)
: mb_tx_size;
......@@ -1990,29 +1987,13 @@ static TX_SIZE av1_get_transform_size(
: av1_get_uv_tx_size(mbmi, plane_ptr->subsampling_x,
plane_ptr->subsampling_y);
assert(tx_size < TX_SIZES_ALL);
// mi_row and mi_col is the absolute position of the MI block.
// idx_c and idx_r is the relative offset of the MI within the super block
// c and r is the relative offset of the 8x8 block within the super block
// blk_row and block_col is the relative offset of the current 8x8 block
// within the current partition.
const int idx_c = mi_col & MAX_MIB_MASK;
const int idx_r = mi_row & MAX_MIB_MASK;
const int c = idx_c >> mi_width_log2_lookup[BLOCK_8X8];
const int r = idx_r >> mi_height_log2_lookup[BLOCK_8X8];
const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
const int blk_row = r & (num_8x8_blocks_high_lookup[sb_type] - 1);
const int blk_col = c & (num_8x8_blocks_wide_lookup[sb_type] - 1);
if (is_inter_block(mbmi) && !mbmi->skip) {
const int tx_row_idx =
(blk_row * mi_size_high[BLOCK_8X8] << TX_UNIT_HIGH_LOG2) >> 1;
const int tx_col_idx =
(blk_col * mi_size_wide[BLOCK_8X8] << TX_UNIT_WIDE_LOG2) >> 1;
const TX_SIZE mb_tx_size = mbmi->inter_tx_size[tx_row_idx][tx_col_idx];
const BLOCK_SIZE sb_type = mi->mbmi.sb_type;
const int blk_row = mi_row & (mi_size_high[sb_type] - 1);
const int blk_col = mi_col & (mi_size_wide[sb_type] - 1);
const TX_SIZE mb_tx_size =
mbmi->inter_tx_size[av1_get_txb_size_index(sb_type, blk_row, blk_col)];
assert(mb_tx_size < TX_SIZES_ALL);
tx_size = (plane == AOM_PLANE_Y)
? mb_tx_size
: av1_get_uv_tx_size(mbmi, plane_ptr->subsampling_x,
......
......@@ -231,16 +231,19 @@ typedef struct {
COMPOUND_TYPE interinter_compound_type;
} INTERINTER_COMPOUND_DATA;
// This structure now relates to 8x8 block regions.
// Number of entries in the flat inter_tx_size[] buffers; also the upper bound
// asserted on indices returned by av1_get_txb_size_index().
#if CONFIG_TX64X64
#define INTER_TX_SIZE_BUF_LEN 16
#else
#define INTER_TX_SIZE_BUF_LEN 256
#endif
// This structure now relates to 4x4 block regions.
typedef struct MB_MODE_INFO {
// Common for both INTER and INTRA blocks
BLOCK_SIZE sb_type;
PREDICTION_MODE mode;
TX_SIZE tx_size;
// TODO(jingning): This effectively assigned a separate entry for each
// 8x8 block. Apparently it takes much more space than needed.
TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
TX_SIZE min_tx_size;
uint8_t inter_tx_size[INTER_TX_SIZE_BUF_LEN];
int8_t skip;
#if CONFIG_EXT_SKIP
int8_t skip_mode;
......@@ -889,6 +892,20 @@ get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) {
return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y];
}
// Converts a (blk_row, blk_col) position inside a block of size `bsize` into
// a flat index for the one-dimensional inter_tx_size[] buffer.
//
// The grid cell is the transform size reached by starting from
// max_txsize_rect_lookup[1][bsize] and stepping through sub_tx_size_map[1]
// MAX_VARTX_DEPTH - 1 times. Cells are numbered in raster order, with the
// row stride being the block width (mi_size_wide) divided by the cell width.
static INLINE int av1_get_txb_size_index(BLOCK_SIZE bsize, int blk_row,
                                         int blk_col) {
  TX_SIZE cell_txs = max_txsize_rect_lookup[1][bsize];
  int steps_left = MAX_VARTX_DEPTH - 1;
  while (steps_left > 0) {
    cell_txs = sub_tx_size_map[1][cell_txs];
    --steps_left;
  }

  const int cell_w = tx_size_wide_unit[cell_txs];
  const int cell_h = tx_size_high_unit[cell_txs];
  const int cells_per_row = mi_size_wide[bsize] / cell_w;
  const int cell_row = blk_row / cell_h;
  const int cell_col = blk_col / cell_w;
  const int txb_index = cell_row * cells_per_row + cell_col;

  // The result must stay within the inter_tx_size[] buffer.
  assert(txb_index < INTER_TX_SIZE_BUF_LEN);
  return txb_index;
}
static INLINE TX_TYPE av1_get_tx_type(PLANE_TYPE plane_type,
const MACROBLOCKD *xd, int blk_row,
int blk_col, TX_SIZE tx_size,
......
......@@ -176,11 +176,10 @@ static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd,
(void)mi_row;
(void)mi_col;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
const int tx_col = blk_col >> (1 - pd->subsampling_x);
const TX_SIZE plane_tx_size =
plane ? av1_get_uv_tx_size(mbmi, pd->subsampling_x, pd->subsampling_y)
: mbmi->inter_tx_size[tx_row][tx_col];
: mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
blk_col)];
// Scale to match transform block unit.
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
......
......@@ -353,22 +353,20 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
int is_split = 0;
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
TX_SIZE(*const inter_tx_size)
[MAX_MIB_SIZE] =
(TX_SIZE(*)[MAX_MIB_SIZE]) & mbmi->inter_tx_size[tx_row][tx_col];
const BLOCK_SIZE bsize = mbmi->sb_type;
const int max_blocks_high = max_block_high(xd, bsize, 0);
const int max_blocks_wide = max_block_wide(xd, bsize, 0);
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
assert(tx_size > TX_4X4);
if (depth == MAX_VARTX_DEPTH) {
int idx, idy;
inter_tx_size[0][0] = tx_size;
for (idy = 0; idy < AOMMAX(1, tx_size_high_unit[tx_size] / 2); ++idy)
for (idx = 0; idx < AOMMAX(1, tx_size_wide_unit[tx_size] / 2); ++idx)
inter_tx_size[idy][idx] = tx_size;
for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
const int index =
av1_get_txb_size_index(bsize, blk_row + idy, blk_col + idx);
mbmi->inter_tx_size[index] = tx_size;
}
}
mbmi->tx_size = tx_size;
mbmi->min_tx_size = TXSIZEMIN(mbmi->min_tx_size, tx_size);
txfm_partition_update(xd->above_txfm_context + blk_col,
......@@ -376,9 +374,9 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
return;
}
int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
is_split = aom_read_symbol(r, ec_ctx->txfm_partition_cdf[ctx], 2, ACCT_STR);
if (is_split) {
......@@ -387,11 +385,13 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
const int bsh = tx_size_high_unit[sub_txs];
if (sub_txs == TX_4X4) {
int idx, idy;
inter_tx_size[0][0] = sub_txs;
for (idy = 0; idy < AOMMAX(1, tx_size_high_unit[tx_size] / 2); ++idy)
for (idx = 0; idx < AOMMAX(1, tx_size_wide_unit[tx_size] / 2); ++idx)
inter_tx_size[idy][idx] = inter_tx_size[0][0];
for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
const int index =
av1_get_txb_size_index(bsize, blk_row + idy, blk_col + idx);
mbmi->inter_tx_size[index] = sub_txs;
}
}
mbmi->tx_size = sub_txs;
mbmi->min_tx_size = mbmi->tx_size;
txfm_partition_update(xd->above_txfm_context + blk_col,
......@@ -409,11 +409,13 @@ static void read_tx_size_vartx(AV1_COMMON *cm, MACROBLOCKD *xd,
}
}
} else {
int idx, idy;
inter_tx_size[0][0] = tx_size;
for (idy = 0; idy < AOMMAX(1, tx_size_high_unit[tx_size] / 2); ++idy)
for (idx = 0; idx < AOMMAX(1, tx_size_wide_unit[tx_size] / 2); ++idx)
inter_tx_size[idy][idx] = tx_size;
for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) {
for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) {
const int index =
av1_get_txb_size_index(bsize, blk_row + idy, blk_col + idx);
mbmi->inter_tx_size[index] = tx_size;
}
}
mbmi->tx_size = tx_size;
mbmi->min_tx_size = TXSIZEMIN(mbmi->min_tx_size, tx_size);
txfm_partition_update(xd->above_txfm_context + blk_col,
......@@ -933,9 +935,7 @@ static void read_intrabc_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
}
} else {
mbmi->tx_size = read_tx_size(cm, xd, 1, !mbmi->skip, r);
for (int idy = 0; idy < height; ++idy)
for (int idx = 0; idx < width; ++idx)
mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
mbmi->min_tx_size = mbmi->tx_size;
set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, mbmi->skip, xd);
}
......@@ -2188,14 +2188,8 @@ static void read_inter_frame_mode_info(AV1Decoder *const pbi,
read_tx_size_vartx(cm, xd, mbmi, max_tx_size, 0, idy, idx, r);
} else {
mbmi->tx_size = read_tx_size(cm, xd, inter_block, !mbmi->skip, r);
if (inter_block) {
const int width = block_size_wide[bsize] >> tx_size_wide_log2[0];
const int height = block_size_high[bsize] >> tx_size_high_log2[0];
for (int idy = 0; idy < height; ++idy)
for (int idx = 0; idx < width; ++idx)
mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size;
}
if (inter_block)
memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
mbmi->min_tx_size = mbmi->tx_size;
set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, mbmi->skip, xd);
}
......
......@@ -177,8 +177,6 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, MACROBLOCKD *xd,
aom_writer *w) {
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
(void)cm;
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
......@@ -190,12 +188,13 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, MACROBLOCKD *xd,
return;
}
int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
const int txb_size_index =
av1_get_txb_size_index(mbmi->sb_type, blk_row, blk_col);
const int write_txfm_partition =
tx_size == mbmi->inter_tx_size[tx_row][tx_col];
tx_size == mbmi->inter_tx_size[txb_size_index];
if (write_txfm_partition) {
aom_write_symbol(w, 0, ec_ctx->txfm_partition_cdf[ctx], 2);
......@@ -464,8 +463,6 @@ static void pack_txb_tokens(aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x,
int block, int blk_row, int blk_col,
TX_SIZE tx_size, TOKEN_STATS *token_stats) {
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
const int tx_col = blk_col >> (1 - pd->subsampling_x);
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
......@@ -473,7 +470,8 @@ static void pack_txb_tokens(aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x,
const TX_SIZE plane_tx_size =
plane ? av1_get_uv_tx_size(mbmi, pd->subsampling_x, pd->subsampling_y)
: mbmi->inter_tx_size[tx_row][tx_col];
: mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
blk_col)];
if (tx_size == plane_tx_size || plane) {
TOKEN_STATS tmp_token_stats;
......
......@@ -113,7 +113,7 @@ typedef struct {
TX_TYPE tx_type;
TX_SIZE tx_size;
TX_SIZE min_tx_size;
TX_SIZE inter_tx_size[MAX_MIB_SIZE][MAX_MIB_SIZE];
TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE * 8];
#if CONFIG_TXK_SEL
TX_TYPE txk_type[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
......
......@@ -390,10 +390,7 @@ static void reset_tx_size(MACROBLOCKD *xd, MB_MODE_INFO *mbmi,
mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size);
}
if (is_inter_block(mbmi)) {
for (int idy = 0; idy < xd->n8_h; ++idy) {
for (int idx = 0; idx < xd->n8_w; ++idx)
mbmi->inter_tx_size[idy][idx] = mbmi->tx_size;
}
memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
mbmi->min_tx_size = mbmi->tx_size;
}
}
......@@ -4691,14 +4688,14 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
int blk_row, int blk_col,
uint8_t allow_update_cdf) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
const BLOCK_SIZE bsize = mbmi->sb_type;
const int max_blocks_high = max_block_high(xd, bsize, 0);
const int max_blocks_wide = max_block_wide(xd, bsize, 0);
int ctx = txfm_partition_context(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row,
mbmi->sb_type, tx_size);
const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
assert(tx_size > TX_4X4);
......@@ -4729,7 +4726,7 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
++x->txb_split_count;
if (sub_txs == TX_4X4) {
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->inter_tx_size[txb_size_index] = TX_4X4;
mbmi->tx_size = TX_4X4;
txfm_partition_update(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row, TX_4X4, tx_size);
......@@ -4774,11 +4771,11 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
int blk_col) {
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
const int tx_row = blk_row >> 1;
const int tx_col = blk_col >> 1;
const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0);
const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0);
const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col];
const BLOCK_SIZE bsize = mbmi->sb_type;
const int max_blocks_high = max_block_high(xd, bsize, 0);
const int max_blocks_wide = max_block_wide(xd, bsize, 0);
const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col);
const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index];
if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return;
......@@ -4789,7 +4786,7 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
} else {
if (tx_size == TX_8X8) {
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4;
mbmi->inter_tx_size[txb_size_index] = TX_4X4;
mbmi->tx_size = TX_4X4;
txfm_partition_update(xd->above_txfm_context + blk_col,
xd->left_txfm_context + blk_row, TX_4X4, tx_size);
......
......@@ -615,8 +615,6 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
const int tx_col = blk_col >> (1 - pd->subsampling_x);
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
......@@ -624,7 +622,8 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
const TX_SIZE plane_tx_size =
plane ? av1_get_uv_tx_size(mbmi, pd->subsampling_x, pd->subsampling_y)
: mbmi->inter_tx_size[tx_row][tx_col];
: mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
blk_col)];
if (tx_size == plane_tx_size || plane) {
encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg,
......
This diff is collapsed.
......@@ -486,8 +486,6 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int tx_row = blk_row >> (1 - pd->subsampling_y);
const int tx_col = blk_col >> (1 - pd->subsampling_x);
const int max_blocks_high = max_block_high(xd, plane_bsize, plane);
const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane);
......@@ -495,7 +493,8 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
const TX_SIZE plane_tx_size =
plane ? av1_get_uv_tx_size(mbmi, pd->subsampling_x, pd->subsampling_y)
: mbmi->inter_tx_size[tx_row][tx_col];
: mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row,
blk_col)];
if (tx_size == plane_tx_size || plane) {
plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment