Commit f25a4cf9 authored by Iole Moccagatta, committed by Iole Moccagatta

Add coef_interleave experiment

This commit adds an experiment to interleave the coding of transform
coefficients from YUV planes.  The experiment can be enabled at config
time by --enable-coef-interleave.

Change-Id: Ifd92f9c367304bca9732f13fa026eb8996363677
parent 63bd6dc9
...@@ -39,6 +39,87 @@ PREDICTION_MODE av1_above_block_mode(const MODE_INFO *cur_mi, ...@@ -39,6 +39,87 @@ PREDICTION_MODE av1_above_block_mode(const MODE_INFO *cur_mi,
} }
} }
#if CONFIG_COEF_INTERLEAVE
// Visits every transform unit (TU) of a block with the planes interleaved:
// luma TU 0, then the plane-1 and plane-2 TUs with index 0, luma TU 1, then
// the plane-1 and plane-2 TUs with index 1, and so on. TUs are enumerated in
// raster order within each plane. If chroma has more TUs than luma, the
// remaining chroma TUs are visited after the last luma TU.
//
//   xd    - block descriptor; plane[0] supplies the luma geometry and
//           plane[1] supplies the geometry used for both chroma planes.
//   bsize - block size, converted per plane with get_plane_block_size().
//   visit - callback invoked as
//           visit(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg).
//   arg   - opaque pointer forwarded unchanged to every visit() call.
void av1_foreach_transformed_block_interleave(
    const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
    foreach_transformed_block_visitor visit, void *arg) {
  const struct macroblockd_plane *const pd_y = &xd->plane[0];
  const struct macroblockd_plane *const pd_c = &xd->plane[1];
  const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  // Transform sizes: log2 form, and edge length in 4x4 units.
  const TX_SIZE tx_log2_y = mbmi->tx_size;
  const TX_SIZE tx_log2_c = get_uv_tx_size(mbmi, pd_c);
  const int tx_sz_y = (1 << tx_log2_y);
  const int tx_sz_c = (1 << tx_log2_c);

  const BLOCK_SIZE plane_bsize_y = get_plane_block_size(bsize, pd_y);
  const BLOCK_SIZE plane_bsize_c = get_plane_block_size(bsize, pd_c);

  // Nominal plane block dimensions in 4x4 units.
  const int num_4x4_w_y = num_4x4_blocks_wide_lookup[plane_bsize_y];
  const int num_4x4_w_c = num_4x4_blocks_wide_lookup[plane_bsize_c];
  const int num_4x4_h_y = num_4x4_blocks_high_lookup[plane_bsize_y];
  const int num_4x4_h_c = num_4x4_blocks_high_lookup[plane_bsize_c];

  // Block-index increment per TU: the number of 4x4 blocks one TU covers.
  const int step_y = 1 << (tx_log2_y << 1);
  const int step_c = 1 << (tx_log2_c << 1);

  // Dimensions after get_max_4x4_size() has applied the right/bottom edge
  // distances, plus the block-index increment that has to be added once per
  // TU row to account for the whole TUs lying beyond max_4x4_w.
  const int max_4x4_w_y =
      get_max_4x4_size(num_4x4_w_y, xd->mb_to_right_edge, pd_y->subsampling_x);
  const int max_4x4_h_y =
      get_max_4x4_size(num_4x4_h_y, xd->mb_to_bottom_edge, pd_y->subsampling_y);
  const int extra_step_y = ((num_4x4_w_y - max_4x4_w_y) >> tx_log2_y) * step_y;
  const int max_4x4_w_c =
      get_max_4x4_size(num_4x4_w_c, xd->mb_to_right_edge, pd_c->subsampling_x);
  const int max_4x4_h_c =
      get_max_4x4_size(num_4x4_h_c, xd->mb_to_bottom_edge, pd_c->subsampling_y);
  const int extra_step_c = ((num_4x4_w_c - max_4x4_w_c) >> tx_log2_c) * step_c;

  // The max_4x4_w/h may be smaller than tx_sz under some corner cases,
  // i.e. when the SB is split by tile boundaries, so round the TU counts up.
  const int tu_num_w_y = (max_4x4_w_y + tx_sz_y - 1) / tx_sz_y;
  const int tu_num_h_y = (max_4x4_h_y + tx_sz_y - 1) / tx_sz_y;
  const int tu_num_w_c = (max_4x4_w_c + tx_sz_c - 1) / tx_sz_c;
  const int tu_num_h_c = (max_4x4_h_c + tx_sz_c - 1) / tx_sz_c;
  const int tu_num_c = tu_num_w_c * tu_num_h_c;

  int tu_idx_c = 0;
  int row_y, col_y;

  for (row_y = 0; row_y < tu_num_h_y; row_y++) {
    for (col_y = 0; col_y < tu_num_w_y; col_y++) {
      // luma
      const int offset_y =
          (row_y * tu_num_w_y + col_y) * step_y + row_y * extra_step_y;
      visit(0, offset_y, row_y * tx_sz_y, col_y * tx_sz_y, plane_bsize_y,
            tx_log2_y, arg);

      // chroma: both chroma planes share the same TU geometry
      if (tu_idx_c < tu_num_c) {
        const int tu_row_c = tu_idx_c / tu_num_w_c;
        const int tu_col_c = tu_idx_c % tu_num_w_c;
        const int offset_c = tu_idx_c * step_c + tu_row_c * extra_step_c;
        visit(1, offset_c, tu_row_c * tx_sz_c, tu_col_c * tx_sz_c,
              plane_bsize_c, tx_log2_c, arg);
        visit(2, offset_c, tu_row_c * tx_sz_c, tu_col_c * tx_sz_c,
              plane_bsize_c, tx_log2_c, arg);
        tu_idx_c++;
      }
    }
  }

  // In 422 case, it's possible that Chroma has more TUs than Luma
  while (tu_idx_c < tu_num_c) {
    const int tu_row_c = tu_idx_c / tu_num_w_c;
    const int tu_col_c = tu_idx_c % tu_num_w_c;
    // extra_step_c is added once per TU row, so it is scaled by the TU row
    // index (not by the row position in 4x4 units), exactly as in the
    // interleaved loop above.
    const int offset_c = tu_idx_c * step_c + tu_row_c * extra_step_c;
    visit(1, offset_c, tu_row_c * tx_sz_c, tu_col_c * tx_sz_c, plane_bsize_c,
          tx_log2_c, arg);
    visit(2, offset_c, tu_row_c * tx_sz_c, tu_col_c * tx_sz_c, plane_bsize_c,
          tx_log2_c, arg);
    tu_idx_c++;
  }
}
#endif
void av1_foreach_transformed_block_in_plane( void av1_foreach_transformed_block_in_plane(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane,
foreach_transformed_block_visitor visit, void *arg) { foreach_transformed_block_visitor visit, void *arg) {
......
...@@ -833,6 +833,17 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd, ...@@ -833,6 +833,17 @@ void av1_foreach_transformed_block(const MACROBLOCKD *const xd,
foreach_transformed_block_visitor visit, foreach_transformed_block_visitor visit,
void *arg); void *arg);
#if CONFIG_COEF_INTERLEAVE
// Returns num_4x4 adjusted by the distance to a frame edge.
//
// When mb_to_edge is non-negative the count is returned unchanged. When it is
// negative, mb_to_edge >> (5 + subsampling) (a non-positive value) is added,
// shrinking the count.
// NOTE(review): presumably mb_to_edge is in 1/8-pixel units, so the shift
// converts it to 4x4 units of the (possibly subsampled) plane — confirm.
static INLINE int get_max_4x4_size(int num_4x4, int mb_to_edge,
                                   int subsampling) {
  int edge_adjust_4x4 = 0;

  if (mb_to_edge < 0) {
    edge_adjust_4x4 = mb_to_edge >> (5 + subsampling);
  }
  return num_4x4 + edge_adjust_4x4;
}
// Calls visit() for the transform blocks of a block with the planes
// interleaved: one plane-0 transform block, then the plane-1 and plane-2
// transform blocks with the same index, repeated; arg is forwarded to visit().
void av1_foreach_transformed_block_interleave(
const MACROBLOCKD *const xd, BLOCK_SIZE bsize,
foreach_transformed_block_visitor visit, void *arg);
#endif
void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, void av1_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
TX_SIZE tx_size, int has_eob, int aoff, int loff); TX_SIZE tx_size, int has_eob, int aoff, int loff);
......
...@@ -1435,6 +1435,127 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd, ...@@ -1435,6 +1435,127 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
if (mbmi->skip) { if (mbmi->skip) {
dec_reset_skip_context(xd); dec_reset_skip_context(xd);
} }
#if CONFIG_COEF_INTERLEAVE
{
// Interleaved decode path: transform units (TUs) are processed as one luma
// TU followed by the plane-1 and plane-2 TUs with the same index, instead of
// plane by plane. plane[1] supplies the geometry for both chroma planes.
const struct macroblockd_plane *const pd_y = &xd->plane[0];
const struct macroblockd_plane *const pd_c = &xd->plane[1];
// Transform sizes: log2 form, and edge length in 4x4 units.
const TX_SIZE tx_log2_y = mbmi->tx_size;
const TX_SIZE tx_log2_c = get_uv_tx_size(mbmi, pd_c);
const int tx_sz_y = (1 << tx_log2_y);
const int tx_sz_c = (1 << tx_log2_c);
// Nominal plane dimensions in 4x4 units, then the same dimensions after
// get_max_4x4_size() has applied the right/bottom edge distances.
const int num_4x4_w_y = pd_y->n4_w;
const int num_4x4_h_y = pd_y->n4_h;
const int num_4x4_w_c = pd_c->n4_w;
const int num_4x4_h_c = pd_c->n4_h;
const int max_4x4_w_y = get_max_4x4_size(num_4x4_w_y, xd->mb_to_right_edge,
pd_y->subsampling_x);
const int max_4x4_h_y = get_max_4x4_size(num_4x4_h_y, xd->mb_to_bottom_edge,
pd_y->subsampling_y);
const int max_4x4_w_c = get_max_4x4_size(num_4x4_w_c, xd->mb_to_right_edge,
pd_c->subsampling_x);
const int max_4x4_h_c = get_max_4x4_size(num_4x4_h_c, xd->mb_to_bottom_edge,
pd_c->subsampling_y);
// The max_4x4_w/h may be smaller than tx_sz under some corner cases,
// i.e. when the SB is split by tile boundaries, so the TU counts below
// are rounded up.
const int tu_num_w_y = (max_4x4_w_y + tx_sz_y - 1) / tx_sz_y;
const int tu_num_h_y = (max_4x4_h_y + tx_sz_y - 1) / tx_sz_y;
const int tu_num_w_c = (max_4x4_w_c + tx_sz_c - 1) / tx_sz_c;
const int tu_num_h_c = (max_4x4_h_c + tx_sz_c - 1) / tx_sz_c;
// NOTE(review): tu_num_y is not referenced anywhere else in this block.
const int tu_num_y = tu_num_w_y * tu_num_h_y;
const int tu_num_c = tu_num_w_c * tu_num_h_c;
if (!is_inter_block(mbmi)) {
// Intra: each TU is predicted and reconstructed in interleaved order.
int tu_idx_c = 0;
int row_y, col_y, row_c, col_c;
int plane;
#if CONFIG_PALETTE
// Palette tokens for planes 0 and 1 are decoded before any TU.
for (plane = 0; plane <= 1; ++plane) {
if (mbmi->palette_mode_info.palette_size[plane])
av1_decode_palette_tokens(xd, plane, r);
}
#endif
for (row_y = 0; row_y < tu_num_h_y; row_y++) {
for (col_y = 0; col_y < tu_num_w_y; col_y++) {
// luma
predict_and_reconstruct_intra_block(
cm, xd, r, mbmi, 0, row_y * tx_sz_y, col_y * tx_sz_y, tx_log2_y);
// chroma: raster position of TU tu_idx_c, in 4x4 units
if (tu_idx_c < tu_num_c) {
row_c = (tu_idx_c / tu_num_w_c) * tx_sz_c;
col_c = (tu_idx_c % tu_num_w_c) * tx_sz_c;
predict_and_reconstruct_intra_block(cm, xd, r, mbmi, 1, row_c,
col_c, tx_log2_c);
predict_and_reconstruct_intra_block(cm, xd, r, mbmi, 2, row_c,
col_c, tx_log2_c);
tu_idx_c++;
}
}
}
// In 422 case, it's possible that Chroma has more TUs than Luma
while (tu_idx_c < tu_num_c) {
row_c = (tu_idx_c / tu_num_w_c) * tx_sz_c;
col_c = (tu_idx_c % tu_num_w_c) * tx_sz_c;
predict_and_reconstruct_intra_block(cm, xd, r, mbmi, 1, row_c, col_c,
tx_log2_c);
predict_and_reconstruct_intra_block(cm, xd, r, mbmi, 2, row_c, col_c,
tx_log2_c);
tu_idx_c++;
}
} else {
// Prediction
av1_build_inter_predictors_sb(xd, mi_row, mi_col,
AOMMAX(bsize, BLOCK_8X8));
// Reconstruction
if (!mbmi->skip) {
// eobtotal accumulates the values returned by reconstruct_inter_block
// over all TUs of all three planes.
int eobtotal = 0;
int tu_idx_c = 0;
int row_y, col_y, row_c, col_c;
for (row_y = 0; row_y < tu_num_h_y; row_y++) {
for (col_y = 0; col_y < tu_num_w_y; col_y++) {
// luma
eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id, 0,
row_y * tx_sz_y,
col_y * tx_sz_y, tx_log2_y);
// chroma: raster position of TU tu_idx_c, in 4x4 units
if (tu_idx_c < tu_num_c) {
row_c = (tu_idx_c / tu_num_w_c) * tx_sz_c;
col_c = (tu_idx_c % tu_num_w_c) * tx_sz_c;
eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id,
1, row_c, col_c, tx_log2_c);
eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id,
2, row_c, col_c, tx_log2_c);
tu_idx_c++;
}
}
}
// In 422 case, it's possible that Chroma has more TUs than Luma
while (tu_idx_c < tu_num_c) {
row_c = (tu_idx_c / tu_num_w_c) * tx_sz_c;
col_c = (tu_idx_c % tu_num_w_c) * tx_sz_c;
eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id, 1,
row_c, col_c, tx_log2_c);
eobtotal += reconstruct_inter_block(cm, xd, r, mbmi->segment_id, 2,
row_c, col_c, tx_log2_c);
tu_idx_c++;
}
// For blocks of at least BLOCK_8X8 whose accumulated total is zero, record
// that on the mode info; which field is set depends on CONFIG_MISC_FIXES.
if (bsize >= BLOCK_8X8 && eobtotal == 0)
#if CONFIG_MISC_FIXES
mbmi->has_no_coeffs = 1;
#else
mbmi->skip = 1;
#endif
}
}
}
#else
if (!is_inter_block(mbmi)) { if (!is_inter_block(mbmi)) {
int plane; int plane;
#if CONFIG_PALETTE #if CONFIG_PALETTE
...@@ -1545,6 +1666,7 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd, ...@@ -1545,6 +1666,7 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd,
} }
} }
} }
#endif
xd->corrupted |= aom_reader_has_error(r); xd->corrupted |= aom_reader_has_error(r);
} }
......
...@@ -1883,6 +1883,82 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, ...@@ -1883,6 +1883,82 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
if (supertx_enabled) return; if (supertx_enabled) return;
#endif // CONFIG_SUPERTX #endif // CONFIG_SUPERTX
#if CONFIG_COEF_INTERLEAVE
// Interleaved token packing for non-skipped blocks: the tokens of one luma
// transform unit (TU) are written, then those of the two chroma TUs with the
// same index, until every TU has been written.
if (!m->mbmi.skip) {
const struct macroblockd_plane *const pd_y = &xd->plane[0];
const struct macroblockd_plane *const pd_c = &xd->plane[1];
// Transform sizes: log2 form, and edge length in 4x4 units.
const TX_SIZE tx_log2_y = m->mbmi.tx_size;
const TX_SIZE tx_log2_c = get_uv_tx_size(&m->mbmi, pd_c);
const int tx_sz_y = (1 << tx_log2_y);
const int tx_sz_c = (1 << tx_log2_c);
// NOTE(review): the literal 3 is presumably the numeric value of BLOCK_8X8
// — confirm, and prefer the named constant.
const BLOCK_SIZE plane_bsize_y =
get_plane_block_size(AOMMAX(m->mbmi.sb_type, 3), pd_y);
const BLOCK_SIZE plane_bsize_c =
get_plane_block_size(AOMMAX(m->mbmi.sb_type, 3), pd_c);
// Nominal plane dimensions in 4x4 units, then the same dimensions after
// get_max_4x4_size() has applied the right/bottom edge distances.
const int num_4x4_w_y = num_4x4_blocks_wide_lookup[plane_bsize_y];
const int num_4x4_w_c = num_4x4_blocks_wide_lookup[plane_bsize_c];
const int num_4x4_h_y = num_4x4_blocks_high_lookup[plane_bsize_y];
const int num_4x4_h_c = num_4x4_blocks_high_lookup[plane_bsize_c];
const int max_4x4_w_y = get_max_4x4_size(num_4x4_w_y, xd->mb_to_right_edge,
pd_y->subsampling_x);
const int max_4x4_h_y = get_max_4x4_size(num_4x4_h_y, xd->mb_to_bottom_edge,
pd_y->subsampling_y);
const int max_4x4_w_c = get_max_4x4_size(num_4x4_w_c, xd->mb_to_right_edge,
pd_c->subsampling_x);
const int max_4x4_h_c = get_max_4x4_size(num_4x4_h_c, xd->mb_to_bottom_edge,
pd_c->subsampling_y);
// The max_4x4_w/h may be smaller than tx_sz under some corner cases,
// i.e. when the SB is split by tile boundaries, so the TU counts below
// are rounded up.
const int tu_num_w_y = (max_4x4_w_y + tx_sz_y - 1) / tx_sz_y;
const int tu_num_h_y = (max_4x4_h_y + tx_sz_y - 1) / tx_sz_y;
const int tu_num_w_c = (max_4x4_w_c + tx_sz_c - 1) / tx_sz_c;
const int tu_num_h_c = (max_4x4_h_c + tx_sz_c - 1) / tx_sz_c;
const int tu_num_y = tu_num_w_y * tu_num_h_y;
const int tu_num_c = tu_num_w_c * tu_num_h_c;
int tu_idx_y = 0, tu_idx_c = 0;
TOKEN_STATS token_stats;
init_token_stats(&token_stats);
assert(*tok < tok_end);
// After every pack_mb_tokens() call the token cursor is asserted to sit on an
// EOSB_TOKEN, which is then stepped over before the next TU is packed.
while (tu_idx_y < tu_num_y) {
// luma TU
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx_log2_y, &token_stats);
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
tu_idx_y++;
// two chroma TUs (one per chroma plane) sharing the chroma TU index
if (tu_idx_c < tu_num_c) {
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx_log2_c, &token_stats);
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx_log2_c, &token_stats);
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
tu_idx_c++;
}
}
// In 422 case, it's possible that Chroma has more TUs than Luma
while (tu_idx_c < tu_num_c) {
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx_log2_c, &token_stats);
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
pack_mb_tokens(w, tok, tok_end, cm->bit_depth, tx_log2_c, &token_stats);
assert(*tok < tok_end && (*tok)->token == EOSB_TOKEN);
(*tok)++;
tu_idx_c++;
}
}
#else // CONFIG_COEF_INTERLEAVE
if (!m->mbmi.skip) { if (!m->mbmi.skip) {
assert(*tok < tok_end); assert(*tok < tok_end);
for (plane = 0; plane < MAX_MB_PLANE; ++plane) { for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
...@@ -1965,6 +2041,7 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, ...@@ -1965,6 +2041,7 @@ static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
(*tok)++; (*tok)++;
} }
} }
#endif // CONFIG_COEF_INTERLEAVE
#else #else
// PVQ writes its tokens (i.e. symbols) here. // PVQ writes its tokens (i.e. symbols) here.
if (!m->mbmi.skip) { if (!m->mbmi.skip) {
......
...@@ -503,6 +503,11 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, ...@@ -503,6 +503,11 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col,
++eob_branch[band[c]][pt]; ++eob_branch[band[c]][pt];
} }
#if CONFIG_COEF_INTERLEAVE
t->token = EOSB_TOKEN;
t++;
#endif
*tp = t; *tp = t;
#if CONFIG_ADAPT_SCAN #if CONFIG_ADAPT_SCAN
...@@ -725,6 +730,10 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, ...@@ -725,6 +730,10 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
#if !CONFIG_PVQ #if !CONFIG_PVQ
if (!dry_run) { if (!dry_run) {
#if CONFIG_COEF_INTERLEAVE
td->counts->skip[ctx][0] += skip_inc;
av1_foreach_transformed_block_interleave(xd, bsize, tokenize_b, &arg);
#else
int plane; int plane;
td->counts->skip[ctx][0] += skip_inc; td->counts->skip[ctx][0] += skip_inc;
...@@ -734,6 +743,7 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, ...@@ -734,6 +743,7 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
(*t)->token = EOSB_TOKEN; (*t)->token = EOSB_TOKEN;
(*t)++; (*t)++;
} }
#endif
} else if (dry_run == DRY_RUN_NORMAL) { } else if (dry_run == DRY_RUN_NORMAL) {
av1_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); av1_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg);
} else if (dry_run == DRY_RUN_COSTCOEFFS) { } else if (dry_run == DRY_RUN_COSTCOEFFS) {
......
...@@ -296,6 +296,7 @@ EXPERIMENT_LIST=" ...@@ -296,6 +296,7 @@ EXPERIMENT_LIST="
simp_mv_pred simp_mv_pred
rd_debug rd_debug
reference_buffer reference_buffer
coef_interleave
" "
CONFIG_LIST=" CONFIG_LIST="
dependency_tracking dependency_tracking
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment