Commit c7af36d4 authored by Luc Trudeau

[CFL] Sub8x8 Validation Code Rewrite

The sub8x8 validation code is changed to be more robust. The scope of the
validation is narrowed to checking that all of the required content in
the storage buffer was stored between CfL predictions. The early
termination used in the current mode decision code does not allow
validating more than that.

This change does not alter the encoder output.

BUG=aomedia:925

Change-Id: I7f1ed84da5037dcfaaf5da9cf33b4b8d664d2352
parent 84b05ac1
......@@ -647,10 +647,12 @@ typedef struct cfl_ctx {
int is_chroma_reference;
#if CONFIG_DEBUG
// The prediction used for sub8x8 blocks originates from multiple luma blocks,
// this array is used to validate that cfl_store() is called only once for
// each luma block
uint8_t sub8x8_val[CFL_SUB8X8_VAL_MI_SQUARE];
// The validation buffer is usually 2x2, except for 16x4 and 4x16 blocks, in
// which case it is 4x2 and 2x4 respectively. To simplify accessing the buffer,
// we use a stride of CFL_SUB8X8_VAL_MI_SIZE, resulting in a square of 16
// entries.
uint16_t sub8x8_val[CFL_SUB8X8_VAL_MI_SQUARE];
uint16_t store_counter;
uint16_t last_compute_counter;
#endif // CONFIG_DEBUG
} CFL_CTX;
#endif // CONFIG_CFL
......
......@@ -26,6 +26,8 @@ void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
cfl->store_y = 0;
#if CONFIG_DEBUG
cfl_clear_sub8x8_val(cfl);
cfl->store_counter = 0;
cfl->last_compute_counter = 0;
#endif // CONFIG_DEBUG
}
......@@ -463,16 +465,59 @@ static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
}
}
#if CONFIG_DEBUG
static INLINE void sub8x8_set_val(CFL_CTX *cfl, int row, int col, int val_high,
int val_wide) {
for (int val_r = 0; val_r < val_high; val_r++) {
assert(row + val_r < CFL_SUB8X8_VAL_MI_SIZE);
int row_off = (row + val_r) * CFL_SUB8X8_VAL_MI_SIZE;
for (int val_c = 0; val_c < val_wide; val_c++) {
assert(col + val_c < CFL_SUB8X8_VAL_MI_SIZE);
assert(cfl->sub8x8_val[row_off + col + val_c] == 0);
cfl->sub8x8_val[row_off + col + val_c]++;
// Since the chroma surface of a sub8x8 block spans across multiple luma
// blocks, this function records, in the validation buffer, that the
// reconstructed luma area required to predict the chroma block using CfL has
// been stored during the previous luma encode.
//
// Issue 1: Chroma intra prediction is not always performed after luma. One
// such example is when luma RD cost is really high and the mode decision
// algorithm decides to terminate instead of evaluating chroma.
//
// Issue 2: When multiple CfL predictions are computed for a given sub8x8
// block, the reconstructed luma that belongs to the non-reference sub8x8
// blocks must remain in the buffer (we cannot clear the buffer when we
// compute the CfL prediction).
//
// To resolve these issues, we increment the store_counter on each store. If
// other sub8x8 blocks have already been coded and their counter corresponds to
// the previous value, they are also set to the current value. If a sub8x8
// block is not stored, the store_counter won't match, which will be detected
// when the CfL parameters are computed.
//
// Parameters:
//   cfl       - CfL context; its store_counter and sub8x8_val buffer are
//               updated.
//   row, col  - Position of the stored transform block inside the validation
//               buffer (sub8x8_val is indexed with a stride of
//               CFL_SUB8X8_VAL_MI_SIZE).
//   y_tx_size - Luma transform size of the stored block; it must cover 1, 2 or
//               4 units of 4x4 (asserted below).
static void sub8x8_set_val(CFL_CTX *cfl, int row, int col, TX_SIZE y_tx_size) {
const int y_tx_wide_unit = tx_size_wide_unit[y_tx_size];
const int y_tx_high_unit = tx_size_high_unit[y_tx_size];
// How many 4x4 are in tx_size
const int y_tx_unit_len = y_tx_wide_unit * y_tx_high_unit;
assert(y_tx_unit_len == 1 || y_tx_unit_len == 2 || y_tx_unit_len == 4);
// Invalidate other counters if (0,0)
// NOTE(review): row and col are presumably non-negative, so the sum is zero
// only at position (0,0) -- confirm against callers.
const int is_first = row + col == 0;
// A store at (0,0) advances the counter by 2 instead of 1, so entries that
// still hold the previous counter value end up two behind the new value
// rather than one.
cfl->store_counter += is_first ? 2 : 1;
// Entries are written along a single direction: consecutive columns when the
// transform is at least as wide as it is tall, consecutive rows (one buffer
// stride apart) otherwise.
const int inc =
(y_tx_wide_unit >= y_tx_high_unit) ? 1 : CFL_SUB8X8_VAL_MI_SIZE;
uint16_t *sub8x8_val = cfl->sub8x8_val + (row * CFL_SUB8X8_VAL_MI_SIZE + col);
// Stamp every 4x4 unit covered by this transform with the current counter.
for (int i = 0; i < y_tx_unit_len; i++) {
*sub8x8_val = cfl->store_counter;
sub8x8_val += inc;
}
if (!is_first) {
// Scan the whole validation buffer and bring every entry that holds the
// previous counter value up to the current one, so that all entries stored
// since the last (0,0) store share the same value.
const uint16_t prev_store_counter = cfl->store_counter - 1;
int found = 0;
sub8x8_val = cfl->sub8x8_val;
for (int y = 0; y < CFL_SUB8X8_VAL_MI_SIZE; y++) {
for (int x = 0; x < CFL_SUB8X8_VAL_MI_SIZE; x++) {
if (sub8x8_val[x] == prev_store_counter) {
sub8x8_val[x] = cfl->store_counter;
found = 1;
}
}
// Advance to the next row of the validation buffer.
sub8x8_val += CFL_SUB8X8_VAL_MI_SIZE;
}
// Something is wrong if (0,0) is missing
// (i.e. no entry held the previous counter value).
assert(found);
}
}
#endif // CONFIG_DEBUG
......@@ -483,15 +528,13 @@ void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
(void)bsize;
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
// Only dimensions of size 4 can have an odd offset.
assert(!((col & 1) && tx_size_wide[tx_size] != 4));
assert(!((row & 1) && tx_size_high[tx_size] != 4));
sub8x8_adjust_offset(cfl, &row, &col);
#if CONFIG_DEBUG
sub8x8_set_val(cfl, row, col, tx_size_high_unit[tx_size],
tx_size_wide_unit[tx_size]);
sub8x8_set_val(cfl, row, col, tx_size);
#endif // CONFIG_DEBUG
}
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
......@@ -507,7 +550,12 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
sub8x8_adjust_offset(cfl, &row, &col);
#if CONFIG_DEBUG
sub8x8_set_val(cfl, row, col, mi_size_high[bsize], mi_size_wide[bsize]);
// Point to the last transform block inside the partition.
const int off_row =
row + (mi_size_high[bsize] - tx_size_high_unit[tx_size]);
const int off_col =
col + (mi_size_wide[bsize] - tx_size_wide_unit[tx_size]);
sub8x8_set_val(cfl, off_row, off_col, tx_size);
#endif // CONFIG_DEBUG
}
const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
......@@ -526,13 +574,24 @@ void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
const BLOCK_SIZE plane_bsize = AOMMAX(
BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
#if CONFIG_DEBUG
if (mbmi->sb_type < BLOCK_8X8) {
for (int val_r = 0; val_r < mi_size_high[mbmi->sb_type]; val_r++) {
for (int val_c = 0; val_c < mi_size_wide[mbmi->sb_type]; val_c++) {
assert(cfl->sub8x8_val[val_r * CFL_SUB8X8_VAL_MI_SIZE + val_c] == 1);
BLOCK_SIZE bsize = mbmi->sb_type;
if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
const uint16_t compute_counter = cfl->sub8x8_val[0];
assert(compute_counter != cfl->last_compute_counter);
bsize = scale_chroma_bsize(bsize, cfl->subsampling_x, cfl->subsampling_y);
const int val_wide = mi_size_wide[bsize];
const int val_high = mi_size_high[bsize];
assert(val_wide <= CFL_SUB8X8_VAL_MI_SIZE);
assert(val_high <= CFL_SUB8X8_VAL_MI_SIZE);
for (int val_r = 0; val_r < val_high; val_r++) {
for (int val_c = 0; val_c < val_wide; val_c++) {
// If all counters in the validation buffer are equal then they are all
// related to the same chroma reference block.
assert(cfl->sub8x8_val[val_r * CFL_SUB8X8_VAL_MI_SIZE + val_c] ==
compute_counter);
}
}
cfl_clear_sub8x8_val(cfl);
cfl->last_compute_counter = compute_counter;
}
#endif // CONFIG_DEBUG
// AOM_PLANE_U is used, but both planes will have the same sizes.
......
......@@ -911,11 +911,6 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
}
#if CONFIG_CFL
if (mbmi->uv_mode != UV_CFL_PRED) {
#if CONFIG_DEBUG
if (cfl->is_chroma_reference) {
cfl_clear_sub8x8_val(cfl);
}
#endif
if (!cfl->is_chroma_reference && is_inter_block(mbmi)) {
cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
}
......
......@@ -2805,12 +2805,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
restore_context(x, &x_ctx, mi_row, mi_col, bsize);
#if CONFIG_CFL && CONFIG_DEBUG
if (!x->skip_chroma_rd) {
cfl_clear_sub8x8_val(xd->cfl);
}
#endif // CONFIG_CFL && CONFIG_DEBUG
}
// store estimated motion vector
......@@ -2862,11 +2856,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif // CONFIG_DIST_8X8
#if CONFIG_CFL && CONFIG_DEBUG
if (!reached_last_index && sum_rdc.rdcost >= best_rdc.rdcost)
cfl_clear_sub8x8_val(xd->cfl);
#endif // CONFIG_CFL && CONFIG_DEBUG
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
......@@ -2950,9 +2939,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_DIST_8X8
}
#if CONFIG_CFL && CONFIG_DEBUG
cfl_clear_sub8x8_val(xd->cfl);
#endif // CONFIG_CFL && CONFIG_DEBUG
if (sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
......@@ -3030,10 +3016,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif // CONFIG_DIST_8X8
}
#if CONFIG_CFL && CONFIG_DEBUG
cfl_clear_sub8x8_val(xd->cfl);
#endif // CONFIG_CFL && CONFIG_DEBUG
if (sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_VERT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
......@@ -4650,13 +4632,6 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
}
#if CONFIG_CFL
xd->cfl->store_y = 0;
#if CONFIG_DEBUG
if (is_chroma_reference(mi_row, mi_col, bsize, xd->cfl->subsampling_x,
xd->cfl->subsampling_y) &&
!xd->cfl->are_parameters_computed) {
cfl_clear_sub8x8_val(xd->cfl);
}
#endif // CONFIG_DEBUG
#endif // CONFIG_CFL
if (!dry_run) {
sum_intra_stats(td->counts, xd, mi, xd->above_mi, xd->left_mi,
......@@ -4836,13 +4811,6 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
}
#if CONFIG_CFL
CFL_CTX *const cfl = xd->cfl;
#if CONFIG_DEBUG
if (is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
cfl->subsampling_y) &&
!cfl->are_parameters_computed) {
cfl_clear_sub8x8_val(cfl);
}
#endif // CONFIG_DEBUG
if (is_inter_block(mbmi) &&
!is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
cfl->subsampling_y)) {
......
......@@ -2110,18 +2110,17 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
tx_size, &this_rd_stats.dist, &this_rd_stats.sse,
OUTPUT_HAS_PREDICTED_PIXELS);
}
rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
#if CONFIG_CFL
if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
}
#endif // CONFIG_CFL
rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
if (args->this_rd + rd > args->best_rd) {
args->exit_early = 1;
return;
}
const PLANE_TYPE plane_type = get_plane_type(plane);
const TX_TYPE tx_type =
av1_get_tx_type(plane_type, xd, blk_row, blk_col, block, tx_size);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment