Commit b7b60c57 authored by Yushin Cho

New experiment DIST_8x8

A framework for computing distortion at the 8x8 luma block level during
the RDO-based mode decision search. A new 8x8 distortion metric can be
plugged in by way of this tool.

The existing daala_dist now uses this experiment as well.
Other possible applications of this experiment are distortion metrics
that must be applied to 8x8 pixels, such as PSNR-HVS or SSIM.

The rd_cost used for the final coding mode decision of a superblock is
computed for partition sizes 8x8 and larger. For a block larger than 8x8,
the distortion of each 8x8 block is computed independently and then summed.
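
To illustrate the accumulation, a minimal standalone C sketch (not code
from this change; dist_8x8_fn is a hypothetical callback standing in for
the pluggable 8x8 metric):

    #include <stdint.h>

    typedef int64_t (*dist_8x8_fn)(const uint8_t *src, int src_stride,
                                   const uint8_t *dst, int dst_stride);

    /* Sum an 8x8-level distortion metric over a bw x bh block
       (bw, bh multiples of 8). */
    static int64_t sum_dist_8x8(const uint8_t *src, int src_stride,
                                const uint8_t *dst, int dst_stride,
                                int bw, int bh, dist_8x8_fn metric) {
      int64_t total = 0;
      for (int r = 0; r < bh; r += 8)
        for (int c = 0; c < bw; c += 8)
          total += metric(src + r * src_stride + c, src_stride,
                          dst + r * dst_stride + c, dst_stride);
      return total;
    }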

The rd_cost for an 8x8 block with the new 8x8 distortion metric is computed
only once the mode decisions of its sub8x8 blocks are complete.
However, the MSE distortion metric is still used for the sub8x8 mode
decisions themselves, so early termination is also determined with the
MSE-based rd_cost. Because the best rd_cost (i.e. the reference rd_cost)
during sub8x8 prediction or sub8x8 tx search is based on the new 8x8
distortion while each sub8x8 block uses MSE, the existing early termination
cannot be used (and this may be one possible reason for the BD-rate change
with this revision).
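
Concretely, once the sub8x8 decisions are complete, the MSE-based luma
distortion accumulated in dist_y is backed out of the running total and
replaced with the 8x8 metric before the rd cost is re-evaluated, as in the
rd_pick_partition() hunks below:

    sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);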

For sub8x8 prediction, the prediction mode of each sub8x8 block inside an
8x8 block is decided with the existing MSE, and then av1_dist_8x8() is
applied to the 8x8 pixels.
(There is also av1_dist_8x8_diff(), which can take the diff signal as input
directly.)
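
For reference, the new entry point is declared as follows (from the header
diff below):

    int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
                         const uint8_t *src, int src_stride, const uint8_t *dst,
                         int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
                         int bsh, int visible_w, int visible_h, int qindex);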

For a sub8x8 tx in a block larger than 8x8, instead of computing the MSE
distortion for each sub8x8 tx block, we wait until all sub8x8 tx blocks are
encoded and then apply av1_dist_8x8() to the 8x8 pixels.

Sub8x8 prediction and transforms were the trickiest parts of this change.
Two kinds of distortion, for a) predicted pixels and b) decoded pixels
(i.e. prediction plus any reconstructed residue), are always computed
during RDO. In order to access the two signals a) and b) for an 8x8 block
after its sub8x8 mode decisions have finished, a) and b) need to be stored
properly for later retrieval.
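
In this change, signal a) is stored in the int16_t pred[MAX_SB_SQUARE]
buffer of macroblockd_plane and signal b) in the decoded_8x8[8 * 8] buffer
of macroblock (see the struct diffs below). A minimal sketch of the copy
for b), with a hypothetical helper name and ignoring the sub-block offset
and high-bitdepth handling of the real dist_8x8_set_sub8x8_dst():

    /* Copy a bw x bh reconstructed sub-block from the frame dst buffer
       into an 8x8 scratch buffer (stride 8) for later retrieval. */
    static void store_sub8x8_dst(uint8_t *dst8x8, const uint8_t *dst,
                                 int dst_stride, int bw, int bh) {
      for (int r = 0; r < bh; ++r)
        for (int c = 0; c < bw; ++c)
          dst8x8[r * 8 + c] = dst[r * dst_stride + c];
    }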

CB4X4 makes accessing the a) and b) signals of a sub8x8 block even more
difficult, since the intermediate data (i.e. a and/or b) of a sub8x8 block
are not easily accessible outside the current partition unless
reconstructed with the decided coding modes.
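
This is why the hunks below re-run update_state() and encode_superblock()
with DRY_RUN_NORMAL for the second halves of the HORZ/VERT partitions at
8x8, and re-encode with encode_sb() when bsize == BLOCK_4X4 and
pc_tree->index == 3: the dry-run re-encode regenerates the decoded pixels
so they can be captured into x->decoded_8x8.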

Change-Id: If60301a890c0674a3de1d8206965bbd6a6495bb7
parent 68ad7a6e
@@ -329,7 +329,7 @@ typedef struct RD_STATS {
int64_t rdcost;
int64_t sse;
int skip; // sse should equal to dist when skip == 1
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
int64_t dist_y;
#endif
#if CONFIG_RD_DEBUG
@@ -608,8 +608,10 @@ typedef struct macroblockd_plane {
const dequant_val_type_nuq *dequant_val_nuq[QUANT_PROFILES];
#endif // CONFIG_NEW_QUANT
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DIST_8X8
DECLARE_ALIGNED(16, int16_t, pred[MAX_SB_SQUARE]);
+#endif
+#if CONFIG_PVQ
// PVQ: forward transformed predicted image, a reference for PVQ.
tran_low_t *pvq_ref_coeff;
#endif
@@ -205,7 +205,7 @@ struct macroblock {
int pvq_speed;
int pvq_coded; // Indicates whether pvq_info needs be stored to tokenize
#endif
-#if CONFIG_DAALA_DIST
+#if CONFIG_DIST_8X8
#if CONFIG_CB4X4
#if CONFIG_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, decoded_8x8[8 * 8]);
@@ -213,7 +213,7 @@ struct macroblock {
DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]);
#endif
#endif // CONFIG_CB4X4
-#endif // CONFIG_DAALA_DIST
+#endif // CONFIG_DIST_8X8
#if CONFIG_CFL
// Whether luma needs to be stored during RDO.
int cfl_store_y;
@@ -1315,10 +1315,10 @@ static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
-static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
-BLOCK_SIZE bsize, int bw, int bh,
-int mi_row, int mi_col) {
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
+static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8,
+BLOCK_SIZE bsize, int bw, int bh,
+int mi_row, int mi_col) {
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
const int dst_stride = pd->dst.stride;
@@ -3729,7 +3729,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
&this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) {
assert(this_rdc.dist_y < INT64_MAX);
}
@@ -3747,7 +3747,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == BLOCK_8X8) {
assert(this_rdc.dist_y < INT64_MAX);
sum_rdc.dist_y += this_rdc.dist_y;
@@ -3757,11 +3757,10 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
reached_last_index = (idx == 4);
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (reached_last_index && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
-int use_activity_masking = 0;
-int64_t daala_dist;
+int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3772,19 +3771,16 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
-use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
-daala_dist =
-av1_daala_dist(xd, x->plane[0].src.buf - 4 * src_stride - 4,
-src_stride, decoded_8x8, 8, 8, 8, 8, 8, 1,
-use_activity_masking, x->qindex)
+dist_8x8 =
+av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4,
+src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
+x->qindex)
<< 4;
assert(sum_rdc.dist_y < INT64_MAX);
-sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) {
@@ -3922,14 +3918,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -3943,14 +3939,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
-int use_activity_masking = 0;
-int64_t daala_dist;
+int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -3961,17 +3956,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
-use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
-daala_dist = av1_daala_dist(xd, x->plane[0].src.buf - 4 * src_stride,
-src_stride, decoded_8x8, 8, 8, 8, 8, 8, 1,
-use_activity_masking, x->qindex)
-<< 4;
-sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride,
+src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
+8, x->qindex)
+<< 4;
+sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
@@ -4106,14 +4098,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
best_rdc.rdcost - sum_rdc.rdcost);
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step,
subsize, DRY_RUN_NORMAL);
encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step,
subsize, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
@@ -4127,14 +4119,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
-int use_activity_masking = 0;
-int64_t daala_dist;
+int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
@@ -4145,17 +4136,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
-#if CONFIG_PVQ
-use_activity_masking = x->daala_enc.use_activity_masking;
-#endif
-daala_dist =
-av1_daala_dist(xd, x->plane[0].src.buf - 4, src_stride, decoded_8x8,
-8, 8, 8, 8, 8, 1, use_activity_masking, x->qindex)
+dist_8x8 =
+av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride,
+decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
-sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist;
+sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
}
#if CONFIG_SUPERTX
if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) {
@@ -4405,11 +4393,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
(void)best_rd;
*rd_cost = best_rdc;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
assert(rd_cost->dist_y < INT64_MAX);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
@@ -4435,13 +4423,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
x->cfl_store_y = 0;
#endif
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
bsize == BLOCK_4X4 && pc_tree->index == 3) {
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
pc_tree, NULL);
}
-#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4
+#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize == cm->sb_size) {
#if !CONFIG_PVQ && !CONFIG_LV_MAP
@@ -6053,11 +6041,11 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
#endif
}
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (bsize < BLOCK_8X8) {
-daala_dist_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
-block_size_wide[bsize], block_size_high[bsize],
-mi_row, mi_col);
+dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize,
+block_size_wide[bsize], block_size_high[bsize],
+mi_row, mi_col);
}
#endif
@@ -503,7 +503,7 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
AV1_XFORM_QUANT xform_quant_idx) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
-#if !(CONFIG_PVQ || CONFIG_DAALA_DIST)
+#if !(CONFIG_PVQ || CONFIG_DIST_8X8)
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
#else
@@ -538,10 +538,10 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
TxfmParam txfm_param;
-#if CONFIG_PVQ || CONFIG_DAALA_DIST || CONFIG_LGT
+#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT
uint8_t *dst;
const int dst_stride = pd->dst.stride;
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DIST_8X8
int16_t *pred;
const int txw = tx_size_wide[tx_size];
const int txh = tx_size_high[tx_size];
@@ -601,9 +601,9 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
#endif // CONFIG_HIGHBITDEPTH
#endif
-#if CONFIG_PVQ || CONFIG_DAALA_DIST || CONFIG_LGT
+#if CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT
dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-#if CONFIG_PVQ || CONFIG_DAALA_DIST
+#if CONFIG_PVQ || CONFIG_DIST_8X8
pred = &pd->pred[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]];
// copy uint8 orig and predicted block to int16 buffer
@@ -622,8 +622,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block,
#if CONFIG_HIGHBITDEPTH
}
#endif // CONFIG_HIGHBITDEPTH
-#endif // CONFIG_PVQ || CONFIG_DAALA_DIST
-#endif // CONFIG_PVQ || CONFIG_DAALA_DIST || CONFIG_LGT
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8
+#endif // CONFIG_PVQ || CONFIG_DIST_8X8 || CONFIG_LGT
(void)ctx;
@@ -457,7 +457,7 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
rd_stats->rdcost = 0;
rd_stats->sse = 0;
rd_stats->skip = 1;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats->dist_y = 0;
#endif
#if CONFIG_RD_DEBUG
@@ -484,7 +484,7 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
rd_stats->rdcost = INT64_MAX;
rd_stats->sse = INT64_MAX;
rd_stats->skip = 0;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats->dist_y = INT64_MAX;
#endif
#if CONFIG_RD_DEBUG
@@ -511,7 +511,7 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
rd_stats_dst->dist += rd_stats_src->dist;
rd_stats_dst->sse += rd_stats_src->sse;
rd_stats_dst->skip &= rd_stats_src->skip;
-#if CONFIG_DAALA_DIST && CONFIG_CB4X4
+#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats_dst->dist_y += rd_stats_src->dist_y;
#endif
#if CONFIG_RD_DEBUG
@@ -72,11 +72,11 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane,
TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse,
OUTPUT_STATUS output_status);
-#if CONFIG_DAALA_DIST
-int64_t av1_daala_dist(const MACROBLOCKD *xd, const uint8_t *src,
-int src_stride, const uint8_t *dst, int dst_stride,
-int bsw, int bsh, int visible_w, int visible_h, int qm,
-int use_activity_masking, int qindex);
+#if CONFIG_DIST_8X8
+int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCKD *xd,
+const uint8_t *src, int src_stride, const uint8_t *dst,
+int dst_stride, const BLOCK_SIZE tx_bsize, int bsw,
+int bsh, int visible_w, int visible_h, int qindex);
#endif
#if !CONFIG_PVQ || CONFIG_VAR_TX
@@ -174,3 +174,4 @@ set(CONFIG_DAALA_DCT4 0 CACHE NUMBER "Internal flag.")
set(CONFIG_GF_GROUPS 0 CACHE NUMBER "Internal flag.")
set(CONFIG_MRC_TX 0 CACHE NUMBER "Internal flag.")
set(CONFIG_INTER_STATS_ONLY 0 CACHE NUMBER "Internal flag.")
+set(CONFIG_DIST_8x8 0 CACHE NUMBER "Internal flag.")
@@ -312,6 +312,7 @@ EXPERIMENT_LIST="
entropy_stats
masked_tx
dependent_horztiles
+dist_8x8
daala_dist
tripred
palette_throughput
@@ -529,6 +530,7 @@ post_process_cmdline() {
soft_enable tempmv_signaling
# Fix up experiment dependencies
+enabled daala_dist && enable_feature dist_8x8
enabled pvq && disable_feature chroma_2x2
enabled pvq && disable_feature rect_tx
enabled pvq && disable_feature ext_tx