Commit 8e75e8bb authored by Yushin Cho

Improve dist-8x8

Improve dist-8x8 when computing the 8x8 yuv distortion for sub8x8 partitions.

To apply dist-8x8 to sub8x8 partitions, dist-8x8 is computed on the 8x8
window once the mode decisions for the sub8x8 partitions are finished.
Since dist-8x8 applies only to luma, the chroma distortion has to be
tracked separately.

Previously it was hard to keep this free of potential bugs, due to the
complexity of the inter mode search code.

The new method is less error-prone: the uv distortion (as MSE) is
computed after the mode decisions for all sub8x8 blocks in an 8x8 window
are finished, at the same point where the dist-8x8 distortion for the 8x8
luma pixels is computed with the new distortion metric (sketched below).

All of the code that separated the y and uv distortions in the inter
mode search has been removed in this commit.

Change-Id: Ieaccb7915df5faeb5e89a7e70b2b7cbac65231af
parent d2630fa4
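
As a rough sketch of the idea (not the library code itself): combine a luma
distortion that was already computed with the dist-8x8 metric over the full
8x8 window with plain SSE/MSE for the two chroma planes, scaled onto the same
distortion scale. The helper names plane_sse and combine_yuv_dist_8x8 are
invented for this illustration; in the encoder the luma term comes from
av1_dist_8x8() and the chroma SSE from the variance function pointers, as the
new dist_8x8_yuv() in the diff below shows.

#include <stdint.h>

/* Plain sum of squared error over one plane; in the encoder the chroma SSE
 * comes from the variance function pointer cpi->fn_ptr[plane_bsize].vf().
 * (Illustrative helper, not part of the library.) */
static int64_t plane_sse(const uint8_t *src, int src_stride,
                         const uint8_t *rec, int rec_stride, int w, int h) {
  int64_t sse = 0;
  for (int r = 0; r < h; ++r) {
    for (int c = 0; c < w; ++c) {
      const int d = src[r * src_stride + c] - rec[r * rec_stride + c];
      sse += (int64_t)d * d;
    }
  }
  return sse;
}

/* Combine a luma distortion already computed with the dist-8x8 metric on the
 * full 8x8 window with the SSE of the co-located chroma pixels.  Both terms
 * are shifted left by 4, mirroring the scaling in dist_8x8_yuv() below, so
 * the result can replace sum_rdc.dist in the RD cost directly.
 * (Illustrative helper, not part of the library.) */
static int64_t combine_yuv_dist_8x8(int64_t luma_dist_8x8,
                                    const uint8_t *u_src, const uint8_t *u_rec,
                                    const uint8_t *v_src, const uint8_t *v_rec,
                                    int uv_stride, int uv_w, int uv_h) {
  int64_t dist = luma_dist_8x8 << 4;
  dist += plane_sse(u_src, uv_stride, u_rec, uv_stride, uv_w, uv_h) << 4;
  dist += plane_sse(v_src, uv_stride, v_rec, uv_stride, uv_w, uv_h) << 4;
  return dist;
}

Keeping chroma as plain MSE, computed once per 8x8 window after all four
sub8x8 mode decisions, is what removes the need to carry a separate luma-only
distortion (dist_y) through the inter mode search.
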
@@ -344,9 +344,6 @@ typedef struct RD_STATS {
int64_t ref_rdcost;
int zero_rate;
uint8_t invalid_rate;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
int64_t dist_y;
#endif
#if CONFIG_RD_DEBUG
int txb_coeff_cost[MAX_MB_PLANE];
#if CONFIG_VAR_TX
@@ -3370,6 +3370,54 @@ static void rd_test_partition3(
}
#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
static int64_t dist_8x8_yuv(const AV1_COMP *const cpi, MACROBLOCK *const x,
uint8_t *y_src_8x8) {
MACROBLOCKD *const xd = &x->e_mbd;
int64_t dist_8x8, dist_8x8_uv;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
int plane;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
else
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
dist_8x8 = av1_dist_8x8(cpi, x, y_src_8x8, src_stride, decoded_8x8, 8,
BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
// Compute chroma distortion for a luma 8x8 block
dist_8x8_uv = 0;
for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
const int src_stride_uv = x->plane[plane].src.stride;
const int dst_stride_uv = xd->plane[plane].dst.stride;
// The uv buffer pointers here (i.e. at the last sub8x8 block) are the same
// as those at the first sub8x8 block, because the uv buffer pointers are
// set only once, at the first sub8x8 block in an 8x8 window.
uint8_t *src_uv = x->plane[plane].src.buf;
uint8_t *dst_uv = xd->plane[plane].dst.buf;
unsigned sse;
#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(BLOCK_8X8, &xd->plane[plane]));
#else
const BLOCK_SIZE plane_bsize =
get_plane_block_size(BLOCK_8X8, &xd->plane[plane]);
#endif
cpi->fn_ptr[plane_bsize].vf(src_uv, src_stride_uv, dst_uv, dst_stride_uv,
&sse);
dist_8x8_uv += (int64_t)sse << 4;
}
return dist_8x8 + dist_8x8_uv;
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
@@ -3819,12 +3867,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
temp_best_rdcost - sum_rdc.rdcost,
pc_tree->split[idx]);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize == BLOCK_8X8 &&
this_rdc.rate != INT_MAX) {
assert(this_rdc.dist_y < INT64_MAX);
}
#endif
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
#if CONFIG_SUPERTX
@@ -3838,12 +3880,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
assert(this_rdc.dist_y < INT64_MAX);
sum_rdc.dist_y += this_rdc.dist_y;
}
#endif
}
}
reached_last_index = (idx == 4);
@@ -3851,24 +3887,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && reached_last_index &&
sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
else
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
int64_t dist_8x8;
dist_8x8 =
av1_dist_8x8(cpi, x, x->plane[0].src.buf - 4 * src_stride - 4,
src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8,
x->qindex)
<< 4;
assert(sum_rdc.dist_y < INT64_MAX);
sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4 * src_stride - 4);
sum_rdc.dist = dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -4029,29 +4052,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8) sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
else
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
dist_8x8 = av1_dist_8x8(cpi, x, x->plane[0].src.buf - 4 * src_stride,
src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8,
8, x->qindex)
<< 4;
sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
int64_t dist_8x8;
dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4 * src_stride);
sum_rdc.dist = dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -4209,29 +4217,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#if CONFIG_SUPERTX
sum_rate_nocoef += this_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8) sum_rdc.dist_y += this_rdc.dist_y;
#endif
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
bsize == BLOCK_8X8) {
int64_t dist_8x8;
const int src_stride = x->plane[0].src.stride;
uint8_t *decoded_8x8;
#if CONFIG_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8);
else
#endif
decoded_8x8 = (uint8_t *)x->decoded_8x8;
dist_8x8 =
av1_dist_8x8(cpi, x, x->plane[0].src.buf - 4, src_stride,
decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
<< 4;
sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8;
dist_8x8 = dist_8x8_yuv(cpi, x, x->plane[0].src.buf - 4);
sum_rdc.dist = dist_8x8;
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
@@ -4457,11 +4449,6 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
(void)best_rd;
*rd_cost = best_rdc;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) {
assert(rd_cost->dist_y < INT64_MAX);
}
#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4
#if CONFIG_SUPERTX
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
@@ -521,9 +521,6 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
rd_stats->zero_rate = 0;
rd_stats->invalid_rate = 0;
rd_stats->ref_rdcost = INT64_MAX;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats->dist_y = 0;
#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = 0;
@@ -551,9 +548,6 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
rd_stats->zero_rate = 0;
rd_stats->invalid_rate = 1;
rd_stats->ref_rdcost = INT64_MAX;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats->dist_y = INT64_MAX;
#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = INT_MAX;
@@ -579,9 +573,6 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
rd_stats_dst->sse += rd_stats_src->sse;
rd_stats_dst->skip &= rd_stats_src->skip;
rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
rd_stats_dst->dist_y += rd_stats_src->dist_y;
#endif
#if CONFIG_RD_DEBUG
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
@@ -9978,9 +9978,6 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
rd_cost->dist = dist_y + dist_uv;
}
rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8) rd_cost->dist_y = dist_y;
#endif
} else {
rd_cost->rate = INT_MAX;
}
@@ -10715,10 +10712,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
int compmode_cost = 0;
int rate2 = 0, rate_y = 0, rate_uv = 0;
int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
int64_t distortion2_y = 0;
int64_t total_sse_y = INT64_MAX;
#endif
int skippable = 0;
int this_skip2 = 0;
int64_t total_sse = INT64_MAX;
@@ -11103,9 +11096,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED)
rate2 += intra_cost_penalty;
distortion2 = distortion_y + distortion_uv;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) distortion2_y = distortion_y;
#endif
} else {
int_mv backup_ref_mv[2];
@@ -11201,20 +11191,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
{
RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
av1_init_rd_stats(&rd_stats);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
// While av1 master uses rd_stats_y.rate through out the codebase,
// which is set when handle_inter_mode is called, the daala-dist code
// in rd_pick_partition() for cb4x4 and sub8x8 blocks need to know
// .dist_y which comes from rd_stats_y.dist and rd_stats_y.sse.
// The problem is rd_stats_y.dist and rd_stats_y.sse are sometimes not
// initialized when rd_stats.skip = 1,
// then instead rd_stats.dist and rd_stats.sse have the
// combined luma and chroma dist and sse.
// This can be seen inside motion_mode_rd(), which is called by
// handle_inter_mode().
if (x->using_dist_8x8 && bsize < BLOCK_8X8)
av1_init_rd_stats(&rd_stats_y);
#endif
rd_stats.rate = rate2;
// Point to variables that are maintained between loop iterations
@@ -11236,16 +11212,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
total_sse = rd_stats.sse;
rate_y = rd_stats_y.rate;
rate_uv = rd_stats_uv.rate;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
if (rd_stats_y.rate != INT_MAX) {
assert(rd_stats_y.sse < INT64_MAX);
assert(rd_stats_y.dist < INT64_MAX);
}
total_sse_y = rd_stats_y.sse;
distortion2_y = rd_stats_y.dist;
}
#endif
}
// TODO(jingning): This needs some refactoring to improve code quality
@@ -11419,16 +11385,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
frame_mv[NEARMV][ref_frame] = cur_mv;
av1_init_rd_stats(&tmp_rd_stats);
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
// With the same reason as 'rd_stats_y' passed to above
// handle_inter_mode(), tmp_rd_stats_y.dist and
// tmp_rd_stats_y.sse are sometimes not initialized, esp. when
// tmp_rd_stats.skip = 1 and tmp_rd_stats.dist and .sse
// represent combined luma and chroma .dist and .sse,
// we should initialized tmp_rd_stats_y.
if (x->using_dist_8x8 && bsize < BLOCK_8X8)
av1_init_rd_stats(&tmp_rd_stats_y);
#endif
// Point to variables that are not maintained between iterations
args.single_newmv = dummy_single_newmv;
#if CONFIG_EXT_INTER
@@ -11504,16 +11461,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
tmp_ref_rd = tmp_alt_rd;
backup_mbmi = *mbmi;
backup_skip = x->skip;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
if (tmp_rd_stats_y.rate != INT_MAX) {
assert(tmp_rd_stats_y.sse < INT64_MAX);
assert(tmp_rd_stats_y.dist < INT64_MAX);
}
total_sse_y = tmp_rd_stats_y.sse;
distortion2_y = tmp_rd_stats_y.dist;
}
#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(x->blk_skip_drl[i], x->blk_skip[i],
@@ -11596,12 +11543,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
this_skip2 = 1;
rate_y = 0;
rate_uv = 0;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
assert(total_sse_y < INT64_MAX);
distortion2_y = total_sse_y;
}
#endif
}
} else {
// Add in the cost of the no skip flag.
@@ -11621,11 +11562,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8 && rate2 != INT_MAX)
assert(distortion2_y < INT64_MAX);
#endif
if (ref_frame == INTRA_FRAME) {
// Keep record of best intra rd
if (this_rd < best_intra_rd) {
@@ -11701,12 +11637,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd),
this_skip2 || skippable);
best_rate_uv = rate_uv;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
assert(distortion2_y < INT64_MAX);
rd_cost->dist_y = distortion2_y;
}
#endif
#if CONFIG_VAR_TX
for (i = 0; i < MAX_MB_PLANE; ++i)
memcpy(ctx->blk_skip[i], x->blk_skip[i],
@@ -11714,10 +11644,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
#endif // CONFIG_VAR_TX
}
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8 && rd_cost->rate != INT_MAX)
assert(rd_cost->dist_y < INT64_MAX);
#endif
/* keep record of best compound/single-only prediction */
if (!disable_skip && ref_frame != INTRA_FRAME) {
int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
@@ -11849,21 +11776,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data,
rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
best_skip2 = skip_blk;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) {
assert(rd_cost->rate != INT_MAX);
assert(rd_cost->dist_y < INT64_MAX);
rd_cost->dist_y = rd_stats_y.dist;
}
#endif
}
}
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8 && rd_cost->rate != INT_MAX)
assert(rd_cost->dist_y < INT64_MAX);
#endif
// Only try palette mode when the best mode so far is an intra mode.
if (try_palette && !is_inter_mode(best_mbmode.mode)) {
int rate2 = 0;
@@ -12433,9 +12348,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
rd_cost->rate = rate2;
rd_cost->dist = distortion2;
rd_cost->rdcost = this_rd;
#if CONFIG_DIST_8X8 && CONFIG_CB4X4
if (x->using_dist_8x8 && bsize < BLOCK_8X8) rd_cost->dist_y = distortion2;
#endif
if (this_rd >= best_rd_so_far) {
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;