Commit d642294b authored by Jingning Han, committed by James Zern

Fix tsan error in VP9 sub8x8 intra mode search

This commit fixes issue 1141. The issue was triggered in multi-tile
encoding. The change properly saves and restores the block context
information in the real-time mode selection process. It removes
several redundant memcpy operations in sub8x8 intra block mode search.

Change-Id: I35c9ad197f4bd500ec39b5fc833f052f19eee010
parent acc592b3
......@@ -3031,10 +3031,24 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *mi;
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs];
int plane;
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mi = xd->mi[0];
mi->sb_type = bsize;
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
struct macroblockd_plane *pd = &xd->plane[plane];
memcpy(a + num_4x4_blocks_wide * plane, pd->above_context,
(sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
memcpy(l + num_4x4_blocks_high * plane, pd->left_context,
(sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
}
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled)
if (cyclic_refresh_segment_id_boosted(mi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
......@@ -3052,6 +3066,14 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
struct macroblockd_plane *pd = &xd->plane[plane];
memcpy(pd->above_context, a + num_4x4_blocks_wide * plane,
(sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x);
memcpy(pd->left_context, l + num_4x4_blocks_high * plane,
(sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y);
}
if (rd_cost->rate == INT_MAX)
vp9_rd_cost_reset(rd_cost);
......
......@@ -787,9 +787,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
uint16_t best_dst16[8 * 8];
#endif
memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
memcpy(ta, a, sizeof(ta));
memcpy(tl, l, sizeof(tl));
xd->mi[0]->tx_size = TX_4X4;
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -810,8 +810,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
memcpy(tempa, ta, sizeof(ta));
memcpy(templ, tl, sizeof(tl));
memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
......@@ -874,8 +874,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
memcpy(a, tempa, sizeof(tempa));
memcpy(l, templ, sizeof(templ));
memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
......@@ -914,8 +914,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
memcpy(tempa, ta, sizeof(ta));
memcpy(templ, tl, sizeof(tl));
memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
......@@ -976,8 +976,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x,
*bestdistortion = distortion;
best_rd = this_rd;
*best_mode = mode;
memcpy(a, tempa, sizeof(tempa));
memcpy(l, templ, sizeof(templ));
memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4);
......@@ -1013,12 +1013,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
int64_t total_distortion = 0;
int tot_rate_y = 0;
int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
const int *bmode_costs = cpi->mbmode_cost;
memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
......@@ -1034,8 +1030,11 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
}
this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
bmode_costs, t_above + idx, t_left + idy,
bmode_costs,
xd->plane[0].above_context + idx,
xd->plane[0].left_context + idy,
&r, &ry, &d, bsize, best_rd - total_rd);
if (this_rd >= best_rd - total_rd)
return INT64_MAX;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment