Commit 2a10c91b authored by James Zern

rdopt,rd_pick_intra4x4block: port tsan fix from libvpx

minus the non-existent nonrd portion. original change:

commit d642294b
Author: Jingning Han <jingning@google.com>
Date:   Thu Feb 11 12:36:49 2016 -0800

    Fix tsan error in VP9 sub8x8 intra mode search

    This commit fixes issue 1141. The issue was triggered in multi-tile
    encoding. The change properly saves and restores the block context
    information in the real-time mode selection process. It removes
    several redundant memcpy operations in sub8x8 intra block mode
    search.

    Change-Id: I35c9ad197f4bd500ec39b5fc833f052f19eee010

Change-Id: If0c11092450ede0450fc9bcf5db2f7349ac2860c
parent 5c02d1f5
...@@ -812,8 +812,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row, ...@@ -812,8 +812,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
uint16_t best_dst16[8 * 8]; uint16_t best_dst16[8 * 8];
#endif #endif
memcpy(ta, a, sizeof(ta)); memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
memcpy(tl, l, sizeof(tl)); memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));
xd->mi[0]->mbmi.tx_size = TX_4X4; xd->mi[0]->mbmi.tx_size = TX_4X4;
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
...@@ -832,8 +832,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row, ...@@ -832,8 +832,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
if (conditional_skipintra(mode, *best_mode)) continue; if (conditional_skipintra(mode, *best_mode)) continue;
} }
memcpy(tempa, ta, sizeof(ta)); memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
memcpy(templ, tl, sizeof(tl)); memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
...@@ -892,8 +892,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row, ...@@ -892,8 +892,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
*bestdistortion = distortion; *bestdistortion = distortion;
best_rd = this_rd; best_rd = this_rd;
*best_mode = mode; *best_mode = mode;
memcpy(a, tempa, sizeof(tempa)); memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
memcpy(l, templ, sizeof(templ)); memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
memcpy(best_dst16 + idy * 8, memcpy(best_dst16 + idy * 8,
CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
...@@ -927,8 +927,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row, ...@@ -927,8 +927,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
if (conditional_skipintra(mode, *best_mode)) continue; if (conditional_skipintra(mode, *best_mode)) continue;
} }
memcpy(tempa, ta, sizeof(ta)); memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
memcpy(templ, tl, sizeof(tl)); memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));
for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
...@@ -984,8 +984,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row, ...@@ -984,8 +984,8 @@ static int64_t rd_pick_intra4x4block(AV1_COMP *cpi, MACROBLOCK *x, int row,
*bestdistortion = distortion; *bestdistortion = distortion;
best_rd = this_rd; best_rd = this_rd;
*best_mode = mode; *best_mode = mode;
memcpy(a, tempa, sizeof(tempa)); memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
memcpy(l, templ, sizeof(templ)); memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
num_4x4_blocks_wide * 4); num_4x4_blocks_wide * 4);
...@@ -1019,12 +1019,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb, ...@@ -1019,12 +1019,8 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
int64_t total_distortion = 0; int64_t total_distortion = 0;
int tot_rate_y = 0; int tot_rate_y = 0;
int64_t total_rd = 0; int64_t total_rd = 0;
ENTROPY_CONTEXT t_above[4], t_left[4];
const int *bmode_costs = cpi->mbmode_cost; const int *bmode_costs = cpi->mbmode_cost;
memcpy(t_above, xd->plane[0].above_context, sizeof(t_above));
memcpy(t_left, xd->plane[0].left_context, sizeof(t_left));
// Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
...@@ -1040,7 +1036,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb, ...@@ -1040,7 +1036,9 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(AV1_COMP *cpi, MACROBLOCK *mb,
} }
this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode, this_rd = rd_pick_intra4x4block(cpi, mb, idy, idx, &best_mode,
bmode_costs, t_above + idx, t_left + idy, bmode_costs,
xd->plane[0].above_context + idx,
xd->plane[0].left_context + idy,
&r, &ry, &d, bsize, best_rd - total_rd); &r, &ry, &d, bsize, best_rd - total_rd);
if (this_rd >= best_rd - total_rd) return INT64_MAX; if (this_rd >= best_rd - total_rd) return INT64_MAX;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment