Commit b62eef7b authored by David Barker, committed by Yue Chen
Browse files

Fix encode/decode mismatch with global/warped motion

When predicting a 4x4 warp block (either using ZEROMV with
global-motion, or the WARPED_CAUSAL motion mode with
warped-motion), the warp filter would previously write
4 extra pixels (4 bytes at 8-bit depth, 8 bytes at high bit depth)
to the right of the block.

This caused encode/decode mismatches when encoding with
multiple threads and tile_cols > 1, since in that case
we could end up overwriting already-generated pixels from
the next tile across.

This patch changes the filter so that we only overwrite the
intended pixels.

Change-Id: I3664b44e872e85aa5ccc0a5781f0f9ad994a5b80
parent a67c640b
......@@ -1301,7 +1301,7 @@ void av1_warp_affine_c(int32_t *mat, uint8_t *ref, int width, int height,
// Vertical filter
for (k = -4; k < AOMMIN(4, p_row + p_height - i - 4); ++k) {
int sy = sy4 + gamma * (-4) + delta * k;
for (l = -4; l < 4; ++l) {
for (l = -4; l < AOMMIN(4, p_col + p_width - j - 4); ++l) {
uint8_t *p =
&pred[(i - p_row + k + 4) * p_stride + (j - p_col + l + 4)];
// At this point, sy = sy4 + gamma * l + delta * k
......
......@@ -270,12 +270,15 @@ void av1_highbd_warp_affine_ssse3(int32_t *mat, uint16_t *ref, int width,
// Store, blending with 'pred' if needed
__m128i *p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
if (ref_frm) {
__m128i orig = _mm_loadu_si128(p);
_mm_storeu_si128(p, _mm_avg_epu16(res_16bit, orig));
} else {
if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p));
// Note: If we're outputting a 4x4 block, we need to be very careful
// to only output 4 pixels at this point, to avoid encode/decode
// mismatches when encoding with multiple threads.
if (p_width == 4)
_mm_storel_epi64(p, res_16bit);
else
_mm_storeu_si128(p, res_16bit);
}
}
}
}
......
......@@ -276,12 +276,15 @@ void av1_warp_affine_sse2(int32_t *mat, uint8_t *ref, int width, int height,
// Store, blending with 'pred' if needed
__m128i *p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
if (ref_frm) {
__m128i orig = _mm_loadl_epi64(p);
_mm_storel_epi64(p, _mm_avg_epu8(res_8bit, orig));
} else {
if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p));
// Note: If we're outputting a 4x4 block, we need to be very careful
// to only output 4 pixels at this point, to avoid encode/decode
// mismatches when encoding with multiple threads.
if (p_width == 4)
*(uint32_t *)p = _mm_cvtsi128_si32(res_8bit);
else
_mm_storel_epi64(p, res_8bit);
}
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment