From 7afb8b758ac38829cd4808d2412e0b8fadd042d6 Mon Sep 17 00:00:00 2001 From: Sarah Parker Date: Thu, 4 May 2017 15:33:10 -0700 Subject: [PATCH] Compute compound average in warp_plane only for COMPOUND_AVERAGE This fixes a mismatch which occurs when global/warped motion and a masked compound type are used together. Change-Id: I08b2702cdb3b85f8d8817b9286a73951c97cf379 --- av1/common/av1_rtcd_defs.pl | 4 +-- av1/common/reconinter.h | 8 +++++- av1/common/warped_motion.c | 34 ++++++++++++------------ av1/common/warped_motion.h | 2 +- av1/common/x86/highbd_warp_plane_ssse3.c | 8 +++--- av1/common/x86/warp_plane_sse2.c | 6 ++--- av1/common/x86/warp_plane_ssse3.c | 6 ++--- 7 files changed, 38 insertions(+), 30 deletions(-) diff --git a/av1/common/av1_rtcd_defs.pl b/av1/common/av1_rtcd_defs.pl index ec682cf28..f6fa6beea 100755 --- a/av1/common/av1_rtcd_defs.pl +++ b/av1/common/av1_rtcd_defs.pl @@ -608,11 +608,11 @@ if (aom_config("CONFIG_PVQ") eq "yes") { if ((aom_config("CONFIG_WARPED_MOTION") eq "yes") || (aom_config("CONFIG_GLOBAL_MOTION") eq "yes")) { - add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta"; + add_proto qw/void av1_warp_affine/, "const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta"; specialize qw/av1_warp_affine sse2 ssse3/; if (aom_config("CONFIG_HIGHBITDEPTH") eq "yes") { - add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int ref_frm, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta"; + add_proto qw/void av1_highbd_warp_affine/, "const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta"; specialize qw/av1_highbd_warp_affine ssse3/; } } diff --git a/av1/common/reconinter.h b/av1/common/reconinter.h index 10933a751..a70c13af0 100644 --- a/av1/common/reconinter.h +++ b/av1/common/reconinter.h @@ -415,13 +415,19 @@ static INLINE void av1_make_inter_predictor( if (do_warp) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const struct buf_2d *const pre_buf = &pd->pre[ref]; +#if CONFIG_EXT_INTER + int compute_avg = + ref && mi->mbmi.interinter_compound_type == COMPOUND_AVERAGE; +#else + int compute_avg = ref; +#endif // CONFIG_EXT_INTER av1_warp_plane(&final_warp_params, #if CONFIG_HIGHBITDEPTH xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_HIGHBITDEPTH pre_buf->buf0, pre_buf->width, pre_buf->height, pre_buf->stride, dst, p_col, p_row, w, h, dst_stride, - pd->subsampling_x, pd->subsampling_y, xs, ys, ref); + pd->subsampling_x, pd->subsampling_y, xs, ys, compute_avg); return; } #endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION diff --git a/av1/common/warped_motion.c b/av1/common/warped_motion.c index 7e27985fd..1b9529397 100644 --- a/av1/common/warped_motion.c +++ b/av1/common/warped_motion.c @@ -909,7 +909,7 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int x_scale, int y_scale, int bd, - int ref_frm) { + int comp_avg) { int i, j; ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype); uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); @@ -923,7 +923,7 @@ static void highbd_warp_plane_old(WarpedMotionParams *wm, uint8_t *ref8, projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y); out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4); out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4); - if (ref_frm) + if (comp_avg) pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO( pred[(j - p_col) + (i - p_row) * p_stride] + highbd_warp_interpolate(ref, out[0], out[1], width, height, @@ -953,7 +953,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, int width, int height, int stride, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, - int subsampling_y, int bd, int ref_frm, + int subsampling_y, int bd, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta) { #if HORSHEAR_REDUCE_PREC_BITS >= 5 @@ -1059,7 +1059,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, } sum = clip_pixel_highbd( ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS), bd); - if (ref_frm) + if (comp_avg) *p = ROUND_POWER_OF_TWO(*p + sum, 1); else *p = sum; @@ -1075,7 +1075,7 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int x_scale, int y_scale, - int bd, int ref_frm) { + int bd, int comp_avg) { if (wm->wmtype == ROTZOOM) { wm->wmmat[5] = wm->wmmat[2]; wm->wmmat[4] = -wm->wmmat[3]; @@ -1092,12 +1092,12 @@ static void highbd_warp_plane(WarpedMotionParams *wm, uint8_t *ref8, int width, uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); av1_highbd_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, - subsampling_y, bd, ref_frm, alpha, beta, gamma, + subsampling_y, bd, comp_avg, alpha, beta, gamma, delta); } else { highbd_warp_plane_old(wm, ref8, width, height, stride, pred8, p_col, p_row, p_width, p_height, p_stride, subsampling_x, - subsampling_y, x_scale, y_scale, bd, ref_frm); + subsampling_y, x_scale, y_scale, bd, comp_avg); } } @@ -1138,7 +1138,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int x_scale, - int y_scale, int ref_frm) { + int y_scale, int comp_avg) { int i, j; ProjectPointsFunc projectpoints = get_project_points_type(wm->wmtype); if (projectpoints == NULL) return; @@ -1150,7 +1150,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width, projectpoints(wm->wmmat, in, out, 1, 2, 2, subsampling_x, subsampling_y); out[0] = ROUND_POWER_OF_TWO_SIGNED(out[0] * x_scale, 4); out[1] = ROUND_POWER_OF_TWO_SIGNED(out[1] * y_scale, 4); - if (ref_frm) + if (comp_avg) pred[(j - p_col) + (i - p_row) * p_stride] = ROUND_POWER_OF_TWO( pred[(j - p_col) + (i - p_row) * p_stride] + warp_interpolate(ref, out[0], out[1], width, height, stride), @@ -1202,7 +1202,7 @@ static void warp_plane_old(WarpedMotionParams *wm, uint8_t *ref, int width, void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, int ref_frm, + int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta) { int16_t tmp[15 * 8]; @@ -1316,7 +1316,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; } sum = clip_pixel(ROUND_POWER_OF_TWO(sum, VERSHEAR_REDUCE_PREC_BITS)); - if (ref_frm) + if (comp_avg) *p = ROUND_POWER_OF_TWO(*p + sum, 1); else *p = sum; @@ -1331,7 +1331,7 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int x_scale, - int y_scale, int ref_frm) { + int y_scale, int comp_avg) { if (wm->wmtype == ROTZOOM) { wm->wmmat[5] = wm->wmmat[2]; wm->wmmat[4] = -wm->wmmat[3]; @@ -1346,11 +1346,11 @@ static void warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width, av1_warp_affine(mat, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, - ref_frm, alpha, beta, gamma, delta); + comp_avg, alpha, beta, gamma, delta); } else { warp_plane_old(wm, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, x_scale, - y_scale, ref_frm); + y_scale, comp_avg); } } @@ -1409,17 +1409,17 @@ void av1_warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, - int subsampling_y, int x_scale, int y_scale, int ref_frm) { + int subsampling_y, int x_scale, int y_scale, int comp_avg) { #if CONFIG_HIGHBITDEPTH if (use_hbd) highbd_warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, - x_scale, y_scale, bd, ref_frm); + x_scale, y_scale, bd, comp_avg); else #endif // CONFIG_HIGHBITDEPTH warp_plane(wm, ref, width, height, stride, pred, p_col, p_row, p_width, p_height, p_stride, subsampling_x, subsampling_y, x_scale, - y_scale, ref_frm); + y_scale, comp_avg); } #if CONFIG_WARPED_MOTION diff --git a/av1/common/warped_motion.h b/av1/common/warped_motion.h index dfd8dae34..7c011ad23 100644 --- a/av1/common/warped_motion.h +++ b/av1/common/warped_motion.h @@ -87,7 +87,7 @@ void av1_warp_plane(WarpedMotionParams *wm, uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, - int subsampling_y, int x_scale, int y_scale, int ref_frm); + int subsampling_y, int x_scale, int y_scale, int comp_avg); int find_projection(int np, int *pts1, int *pts2, BLOCK_SIZE bsize, int mvy, int mvx, WarpedMotionParams *wm_params, int mi_row, diff --git a/av1/common/x86/highbd_warp_plane_ssse3.c b/av1/common/x86/highbd_warp_plane_ssse3.c index 476234014..18f120f4a 100644 --- a/av1/common/x86/highbd_warp_plane_ssse3.c +++ b/av1/common/x86/highbd_warp_plane_ssse3.c @@ -20,7 +20,7 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, uint16_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, int subsampling_x, int subsampling_y, int bd, - int ref_frm, int16_t alpha, int16_t beta, + int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta) { #if HORSHEAR_REDUCE_PREC_BITS >= 5 __m128i tmp[15]; @@ -304,10 +304,12 @@ void av1_highbd_warp_affine_ssse3(const int32_t *mat, const uint16_t *ref, // to only output 4 pixels at this point, to avoid encode/decode // mismatches when encoding with multiple threads. if (p_width == 4) { - if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p)); + if (comp_avg) + res_16bit = _mm_avg_epu16(res_16bit, _mm_loadl_epi64(p)); _mm_storel_epi64(p, res_16bit); } else { - if (ref_frm) res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p)); + if (comp_avg) + res_16bit = _mm_avg_epu16(res_16bit, _mm_loadu_si128(p)); _mm_storeu_si128(p, res_16bit); } } diff --git a/av1/common/x86/warp_plane_sse2.c b/av1/common/x86/warp_plane_sse2.c index 81145b6e1..055a1f642 100644 --- a/av1/common/x86/warp_plane_sse2.c +++ b/av1/common/x86/warp_plane_sse2.c @@ -18,7 +18,7 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, int ref_frm, + int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta) { __m128i tmp[15]; @@ -296,13 +296,13 @@ void av1_warp_affine_sse2(const int32_t *mat, const uint8_t *ref, int width, // to only output 4 pixels at this point, to avoid encode/decode // mismatches when encoding with multiple threads. if (p_width == 4) { - if (ref_frm) { + if (comp_avg) { const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p); res_8bit = _mm_avg_epu8(res_8bit, orig); } *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit); } else { - if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p)); + if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p)); _mm_storel_epi64(p, res_8bit); } } diff --git a/av1/common/x86/warp_plane_ssse3.c b/av1/common/x86/warp_plane_ssse3.c index f6cc2d62f..1cca425b0 100644 --- a/av1/common/x86/warp_plane_ssse3.c +++ b/av1/common/x86/warp_plane_ssse3.c @@ -205,7 +205,7 @@ static const uint8_t odd_mask[16] = { 1, 3, 3, 5, 5, 7, 7, 9, void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, int height, int stride, uint8_t *pred, int p_col, int p_row, int p_width, int p_height, int p_stride, - int subsampling_x, int subsampling_y, int ref_frm, + int subsampling_x, int subsampling_y, int comp_avg, int16_t alpha, int16_t beta, int16_t gamma, int16_t delta) { __m128i tmp[15]; @@ -473,13 +473,13 @@ void av1_warp_affine_ssse3(const int32_t *mat, const uint8_t *ref, int width, // to only output 4 pixels at this point, to avoid encode/decode // mismatches when encoding with multiple threads. if (p_width == 4) { - if (ref_frm) { + if (comp_avg) { const __m128i orig = _mm_cvtsi32_si128(*(uint32_t *)p); res_8bit = _mm_avg_epu8(res_8bit, orig); } *(uint32_t *)p = _mm_cvtsi128_si32(res_8bit); } else { - if (ref_frm) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p)); + if (comp_avg) res_8bit = _mm_avg_epu8(res_8bit, _mm_loadl_epi64(p)); _mm_storel_epi64(p, res_8bit); } } -- GitLab