From 49bb8f8c8fd6f9160f5a407332a75bc706d5074a Mon Sep 17 00:00:00 2001 From: Thomas Daede Date: Thu, 13 Jul 2017 17:39:31 -0700 Subject: [PATCH] Fix mismatch between SSE2 and C versions of HBD upsampled_pred. This is a partial fix of the HBD upsampled pred quality loss introduced in commit 5d24b6f0491982c93f15fe9219364cf4a7d01995. BUG=aomedia:649 Change-Id: I114dab1c8aac285071c5910dff40d48beea8c2ed --- aom_dsp/variance.c | 14 ++++++-------- aom_dsp/x86/highbd_variance_sse2.c | 12 +++++------- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/aom_dsp/variance.c b/aom_dsp/variance.c index cb5924a17..a4c3616e7 100644 --- a/aom_dsp/variance.c +++ b/aom_dsp/variance.c @@ -718,23 +718,21 @@ void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel, 16, width, height, bd); } else { - DECLARE_ALIGNED(16, uint8_t, - temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); - const uint16_t *ref; + DECLARE_ALIGNED(16, uint16_t, + temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]); const int16_t *kernel_x; const int16_t *kernel_y; int intermediate_height; - ref = CONVERT_TO_SHORTPTR(ref8); kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); intermediate_height = (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps; assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); /*Directly call C versions to allow this to work for small (2x2) sizes.*/ - aom_highbd_convolve8_horiz_c( - CONVERT_TO_BYTEPTR(ref - ref_stride * ((filter.taps >> 1) - 1)), - ref_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, kernel_x, 16, NULL, - -1, width, intermediate_height, bd); + aom_highbd_convolve8_horiz_c(ref8 - ref_stride * ((filter.taps >> 1) - 1), + ref_stride, CONVERT_TO_BYTEPTR(temp), + MAX_SB_SIZE, kernel_x, 16, NULL, -1, width, + intermediate_height, bd); aom_highbd_convolve8_vert_c( CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)), MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y, diff --git a/aom_dsp/x86/highbd_variance_sse2.c b/aom_dsp/x86/highbd_variance_sse2.c index 033ebe6eb..93923ffb0 100644 --- a/aom_dsp/x86/highbd_variance_sse2.c +++ b/aom_dsp/x86/highbd_variance_sse2.c @@ -658,21 +658,19 @@ void aom_highbd_upsampled_pred_sse2(uint16_t *comp_pred, int width, int height, width, NULL, -1, kernel, 16, width, height, bd); } else { DECLARE_ALIGNED(16, uint16_t, - temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); - const uint16_t *ref; + temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]); const int16_t *kernel_x; const int16_t *kernel_y; int intermediate_height; - ref = CONVERT_TO_SHORTPTR(ref8); kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); intermediate_height = (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps; assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); - aom_highbd_convolve8_horiz( - CONVERT_TO_BYTEPTR(ref - ref_stride * ((filter.taps >> 1) - 1)), - ref_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, kernel_x, 16, NULL, - -1, width, intermediate_height, bd); + aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter.taps >> 1) - 1), + ref_stride, CONVERT_TO_BYTEPTR(temp), + MAX_SB_SIZE, kernel_x, 16, NULL, -1, width, + intermediate_height, bd); aom_highbd_convolve8_vert( CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)), MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y, -- GitLab