Commit 49bb8f8c authored by Thomas Daede

Fix mismatch between SSE2 and C versions of HBD upsampled_pred.

This is a partial fix of the HBD upsampled pred quality loss
introduced in commit 5d24b6f0.

BUG=aomedia:649

Change-Id: I114dab1c8aac285071c5910dff40d48beea8c2ed
parent 2510f64e
...@@ -718,23 +718,21 @@ void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height, ...@@ -718,23 +718,21 @@ void aom_highbd_upsampled_pred_c(uint16_t *comp_pred, int width, int height,
CONVERT_TO_BYTEPTR(comp_pred), width, NULL, CONVERT_TO_BYTEPTR(comp_pred), width, NULL,
-1, kernel, 16, width, height, bd); -1, kernel, 16, width, height, bd);
} else { } else {
DECLARE_ALIGNED(16, uint8_t, DECLARE_ALIGNED(16, uint16_t,
temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
const uint16_t *ref;
const int16_t *kernel_x; const int16_t *kernel_x;
const int16_t *kernel_y; const int16_t *kernel_y;
int intermediate_height; int intermediate_height;
ref = CONVERT_TO_SHORTPTR(ref8);
kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
intermediate_height = intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps; (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
/*Directly call C versions to allow this to work for small (2x2) sizes.*/ /*Directly call C versions to allow this to work for small (2x2) sizes.*/
aom_highbd_convolve8_horiz_c( aom_highbd_convolve8_horiz_c(ref8 - ref_stride * ((filter.taps >> 1) - 1),
CONVERT_TO_BYTEPTR(ref - ref_stride * ((filter.taps >> 1) - 1)), ref_stride, CONVERT_TO_BYTEPTR(temp),
ref_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, kernel_x, 16, NULL, MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
-1, width, intermediate_height, bd); intermediate_height, bd);
aom_highbd_convolve8_vert_c( aom_highbd_convolve8_vert_c(
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)), CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y, MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
......
...@@ -658,21 +658,19 @@ void aom_highbd_upsampled_pred_sse2(uint16_t *comp_pred, int width, int height, ...@@ -658,21 +658,19 @@ void aom_highbd_upsampled_pred_sse2(uint16_t *comp_pred, int width, int height,
width, NULL, -1, kernel, 16, width, height, bd); width, NULL, -1, kernel, 16, width, height, bd);
} else { } else {
DECLARE_ALIGNED(16, uint16_t, DECLARE_ALIGNED(16, uint16_t,
temp[((MAX_SB_SIZE * 2 + 16) + 16) * MAX_SB_SIZE]); temp[((MAX_SB_SIZE + 16) + 16) * MAX_SB_SIZE]);
const uint16_t *ref;
const int16_t *kernel_x; const int16_t *kernel_x;
const int16_t *kernel_y; const int16_t *kernel_y;
int intermediate_height; int intermediate_height;
ref = CONVERT_TO_SHORTPTR(ref8);
kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1); kernel_x = av1_get_interp_filter_subpel_kernel(filter, subpel_x_q3 << 1);
kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1); kernel_y = av1_get_interp_filter_subpel_kernel(filter, subpel_y_q3 << 1);
intermediate_height = intermediate_height =
(((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps; (((height - 1) * 8 + subpel_y_q3) >> 3) + filter.taps;
assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16); assert(intermediate_height <= (MAX_SB_SIZE * 2 + 16) + 16);
aom_highbd_convolve8_horiz( aom_highbd_convolve8_horiz(ref8 - ref_stride * ((filter.taps >> 1) - 1),
CONVERT_TO_BYTEPTR(ref - ref_stride * ((filter.taps >> 1) - 1)), ref_stride, CONVERT_TO_BYTEPTR(temp),
ref_stride, CONVERT_TO_BYTEPTR(temp), MAX_SB_SIZE, kernel_x, 16, NULL, MAX_SB_SIZE, kernel_x, 16, NULL, -1, width,
-1, width, intermediate_height, bd); intermediate_height, bd);
aom_highbd_convolve8_vert( aom_highbd_convolve8_vert(
CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)), CONVERT_TO_BYTEPTR(temp + MAX_SB_SIZE * ((filter.taps >> 1) - 1)),
MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y, MAX_SB_SIZE, CONVERT_TO_BYTEPTR(comp_pred), width, NULL, -1, kernel_y,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment