Commit 674bffdc authored by Angie Chiang
Browse files

Add rounding option into av1_convolve

Use a `round` flag in ConvolveParams to indicate whether the result
written to the destination buffer has been rounded by FILTER_BITS.
This CL is part of the effort to reduce interpolation rounding error in
compound prediction mode.

Change-Id: I49e522a89a67a771f5a6e7fbbc609e97923aecb6
parent 203b1d30
...@@ -9,6 +9,7 @@ print <<EOF ...@@ -9,6 +9,7 @@ print <<EOF
#include "av1/common/enums.h" #include "av1/common/enums.h"
#include "av1/common/quant_common.h" #include "av1/common/quant_common.h"
#include "av1/common/filter.h" #include "av1/common/filter.h"
#include "av1/common/convolve.h"
#include "av1/common/av1_txfm.h" #include "av1/common/av1_txfm.h"
struct macroblockd; struct macroblockd;
...@@ -41,10 +42,10 @@ if ($opts{arch} eq "x86_64") { ...@@ -41,10 +42,10 @@ if ($opts{arch} eq "x86_64") {
add_proto qw/void av1_convolve_init/, "void"; add_proto qw/void av1_convolve_init/, "void";
specialize qw/av1_convolve_init ssse3/; specialize qw/av1_convolve_init ssse3/;
add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg"; add_proto qw/void av1_convolve_horiz/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_horiz ssse3/; specialize qw/av1_convolve_horiz ssse3/;
add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, int avg"; add_proto qw/void av1_convolve_vert/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, const InterpFilterParams fp, const int subpel_x_q4, int x_step_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_vert ssse3/; specialize qw/av1_convolve_vert ssse3/;
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") { if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
......
...@@ -27,7 +27,8 @@ ...@@ -27,7 +27,8 @@
void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg) { const int subpel_x_q4, int x_step_q4,
ConvolveParams *conv_params) {
int x, y; int x, y;
int filter_size = filter_params.taps; int filter_size = filter_params.taps;
src -= filter_size / 2 - 1; src -= filter_size / 2 - 1;
...@@ -39,12 +40,15 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -39,12 +40,15 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
filter_params, x_q4 & SUBPEL_MASK); filter_params, x_q4 & SUBPEL_MASK);
int k, sum = 0; int k, sum = 0;
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
if (avg) {
dst[x] = ROUND_POWER_OF_TWO( if (conv_params->round)
dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
} else {
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); if (conv_params->ref)
} dst[x] = ROUND_POWER_OF_TWO(dst[x] + sum, 1);
else
dst[x] = sum;
x_q4 += x_step_q4; x_q4 += x_step_q4;
} }
src += src_stride; src += src_stride;
...@@ -55,7 +59,8 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -55,7 +59,8 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg) { const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params) {
int x, y; int x, y;
int filter_size = filter_params.taps; int filter_size = filter_params.taps;
src -= src_stride * (filter_size / 2 - 1); src -= src_stride * (filter_size / 2 - 1);
...@@ -69,14 +74,15 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -69,14 +74,15 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
int k, sum = 0; int k, sum = 0;
for (k = 0; k < filter_size; ++k) for (k = 0; k < filter_size; ++k)
sum += src_y[k * src_stride] * y_filter[k]; sum += src_y[k * src_stride] * y_filter[k];
if (avg) {
dst[y * dst_stride] = ROUND_POWER_OF_TWO( if (conv_params->round)
dst[y * dst_stride] + sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
1); if (conv_params->ref)
} else { dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + sum, 1);
dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); else
} dst[y * dst_stride] = sum;
y_q4 += y_step_q4; y_q4 += y_step_q4;
} }
++src; ++src;
...@@ -85,20 +91,27 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -85,20 +91,27 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
} }
static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst, static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int avg) { int dst_stride, int w, int h,
if (avg == 0) { ConvolveParams *conv_params) {
int r; if (conv_params->ref == 0) {
int r, c;
for (r = 0; r < h; ++r) { for (r = 0; r < h; ++r) {
memcpy(dst, src, w); memcpy(dst, src, w);
if (conv_params->round == 0)
for (c = 0; c < w; ++c) dst[c] = dst[c] << FILTER_BITS;
src += src_stride; src += src_stride;
dst += dst_stride; dst += dst_stride;
} }
} else { } else {
int r, c; int r, c;
for (r = 0; r < h; ++r) { for (r = 0; r < h; ++r) {
for (c = 0; c < w; ++c) { if (conv_params->round)
dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1)); for (c = 0; c < w; ++c)
} dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
else
for (c = 0; c < w; ++c)
dst[c] = clip_pixel(
ROUND_POWER_OF_TWO(dst[c] + (src[c] << FILTER_BITS), 1));
src += src_stride; src += src_stride;
dst += dst_stride; dst += dst_stride;
} }
...@@ -108,11 +121,12 @@ static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -108,11 +121,12 @@ static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg) { const int subpel_x_q4, int x_step_q4,
if (filter_params.taps == SUBPEL_TAPS) { ConvolveParams *conv_params) {
if (filter_params.taps == SUBPEL_TAPS && conv_params->round == 1) {
const int16_t *filter_x = const int16_t *filter_x =
av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4); av1_get_interp_filter_subpel_kernel(filter_params, subpel_x_q4);
if (avg == 0) if (conv_params->ref == 0)
aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4, aom_convolve8_horiz(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
NULL, -1, w, h); NULL, -1, w, h);
else else
...@@ -120,18 +134,19 @@ void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -120,18 +134,19 @@ void av1_convolve_horiz_facade(const uint8_t *src, int src_stride, uint8_t *dst,
x_step_q4, NULL, -1, w, h); x_step_q4, NULL, -1, w, h);
} else { } else {
av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params, av1_convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg); subpel_x_q4, x_step_q4, conv_params);
} }
} }
void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg) { const int subpel_y_q4, int y_step_q4,
if (filter_params.taps == SUBPEL_TAPS) { ConvolveParams *conv_params) {
if (filter_params.taps == SUBPEL_TAPS && conv_params->round == 1) {
const int16_t *filter_y = const int16_t *filter_y =
av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4); av1_get_interp_filter_subpel_kernel(filter_params, subpel_y_q4);
if (avg == 0) { if (conv_params->ref == 0) {
aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y, aom_convolve8_vert(src, src_stride, dst, dst_stride, NULL, -1, filter_y,
y_step_q4, w, h); y_step_q4, w, h);
} else { } else {
...@@ -140,7 +155,7 @@ void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -140,7 +155,7 @@ void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
} }
} else { } else {
av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params, av1_convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg); subpel_y_q4, y_step_q4, conv_params);
} }
} }
...@@ -152,7 +167,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -152,7 +167,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
const InterpFilter interp_filter, const InterpFilter interp_filter,
#endif #endif
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4, const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, int ref_idx) { int y_step_q4, ConvolveParams *conv_params) {
int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0; int ignore_horiz = x_step_q4 == 16 && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0; int ignore_vert = y_step_q4 == 16 && subpel_y_q4 == 0;
...@@ -162,29 +177,31 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -162,29 +177,31 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
assert(x_step_q4 <= MAX_STEP); assert(x_step_q4 <= MAX_STEP);
if (ignore_horiz && ignore_vert) { if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx); convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
} else if (ignore_vert) { } else if (ignore_vert) {
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
InterpFilterParams filter_params = InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]); av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
#else #else
InterpFilterParams filter_params = InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter); av1_get_interp_filter_params(interp_filter);
#endif #endif
assert(filter_params.taps <= MAX_FILTER_TAP); assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_horiz_facade(src, src_stride, dst, dst_stride, w, h, av1_convolve_horiz_facade(src, src_stride, dst, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, ref_idx); filter_params, subpel_x_q4, x_step_q4,
conv_params);
} else if (ignore_horiz) { } else if (ignore_horiz) {
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
InterpFilterParams filter_params = InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter[2 * ref_idx]); av1_get_interp_filter_params(interp_filter[2 * conv_params->ref]);
#else #else
InterpFilterParams filter_params = InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter); av1_get_interp_filter_params(interp_filter);
#endif #endif
assert(filter_params.taps <= MAX_FILTER_TAP); assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_vert_facade(src, src_stride, dst, dst_stride, w, h, av1_convolve_vert_facade(src, src_stride, dst, dst_stride, w, h,
filter_params, subpel_y_q4, y_step_q4, ref_idx); filter_params, subpel_y_q4, y_step_q4,
conv_params);
} else { } else {
// temp's size is set to a 256 aligned value to facilitate SIMD // temp's size is set to a 256 aligned value to facilitate SIMD
// implementation. The value is greater than (maximum possible intermediate // implementation. The value is greater than (maximum possible intermediate
...@@ -196,11 +213,11 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -196,11 +213,11 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
InterpFilterParams filter_params; InterpFilterParams filter_params;
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x = InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]); av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y = InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]); av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP && if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) { interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter. // Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost. // This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP); filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
...@@ -211,6 +228,9 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -211,6 +228,9 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
if (filter_params_y.taps < filter_params_x.taps) { if (filter_params_y.taps < filter_params_x.taps) {
int intermediate_width; int intermediate_width;
int temp_stride = max_intermediate_size; int temp_stride = max_intermediate_size;
ConvolveParams temp_conv_params;
temp_conv_params.ref = 0;
temp_conv_params.round = 1;
filter_params = filter_params_y; filter_params = filter_params_y;
filter_size = filter_params_x.taps; filter_size = filter_params_x.taps;
intermediate_width = intermediate_width =
...@@ -221,19 +241,23 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -221,19 +241,23 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
av1_convolve_vert_facade(src - (filter_size / 2 - 1), src_stride, temp, av1_convolve_vert_facade(src - (filter_size / 2 - 1), src_stride, temp,
temp_stride, intermediate_width, h, temp_stride, intermediate_width, h,
filter_params, subpel_y_q4, y_step_q4, 0); filter_params, subpel_y_q4, y_step_q4,
&temp_conv_params);
filter_params = filter_params_x; filter_params = filter_params_x;
assert(filter_params.taps <= MAX_FILTER_TAP); assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst, av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_x_q4, dst_stride, w, h, filter_params, subpel_x_q4,
x_step_q4, ref_idx); x_step_q4, conv_params);
} else } else
#endif // CONFIG_DUAL_FILTER #endif // CONFIG_DUAL_FILTER
{ {
int intermediate_height; int intermediate_height;
int temp_stride = MAX_SB_SIZE; int temp_stride = MAX_SB_SIZE;
ConvolveParams temp_conv_params;
temp_conv_params.ref = 0;
temp_conv_params.round = 1;
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
filter_params = filter_params_x; filter_params = filter_params_x;
filter_size = filter_params_y.taps; filter_size = filter_params_y.taps;
...@@ -251,16 +275,16 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -251,16 +275,16 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
av1_convolve_horiz_facade(src - src_stride * (filter_size / 2 - 1), av1_convolve_horiz_facade(src - src_stride * (filter_size / 2 - 1),
src_stride, temp, temp_stride, w, src_stride, temp, temp_stride, w,
intermediate_height, filter_params, subpel_x_q4, intermediate_height, filter_params, subpel_x_q4,
x_step_q4, 0); x_step_q4, &temp_conv_params);
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
filter_params = filter_params_y; filter_params = filter_params_y;
#endif #endif
assert(filter_params.taps <= MAX_FILTER_TAP); assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_vert_facade(temp + temp_stride * (filter_size / 2 - 1), av1_convolve_vert_facade(
temp_stride, dst, dst_stride, w, h, temp + temp_stride * (filter_size / 2 - 1), temp_stride, dst,
filter_params, subpel_y_q4, y_step_q4, ref_idx); dst_stride, w, h, filter_params, subpel_y_q4, y_step_q4, conv_params);
} }
} }
} }
......
...@@ -17,6 +17,11 @@ ...@@ -17,6 +17,11 @@
extern "C" { extern "C" {
#endif #endif
// Options passed to the av1_convolve family of functions that control how
// the filtered result is written to the destination buffer.
typedef struct ConvolveParams {
// Reference index supplied by the caller (inter_predictor passes ref_idx).
// When non-zero, the convolve functions average the new result with the
// value already in dst instead of overwriting it.
int ref;
// When non-zero, the filter sum is rounded by FILTER_BITS and clipped to
// pixel range before being stored; when zero, the unrounded value is
// stored (convolve_copy scales the source up by FILTER_BITS instead).
int round;
} ConvolveParams;
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER #if CONFIG_DUAL_FILTER
...@@ -25,7 +30,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -25,7 +30,7 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
const InterpFilter interp_filter, const InterpFilter interp_filter,
#endif #endif
const int subpel_x, int xstep, const int subpel_y, int ystep, const int subpel_x, int xstep, const int subpel_y, int ystep,
int avg); ConvolveParams *conv_params);
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
void av1_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst, void av1_highbd_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
......
...@@ -62,8 +62,11 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, ...@@ -62,8 +62,11 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
// ref_idx > 0 means this is the second reference frame // ref_idx > 0 means this is the second reference frame
// first reference frame's prediction result is already in dst // first reference frame's prediction result is already in dst
// therefore we need to average the first and second results // therefore we need to average the first and second results
ConvolveParams conv_params;
conv_params.round = 1;
conv_params.ref = ref_idx;
av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter, av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
subpel_x, xs, subpel_y, ys, ref_idx); subpel_x, xs, subpel_y, ys, &conv_params);
} }
} }
......
...@@ -667,16 +667,17 @@ static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch, ...@@ -667,16 +667,17 @@ static void filter_horiz_v4p_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, int avg) { const int subpel_x_q4, int x_step_q4,
ConvolveParams *conv_params) {
DECLARE_ALIGNED(16, uint16_t, temp[8 * 8]); DECLARE_ALIGNED(16, uint16_t, temp[8 * 8]);
__m128i verf[6]; __m128i verf[6];
__m128i horf[2]; __m128i horf[2];
SubpelFilterCoeffs hCoeffs, vCoeffs; SubpelFilterCoeffs hCoeffs, vCoeffs;
const uint8_t *src_ptr; const uint8_t *src_ptr;
store_pixel_t store2p = store2pixelTab[avg]; store_pixel_t store2p = store2pixelTab[conv_params->ref];
store_pixel_t store4p = store4pixelTab[avg]; store_pixel_t store4p = store4pixelTab[conv_params->ref];
transpose_to_dst_t transpose_4x4 = trans4x4Tab[avg]; transpose_to_dst_t transpose_4x4 = trans4x4Tab[conv_params->ref];
transpose_to_dst_t transpose_8x8 = trans8x8Tab[avg]; transpose_to_dst_t transpose_8x8 = trans8x8Tab[conv_params->ref];
const int tapsNum = filter_params.taps; const int tapsNum = filter_params.taps;
int block_height, block_residu; int block_height, block_residu;
...@@ -685,7 +686,7 @@ void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -685,7 +686,7 @@ void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
if (0 == subpel_x_q4 || 16 != x_step_q4) { if (0 == subpel_x_q4 || 16 != x_step_q4) {
av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params, av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg); subpel_x_q4, x_step_q4, conv_params);
return; return;
} }
...@@ -694,7 +695,7 @@ void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -694,7 +695,7 @@ void av1_convolve_horiz_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
if (!hCoeffs || !vCoeffs) { if (!hCoeffs || !vCoeffs) {
av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params, av1_convolve_horiz_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_x_q4, x_step_q4, avg); subpel_x_q4, x_step_q4, conv_params);
return; return;
} }
...@@ -881,19 +882,20 @@ static void filter_vert_compute_large(const uint8_t *src, int src_stride, ...@@ -881,19 +882,20 @@ static void filter_vert_compute_large(const uint8_t *src, int src_stride,
void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, int avg) { const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params) {
__m128i verf[6]; __m128i verf[6];
SubpelFilterCoeffs vCoeffs; SubpelFilterCoeffs vCoeffs;
const uint8_t *src_ptr; const uint8_t *src_ptr;
uint8_t *dst_ptr = dst; uint8_t *dst_ptr = dst;
store_pixel_t store2p = store2pixelTab[avg]; store_pixel_t store2p = store2pixelTab[conv_params->ref];
store_pixel_t store4p = store4pixelTab[avg]; store_pixel_t store4p = store4pixelTab[conv_params->ref];
store_pixel_t store8p = store8pixelTab[avg]; store_pixel_t store8p = store8pixelTab[conv_params->ref];
const int tapsNum = filter_params.taps; const int tapsNum = filter_params.taps;
if (0 == subpel_y_q4 || 16 != y_step_q4) { if (0 == subpel_y_q4 || 16 != y_step_q4) {
av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params, av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg); subpel_y_q4, y_step_q4, conv_params);
return; return;
} }
...@@ -901,7 +903,7 @@ void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -901,7 +903,7 @@ void av1_convolve_vert_ssse3(const uint8_t *src, int src_stride, uint8_t *dst,
if (!vCoeffs) { if (!vCoeffs) {
av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params, av1_convolve_vert_c(src, src_stride, dst, dst_stride, w, h, filter_params,
subpel_y_q4, y_step_q4, avg); subpel_y_q4, y_step_q4, conv_params);
return; return;
} }
......
...@@ -23,7 +23,8 @@ using std::tr1::tuple; ...@@ -23,7 +23,8 @@ using std::tr1::tuple;
using libaom_test::ACMRandom; using libaom_test::ACMRandom;
typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int, typedef void (*conv_filter_t)(const uint8_t *, int, uint8_t *, int, int, int,
const InterpFilterParams, const int, int, int); const InterpFilterParams, const int, int,
ConvolveParams *);
#if CONFIG_AOM_HIGHBITDEPTH #if CONFIG_AOM_HIGHBITDEPTH
typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int, typedef void (*hbd_conv_filter_t)(const uint16_t *, int, uint16_t *, int, int,
int, const InterpFilterParams, const int, int, int, const InterpFilterParams, const int, int,
...@@ -68,7 +69,8 @@ class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> { ...@@ -68,7 +69,8 @@ class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
height_ = std::tr1::get<1>(block); height_ = std::tr1::get<1>(block);
filter_ = GET_PARAM(3); filter_ = GET_PARAM(3);
subpel_ = GET_PARAM(4); subpel_ = GET_PARAM(4);
avg_ = GET_PARAM(5); conv_params_.round = 1;
conv_params_.ref = GET_PARAM(5);
alloc_ = new uint8_t[maxBlockSize * 4]; alloc_ = new uint8_t[maxBlockSize * 4];
src_ = alloc_ + (vertiOffset * maxWidth); src_ = alloc_ + (vertiOffset * maxWidth);
...@@ -102,7 +104,7 @@ class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> { ...@@ -102,7 +104,7 @@ class AV1ConvolveOptimzTest : public ::testing::TestWithParam<ConvParams> {
int height_; int height_;
int filter_; int filter_;
int subpel_; int subpel_;
int avg_; ConvolveParams conv_params_;
}; };