Commit 940c22a2 authored by Yunqing Wang

Modify convolve function prototype

This patch modifies the convolve function prototypes. The temp buffer used in
the convolve functions is already available in conv_params, so it doesn't need
to be passed in. Instead, pass in the destination buffer so that the
result can be written to the destination directly in the single-ref case.
More patches will follow.

Change-Id: Ib28dc3ba5783a1034c70570d78fa8c8af7cbed7c
parent 5891f98f
......@@ -585,26 +585,26 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
}
# CONVOLVE_ROUND/COMPOUND_ROUND functions
add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_2d/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d sse2 avx2/;
add_proto qw/void av1_convolve_rounding/, "const int32_t *src, int src_stride, uint8_t *dst, int dst_stride, int w, int h, int bits";
specialize qw/av1_convolve_rounding avx2/;
add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy sse2/;
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x sse2/;
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_y sse2/;
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_scale sse4_1/;
if (aom_config("CONFIG_JNT_COMP") eq "yes") {
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_2d/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d sse4_1/;
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
add_proto qw/void av1_jnt_convolve_2d_copy/, "const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_jnt_convolve_2d_copy sse2/;
}
......
......@@ -373,18 +373,22 @@ void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
bit widths for various intermediate values, see the comments above
av1_warp_affine_c.
*/
void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
void av1_convolve_2d_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bd = 8;
(void)dst0;
(void)dst_stride0;
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
......@@ -425,16 +429,20 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
}
}
void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
void av1_convolve_y_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
(void)filter_params_x;
(void)subpel_x_q4;
(void)dst0;
(void)dst_stride0;
// vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
......@@ -454,16 +462,20 @@ void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
}
}
void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
void av1_convolve_x_c(const uint8_t *src, int src_stride, const uint8_t *dst0,
int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bits = FILTER_BITS - conv_params->round_1;
(void)filter_params_y;
(void)subpel_y_q4;
(void)dst0;
(void)dst_stride0;
// horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
......@@ -484,11 +496,13 @@ void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
}
void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
......@@ -496,6 +510,8 @@ void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
(void)dst0;
(void)dst_stride0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
......@@ -510,17 +526,21 @@ void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1;
int im_stride = w;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bd = 8;
(void)dst0;
(void)dst_stride0;
// horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride;
......@@ -573,11 +593,13 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
}
void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
......@@ -585,6 +607,8 @@ void av1_jnt_convolve_2d_copy_c(const uint8_t *src, int src_stride,
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
(void)dst0;
(void)dst_stride0;
for (int y = 0; y < h; ++y) {
for (int x = 0; x < w; ++x) {
......@@ -712,111 +736,59 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
&filter_params_y);
#endif
if (filter_params_y.taps < filter_params_x.taps) {
uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
(MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
int tr_dst_stride = MAX_SB_SIZE;
int fo_vert = filter_params_y.taps / 2 - 1;
int fo_horiz = filter_params_x.taps / 2 - 1;
transpose_uint8(tr_src, tr_src_stride,
src - fo_vert * src_stride - fo_horiz, src_stride,
w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
conv_params->dst_stride, w, h);
// horizontal and vertical parameters are swapped because of the transpose
#if CONFIG_JNT_COMP
if (scaled)
av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
tr_src_stride, tr_dst, tr_dst_stride, h, w,
&filter_params_y, &filter_params_x, subpel_y_q4,
y_step_q4, subpel_x_q4, x_step_q4, conv_params);
else
av1_jnt_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
tr_src_stride, tr_dst, tr_dst_stride, h, w,
&filter_params_y, &filter_params_x, subpel_y_q4,
subpel_x_q4, conv_params);
#else
if (scaled)
av1_convolve_2d_scale(tr_src + fo_horiz * tr_src_stride + fo_vert,
tr_src_stride, tr_dst, tr_dst_stride, h, w,
&filter_params_y, &filter_params_x, subpel_y_q4,
y_step_q4, subpel_x_q4, x_step_q4, conv_params);
else
av1_convolve_2d(tr_src + fo_horiz * tr_src_stride + fo_vert,
tr_src_stride, tr_dst, tr_dst_stride, h, w,
&filter_params_y, &filter_params_x, subpel_y_q4,
subpel_x_q4, conv_params);
#endif // CONFIG_JNT_COMP
transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
tr_dst_stride, h, w);
if (scaled) {
av1_convolve_2d_scale(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, conv_params);
} else {
#if CONFIG_JNT_COMP
if (scaled) {
av1_convolve_2d_scale(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, conv_params);
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_jnt_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_q4,
subpel_y_q4, conv_params);
} else if (subpel_x_q4 == 0) {
// place holder
av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_q4,
subpel_y_q4, conv_params);
} else if (subpel_y_q4 == 0) {
// place holder
av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_q4,
subpel_y_q4, conv_params);
} else {
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_jnt_convolve_2d_copy(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h,
&filter_params_x, &filter_params_y,
subpel_x_q4, subpel_y_q4, conv_params);
} else if (subpel_x_q4 == 0) {
// place holder
av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
} else if (subpel_y_q4 == 0) {
// place holder
av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
} else {
av1_jnt_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
}
av1_jnt_convolve_2d(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_q4,
subpel_y_q4, conv_params);
}
}
#else
if (scaled) {
av1_convolve_2d_scale(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, x_step_q4,
subpel_y_q4, y_step_q4, conv_params);
if (scaled) {
av1_convolve_2d_scale(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, x_step_q4, subpel_y_q4,
y_step_q4, conv_params);
} else {
// Special case convolve functions should produce the same result as
// av1_convolve_2d.
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_convolve_2d_copy(src, src_stride, dst, dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_q4,
subpel_y_q4, conv_params);
} else if (subpel_x_q4 == 0) {
av1_convolve_y(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
} else if (subpel_y_q4 == 0) {
av1_convolve_x(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
} else {
// Special case convolve functions should produce the same result as
// av1_convolve_2d.
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_convolve_2d_copy(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
} else if (subpel_x_q4 == 0) {
av1_convolve_y(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
} else if (subpel_y_q4 == 0) {
av1_convolve_x(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
} else {
// subpel_x_q4 != 0 && subpel_y_q4 != 0
av1_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
}
// subpel_x_q4 != 0 && subpel_y_q4 != 0
av1_convolve_2d(src, src_stride, dst, dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
}
#endif // CONFIG_JNT_COMP
}
#endif // CONFIG_JNT_COMP
}
#if CONFIG_HIGHBITDEPTH
......
......@@ -18,12 +18,16 @@
#include "av1/common/convolve.h"
void av1_convolve_2d_avx2(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int bd = 8;
(void)dst0;
(void)dst_stride0;
DECLARE_ALIGNED(32, int16_t,
im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
......
......@@ -18,12 +18,16 @@
#include "av1/common/convolve.h"
void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int bd = 8;
(void)dst0;
(void)dst_stride0;
DECLARE_ALIGNED(16, int16_t,
im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
......@@ -201,15 +205,19 @@ void av1_convolve_2d_sse2(const uint8_t *src, int src_stride,
}
void av1_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
(void)filter_params_x;
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
(void)dst0;
(void)dst_stride0;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
......@@ -314,15 +322,19 @@ void av1_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_copy_sse2(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
(void)filter_params_x;
(void)filter_params_y;
(void)subpel_x_q4;
(void)subpel_y_q4;
(void)dst0;
(void)dst_stride0;
const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
......
......@@ -20,12 +20,16 @@
#if CONFIG_JNT_COMP
void av1_jnt_convolve_2d_sse4_1(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
const uint8_t *dst0, int dst_stride0, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
const int bd = 8;
(void)dst0;
(void)dst_stride0;
DECLARE_ALIGNED(16, int16_t,
im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]);
......
......@@ -17,12 +17,14 @@
#include "aom_dsp/aom_filter.h"
#include "av1/common/convolve.h"
void av1_convolve_y_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
void av1_convolve_y_sse2(const uint8_t *src, int src_stride,
const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
int i, j;
const int fo_vert = filter_params_y->taps / 2 - 1;
const int do_average = conv_params->do_average;
......@@ -33,6 +35,8 @@ void av1_convolve_y_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
(void)filter_params_x;
(void)subpel_x_q4;
(void)dst0;
(void)dst_stride0;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK);
......@@ -114,12 +118,14 @@ void av1_convolve_y_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
}
}
void av1_convolve_x_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
void av1_convolve_x_sse2(const uint8_t *src, int src_stride,
const uint8_t *dst0, int dst_stride0, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) {
CONV_BUF_TYPE *dst = conv_params->dst;
int dst_stride = conv_params->dst_stride;
int i, j;
const int fo_horiz = filter_params_x->taps / 2 - 1;
const int do_average = conv_params->do_average;
......@@ -130,6 +136,8 @@ void av1_convolve_x_sse2(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
(void)filter_params_y;
(void)subpel_y_q4;
(void)dst0;
(void)dst_stride0;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK);
......
......@@ -87,10 +87,10 @@ void AV1Convolve2DTest::RunCheckOutput(convolve_2d_func test_impl) {
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
av1_convolve_2d_c(input + offset_r * w + offset_c, w, output,
av1_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params1);
test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
out_w, out_h, &filter_params_x, &filter_params_y, subx,
suby, &conv_params2);
......@@ -155,10 +155,10 @@ void AV1Convolve2DTest::RunCheckOutput2(convolve_2d_func test_impl) {
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, output,
av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params1);
test_impl(input + offset_r * w + offset_c, w, output2, MAX_SB_SIZE,
test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
out_w, out_h, &filter_params_x, &filter_params_y, subx,
suby, &conv_params2);
......@@ -196,13 +196,13 @@ void AV1Convolve2DTest::RunCheckOutput2(convolve_2d_func test_impl) {
// Choose random locations within the source block
int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w,
output, MAX_SB_SIZE, out_w, out_h,
av1_jnt_convolve_2d_c(input + offset_r * w + offset_c, w, NULL,
MAX_SB_SIZE, out_w, out_h,
&filter_params_x, &filter_params_y, subx,
suby, &conv_params1);
test_impl(input + offset_r * w + offset_c, w, output2,
MAX_SB_SIZE, out_w, out_h, &filter_params_x,
&filter_params_y, subx, suby, &conv_params2);
test_impl(input + offset_r * w + offset_c, w, NULL, MAX_SB_SIZE,
out_w, out_h, &filter_params_x, &filter_params_y,
subx, suby, &conv_params2);
for (j = 0; j < out_h; ++j)
for (k = 0; k < out_w; ++k) {
......
......@@ -25,7 +25,7 @@ namespace libaom_test {
namespace AV1Convolve2D {
typedef void (*convolve_2d_func)(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w,
const uint8_t *dst, int dst_stride, int w,
int h, InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment