Commit d78224f7 authored by Yunqing Wang's avatar Yunqing Wang

Add convolve 1d functions

Added convolve 1d functions for 1d-only sub-pixel interpolation, including
the av1_convolve_x() function for the "subpel_x_q4 == 0" case and the
av1_convolve_y() function for the "subpel_y_q4 == 0" case. The cases are
handled separately to speed up the codec (especially the decoder). The
optimized versions will be added later.

Change-Id: I08fd317d02c786af68270a971bb3b17c94bd52ca
parent 43e30f49
......@@ -573,6 +573,10 @@ if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
# The prototypes in this branch are registered only when CONFIG_COMPOUND_ROUND
# is not "yes".
if (aom_config("CONFIG_COMPOUND_ROUND") ne "yes") {
# Copy variant; an sse2 specialization is listed for it.
add_proto qw/void av1_convolve_2d_copy/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_2d_copy sse2/;
# 1-D variants. Both take the same parameter list as av1_convolve_2d_copy, and
# only "c" is listed in their specialize lines.
add_proto qw/void av1_convolve_x/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_x c/;
add_proto qw/void av1_convolve_y/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_q4, const int subpel_y_q4, ConvolveParams *conv_params";
specialize qw/av1_convolve_y c/;
}
# Scaled variant: besides the sub-pixel offsets it also takes per-axis step
# arguments (x_step_qn, y_step_qn).
add_proto qw/void av1_convolve_2d_scale/, "const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, int dst_stride, int w, int h, InterpFilterParams *filter_params_x, InterpFilterParams *filter_params_y, const int subpel_x_qn, const int x_step_qn, const int subpel_y_q4, const int y_step_qn, ConvolveParams *conv_params";
......
......@@ -623,6 +623,66 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
}
}
// 1-D convolution that filters along the vertical direction only.
//
// For every output sample, the kernel selected from filter_params_y by
// (subpel_y_q4 & SUBPEL_MASK) is applied to `taps` source samples taken from
// consecutive rows of the same column, starting (taps / 2 - 1) rows above the
// output row. The sum is then multiplied by
// 1 << (FILTER_BITS - round_0 - round_1) and either stored into dst or added
// to the value already in dst, depending on conv_params->do_average.
//
// filter_params_x and subpel_x_q4 are accepted for signature compatibility
// and are not used.
void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                      int dst_stride, int w, int h,
                      InterpFilterParams *filter_params_x,
                      InterpFilterParams *filter_params_y,
                      const int subpel_x_q4, const int subpel_y_q4,
                      ConvolveParams *conv_params) {
  // The horizontal arguments play no role in this variant.
  (void)filter_params_x;
  (void)subpel_x_q4;

  const int rows_above = filter_params_y->taps / 2 - 1;
  const int shift = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
  const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  const int num_taps = filter_params_y->taps;

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Topmost source sample covered by the kernel for this output position.
      const uint8_t *const first_tap =
          &src[(row - rows_above) * src_stride + col];
      CONV_BUF_TYPE sum = 0;
      for (int tap = 0; tap < num_taps; ++tap) {
        // Step down one source row per tap.
        sum += kernel[tap] * first_tap[tap * src_stride];
      }
      sum *= (1 << shift);

      CONV_BUF_TYPE *const out = &dst[row * dst_stride + col];
      if (conv_params->do_average) {
        *out += sum;
      } else {
        *out = sum;
      }
    }
  }
}
// 1-D convolution that filters along the horizontal direction only.
//
// For every output sample, the kernel selected from filter_params_x by
// (subpel_x_q4 & SUBPEL_MASK) is applied to `taps` consecutive source samples
// of the same row, starting (taps / 2 - 1) samples to the left of the output
// column. The sum is rounded with ROUND_POWER_OF_TWO by conv_params->round_0,
// multiplied by 1 << (FILTER_BITS - round_1), and either stored into dst or
// added to the value already in dst, depending on conv_params->do_average.
//
// filter_params_y and subpel_y_q4 are accepted for signature compatibility
// and are not used.
void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                      int dst_stride, int w, int h,
                      InterpFilterParams *filter_params_x,
                      InterpFilterParams *filter_params_y,
                      const int subpel_x_q4, const int subpel_y_q4,
                      ConvolveParams *conv_params) {
  // The vertical arguments play no role in this variant.
  (void)filter_params_y;
  (void)subpel_y_q4;

  const int cols_left = filter_params_x->taps / 2 - 1;
  const int shift = FILTER_BITS - conv_params->round_1;
  const int16_t *const kernel = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  const int num_taps = filter_params_x->taps;

  for (int row = 0; row < h; ++row) {
    for (int col = 0; col < w; ++col) {
      // Leftmost source sample covered by the kernel for this output position.
      const uint8_t *const first_tap =
          &src[row * src_stride + col - cols_left];
      CONV_BUF_TYPE sum = 0;
      for (int tap = 0; tap < num_taps; ++tap) {
        sum += kernel[tap] * first_tap[tap];
      }
      // Round by round_0 first, then apply the remaining scale.
      sum = (1 << shift) * ROUND_POWER_OF_TWO(sum, conv_params->round_0);

      CONV_BUF_TYPE *const out = &dst[row * dst_stride + col];
      if (conv_params->do_average) {
        *out += sum;
      } else {
        *out = sum;
      }
    }
  }
}
void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
CONV_BUF_TYPE *dst, int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
......@@ -882,17 +942,21 @@ void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
#else
// Special case convolve functions should produce the same result as
// av1_convolve_2d.
if (subpel_x_q4 == 0 && subpel_y_q4 == 0) {
av1_convolve_2d_copy(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
} else if (subpel_x_q4 == 0 || subpel_y_q4 == 0) {
// place holder
av1_convolve_2d(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4,
conv_params);
} else if (subpel_x_q4 == 0) {
av1_convolve_x(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
} else if (subpel_y_q4 == 0) {
av1_convolve_y(src, src_stride, conv_params->dst,
conv_params->dst_stride, w, h, &filter_params_x,
&filter_params_y, subpel_x_q4, subpel_y_q4, conv_params);
} else {
// subpel_x_q4 != 0 && subpel_y_q4 != 0
av1_convolve_2d(src, src_stride, conv_params->dst,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment