Commit 63dac2e6 authored by Sebastien Alaiwan
Browse files

convolve.c: reduce iterator scopes

Change-Id: Ia921d259fe3462cfe8655e48978595fd9db0c0f9
parent fb97241b
...@@ -34,18 +34,17 @@ void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -34,18 +34,17 @@ void av1_convolve_horiz_rs_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int dst_stride, int w, int h,
const int16_t *x_filters, int interp_taps, const int16_t *x_filters, int interp_taps,
const int x0_qn, const int x_step_qn) { const int x0_qn, const int x_step_qn) {
int x, y;
src -= interp_taps / 2 - 1; src -= interp_taps / 2 - 1;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
int x_qn = x0_qn; int x_qn = x0_qn;
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS]; const uint8_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
const int x_filter_idx = const int x_filter_idx =
(x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
assert(x_filter_idx <= RS_SUBPEL_MASK); assert(x_filter_idx <= RS_SUBPEL_MASK);
const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps]; const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
int k, sum = 0; int sum = 0;
for (k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k]; for (int k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
x_qn += x_step_qn; x_qn += x_step_qn;
} }
...@@ -59,18 +58,17 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride, ...@@ -59,18 +58,17 @@ void av1_highbd_convolve_horiz_rs_c(const uint16_t *src, int src_stride,
uint16_t *dst, int dst_stride, int w, int h, uint16_t *dst, int dst_stride, int w, int h,
const int16_t *x_filters, int interp_taps, const int16_t *x_filters, int interp_taps,
int x0_qn, int x_step_qn, int bd) { int x0_qn, int x_step_qn, int bd) {
int x, y;
src -= interp_taps / 2 - 1; src -= interp_taps / 2 - 1;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
int x_qn = x0_qn; int x_qn = x0_qn;
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS]; const uint16_t *const src_x = &src[x_qn >> RS_SCALE_SUBPEL_BITS];
const int x_filter_idx = const int x_filter_idx =
(x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS; (x_qn & RS_SCALE_SUBPEL_MASK) >> RS_SCALE_EXTRA_BITS;
assert(x_filter_idx <= RS_SUBPEL_MASK); assert(x_filter_idx <= RS_SUBPEL_MASK);
const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps]; const int16_t *const x_filter = &x_filters[x_filter_idx * interp_taps];
int k, sum = 0; int sum = 0;
for (k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k]; for (int k = 0; k < interp_taps; ++k) sum += src_x[k] * x_filter[k];
dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd);
x_qn += x_step_qn; x_qn += x_step_qn;
} }
...@@ -86,18 +84,17 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -86,18 +84,17 @@ void av1_convolve_horiz_c(const uint8_t *src, int src_stride, uint8_t *dst,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_x_q4, int x_step_q4, const int subpel_x_q4, int x_step_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y;
int filter_size = filter_params.taps; int filter_size = filter_params.taps;
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
src -= filter_size / 2 - 1; src -= filter_size / 2 - 1;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
int x_q4 = subpel_x_q4; int x_q4 = subpel_x_q4;
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
filter_params, x_q4 & SUBPEL_MASK); filter_params, x_q4 & SUBPEL_MASK);
int k, sum = 0; int sum = 0;
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; for (int k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
if (conv_params->do_average) if (conv_params->do_average)
...@@ -117,20 +114,19 @@ void av1_convolve_horiz_scale(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -117,20 +114,19 @@ void av1_convolve_horiz_scale(const uint8_t *src, int src_stride, uint8_t *dst,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_x_qn, int x_step_qn, const int subpel_x_qn, int x_step_qn,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y;
int filter_size = filter_params.taps; int filter_size = filter_params.taps;
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
src -= filter_size / 2 - 1; src -= filter_size / 2 - 1;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
int x_qn = subpel_x_qn; int x_qn = subpel_x_qn;
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS]; const uint8_t *const src_x = &src[x_qn >> SCALE_SUBPEL_BITS];
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(x_filter_idx < SUBPEL_SHIFTS); assert(x_filter_idx < SUBPEL_SHIFTS);
const int16_t *x_filter = const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(filter_params, x_filter_idx); av1_get_interp_filter_subpel_kernel(filter_params, x_filter_idx);
int k, sum = 0; int sum = 0;
for (k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k]; for (int k = 0; k < filter_size; ++k) sum += src_x[k] * x_filter[k];
sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
if (conv_params->do_average) if (conv_params->do_average)
...@@ -150,18 +146,17 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -150,18 +146,17 @@ void av1_convolve_vert_c(const uint8_t *src, int src_stride, uint8_t *dst,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_y_q4, int y_step_q4, const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y;
int filter_size = filter_params.taps; int filter_size = filter_params.taps;
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
src -= src_stride * (filter_size / 2 - 1); src -= src_stride * (filter_size / 2 - 1);
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
int y_q4 = subpel_y_q4; int y_q4 = subpel_y_q4;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const uint8_t *const src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
filter_params, y_q4 & SUBPEL_MASK); filter_params, y_q4 & SUBPEL_MASK);
int k, sum = 0; int sum = 0;
for (k = 0; k < filter_size; ++k) for (int k = 0; k < filter_size; ++k)
sum += src_y[k * src_stride] * y_filter[k]; sum += src_y[k * src_stride] * y_filter[k];
sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
...@@ -182,21 +177,20 @@ static void av1_convolve_vert_scale(const uint8_t *src, int src_stride, ...@@ -182,21 +177,20 @@ static void av1_convolve_vert_scale(const uint8_t *src, int src_stride,
const InterpFilterParams filter_params, const InterpFilterParams filter_params,
const int subpel_y_qn, int y_step_qn, const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y;
int filter_size = filter_params.taps; int filter_size = filter_params.taps;
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
src -= src_stride * (filter_size / 2 - 1); src -= src_stride * (filter_size / 2 - 1);
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
int y_qn = subpel_y_qn; int y_qn = subpel_y_qn;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
const uint8_t *const src_y = const uint8_t *const src_y =
&src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride]; &src[(y_qn >> SCALE_SUBPEL_BITS) * src_stride];
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(y_filter_idx < SUBPEL_SHIFTS); assert(y_filter_idx < SUBPEL_SHIFTS);
const int16_t *y_filter = const int16_t *y_filter =
av1_get_interp_filter_subpel_kernel(filter_params, y_filter_idx); av1_get_interp_filter_subpel_kernel(filter_params, y_filter_idx);
int k, sum = 0; int sum = 0;
for (k = 0; k < filter_size; ++k) for (int k = 0; k < filter_size; ++k)
sum += src_y[k * src_stride] * y_filter[k]; sum += src_y[k * src_stride] * y_filter[k];
sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); sum = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
...@@ -217,16 +211,14 @@ static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst, ...@@ -217,16 +211,14 @@ static void convolve_copy(const uint8_t *src, int src_stride, uint8_t *dst,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
if (conv_params->do_average == 0) { if (conv_params->do_average == 0) {
int r; for (int r = 0; r < h; ++r) {
for (r = 0; r < h; ++r) {
memcpy(dst, src, w); memcpy(dst, src, w);
src += src_stride; src += src_stride;
dst += dst_stride; dst += dst_stride;
} }
} else { } else {
int r, c; for (int r = 0; r < h; ++r) {
for (r = 0; r < h; ++r) { for (int c = 0; c < w; ++c) {
for (c = 0; c < w; ++c) {
dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1)); dst[c] = clip_pixel(ROUND_POWER_OF_TWO(dst[c] + src[c], 1));
} }
src += src_stride; src += src_stride;
...@@ -371,9 +363,8 @@ void av1_convolve_vert_facade_scale(const uint8_t *src, int src_stride, ...@@ -371,9 +363,8 @@ void av1_convolve_vert_facade_scale(const uint8_t *src, int src_stride,
#if CONFIG_CONVOLVE_ROUND #if CONFIG_CONVOLVE_ROUND
void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst, void av1_convolve_rounding_c(const int32_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h, int bits) { int dst_stride, int w, int h, int bits) {
int r, c; for (int r = 0; r < h; ++r) {
for (r = 0; r < h; ++r) { for (int c = 0; c < w; ++c) {
for (c = 0; c < w; ++c) {
dst[r * dst_stride + c] = dst[r * dst_stride + c] =
clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], bits)); clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], bits));
} }
...@@ -387,7 +378,6 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -387,7 +378,6 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
InterpFilterParams *filter_params_y, InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4, const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1; int im_h = h + filter_params_y->taps - 1;
int im_stride = w; int im_stride = w;
...@@ -398,10 +388,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -398,10 +388,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
const uint8_t *src_horiz = src - fo_vert * src_stride; const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK); *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (y = 0; y < im_h; ++y) { for (int y = 0; y < im_h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
int32_t sum = 0; int32_t sum = 0;
for (k = 0; k < filter_params_x->taps; ++k) { for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
} }
im_block[y * im_stride + x] = im_block[y * im_stride + x] =
...@@ -413,10 +403,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -413,10 +403,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
uint8_t *src_vert = im_block + fo_vert * im_stride; uint8_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK); *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
CONV_BUF_TYPE sum = 0; CONV_BUF_TYPE sum = 0;
for (k = 0; k < filter_params_y->taps; ++k) { for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
} }
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
...@@ -435,7 +425,6 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, ...@@ -435,7 +425,6 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
InterpFilterParams *filter_params_y, InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4, const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; uint8_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1; int im_h = h + filter_params_y->taps - 1;
int im_stride = w; int im_stride = w;
...@@ -446,10 +435,10 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, ...@@ -446,10 +435,10 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
const uint8_t *src_horiz = src - fo_vert * src_stride; const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK); *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (y = 0; y < im_h; ++y) { for (int y = 0; y < im_h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
int32_t sum = 0; int32_t sum = 0;
for (k = 0; k < filter_params_x->taps; ++k) { for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
} }
im_block[y * im_stride + x] = im_block[y * im_stride + x] =
...@@ -461,10 +450,10 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, ...@@ -461,10 +450,10 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
uint8_t *src_vert = im_block + fo_vert * im_stride; uint8_t *src_vert = im_block + fo_vert * im_stride;
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK); *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
CONV_BUF_TYPE sum = 0; CONV_BUF_TYPE sum = 0;
for (k = 0; k < filter_params_y->taps; ++k) { for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
} }
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
...@@ -495,7 +484,6 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, ...@@ -495,7 +484,6 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
const int subpel_x_qn, const int x_step_qn, const int subpel_x_qn, const int x_step_qn,
const int subpel_y_qn, const int y_step_qn, const int subpel_y_qn, const int y_step_qn,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
uint8_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE]; uint8_t im_block[(2 * MAX_SB_SIZE + MAX_FILTER_TAP) * MAX_SB_SIZE];
int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) + int im_h = (((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_params_y->taps; filter_params_y->taps;
...@@ -505,16 +493,16 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, ...@@ -505,16 +493,16 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
// horizontal filter // horizontal filter
const uint8_t *src_horiz = src - fo_vert * src_stride; const uint8_t *src_horiz = src - fo_vert * src_stride;
for (y = 0; y < im_h; ++y) { for (int y = 0; y < im_h; ++y) {
int x_qn = subpel_x_qn; int x_qn = subpel_x_qn;
for (x = 0; x < w; ++x, x_qn += x_step_qn) { for (int x = 0; x < w; ++x, x_qn += x_step_qn) {
const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)]; const uint8_t *const src_x = &src_horiz[(x_qn >> SCALE_SUBPEL_BITS)];
const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; const int x_filter_idx = (x_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
assert(x_filter_idx < SUBPEL_SHIFTS); assert(x_filter_idx < SUBPEL_SHIFTS);
const int16_t *x_filter = const int16_t *x_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx); av1_get_interp_filter_subpel_kernel(*filter_params_x, x_filter_idx);
int sum = 0; int sum = 0;
for (k = 0; k < filter_params_x->taps; ++k) for (int k = 0; k < filter_params_x->taps; ++k)
sum += x_filter[k] * src_x[k - fo_horiz]; sum += x_filter[k] * src_x[k - fo_horiz];
im_block[y * im_stride + x] = im_block[y * im_stride + x] =
clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0)); clip_pixel(ROUND_POWER_OF_TWO(sum, conv_params->round_0));
...@@ -524,9 +512,9 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, ...@@ -524,9 +512,9 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
// vertical filter // vertical filter
const uint8_t *src_vert = im_block + fo_vert * im_stride; const uint8_t *src_vert = im_block + fo_vert * im_stride;
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
int y_qn = subpel_y_qn; int y_qn = subpel_y_qn;
for (y = 0; y < h; ++y, y_qn += y_step_qn) { for (int y = 0; y < h; ++y, y_qn += y_step_qn) {
const uint8_t *const src_y = const uint8_t *const src_y =
&src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride]; &src_vert[(y_qn >> SCALE_SUBPEL_BITS) * im_stride];
const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS; const int y_filter_idx = (y_qn & SCALE_SUBPEL_MASK) >> SCALE_EXTRA_BITS;
...@@ -534,7 +522,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride, ...@@ -534,7 +522,7 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
const int16_t *y_filter = const int16_t *y_filter =
av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx); av1_get_interp_filter_subpel_kernel(*filter_params_y, y_filter_idx);
CONV_BUF_TYPE sum = 0; CONV_BUF_TYPE sum = 0;
for (k = 0; k < filter_params_y->taps; ++k) { for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_y[(k - fo_vert) * im_stride]; sum += y_filter[k] * src_y[(k - fo_vert) * im_stride];
} }
CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1); CONV_BUF_TYPE res = ROUND_POWER_OF_TWO(sum, conv_params->round_1);
...@@ -579,7 +567,6 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -579,7 +567,6 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
InterpFilterParams *filter_params_y, InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4, const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1; int im_h = h + filter_params_y->taps - 1;
int im_stride = w; int im_stride = w;
...@@ -591,10 +578,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -591,10 +578,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
const uint8_t *src_horiz = src - fo_vert * src_stride; const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK); *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (y = 0; y < im_h; ++y) { for (int y = 0; y < im_h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
int32_t sum = (1 << (bd + FILTER_BITS - 1)); int32_t sum = (1 << (bd + FILTER_BITS - 1));
for (k = 0; k < filter_params_x->taps; ++k) { for (int k = 0; k < filter_params_x->taps; ++k) {
sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k]; sum += x_filter[k] * src_horiz[y * src_stride + x - fo_horiz + k];
} }
assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1))); assert(0 <= sum && sum < (1 << (bd + FILTER_BITS + 1)));
...@@ -608,10 +595,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -608,10 +595,10 @@ void av1_convolve_2d_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK); *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
CONV_BUF_TYPE sum = 1 << offset_bits; CONV_BUF_TYPE sum = 1 << offset_bits;
for (k = 0; k < filter_params_y->taps; ++k) { for (int k = 0; k < filter_params_y->taps; ++k) {
sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x]; sum += y_filter[k] * src_vert[(y - fo_vert + k) * im_stride + x];
} }
assert(0 <= sum && sum < (1 << (offset_bits + 2))); assert(0 <= sum && sum < (1 << (offset_bits + 2)));
...@@ -632,7 +619,6 @@ void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -632,7 +619,6 @@ void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
InterpFilterParams *filter_params_y, InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4, const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
const int fo_vert = filter_params_y->taps / 2 - 1; const int fo_vert = filter_params_y->taps / 2 - 1;
const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1; const int bits = FILTER_BITS - conv_params->round_0 - conv_params->round_1;
(void)filter_params_x; (void)filter_params_x;
...@@ -641,10 +627,10 @@ void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -641,10 +627,10 @@ void av1_convolve_x_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
// vertical filter // vertical filter
const int16_t *y_filter = av1_get_interp_filter_subpel_kernel( const int16_t *y_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_y, subpel_y_q4 & SUBPEL_MASK); *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
CONV_BUF_TYPE res = 0; CONV_BUF_TYPE res = 0;
for (k = 0; k < filter_params_y->taps; ++k) { for (int k = 0; k < filter_params_y->taps; ++k) {
res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x]; res += y_filter[k] * src[(y - fo_vert + k) * src_stride + x];
} }
res *= (1 << bits); res *= (1 << bits);
...@@ -662,7 +648,6 @@ void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -662,7 +648,6 @@ void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
InterpFilterParams *filter_params_y, InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4, const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
const int fo_horiz = filter_params_x->taps / 2 - 1; const int fo_horiz = filter_params_x->taps / 2 - 1;
const int bits = FILTER_BITS - conv_params->round_1; const int bits = FILTER_BITS - conv_params->round_1;
(void)filter_params_y; (void)filter_params_y;
...@@ -671,10 +656,10 @@ void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst, ...@@ -671,10 +656,10 @@ void av1_convolve_y_c(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
// horizontal filter // horizontal filter
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(
*filter_params_x, subpel_x_q4 & SUBPEL_MASK); *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
CONV_BUF_TYPE res = 0; CONV_BUF_TYPE res = 0;
for (k = 0; k < filter_params_x->taps; ++k) { for (int k = 0; k < filter_params_x->taps; ++k) {
res += x_filter[k] * src[y * src_stride + x - fo_horiz + k]; res += x_filter[k] * src[y * src_stride + x - fo_horiz + k];
} }
res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0); res = (1 << bits) * ROUND_POWER_OF_TWO(res, conv_params->round_0);
...@@ -694,14 +679,14 @@ void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride, ...@@ -694,14 +679,14 @@ void av1_convolve_2d_copy_c(const uint8_t *src, int src_stride,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
const int bits = const int bits =
FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0; FILTER_BITS * 2 - conv_params->round_1 - conv_params->round_0;
int x, y;
(void)filter_params_x; (void)filter_params_x;
(void)filter_params_y; (void)filter_params_y;
(void)subpel_x_q4; (void)subpel_x_q4;
(void)subpel_y_q4; (void)subpel_y_q4;
for (y = 0; y < h; ++y) { for (int y = 0; y < h; ++y) {
for (x = 0; x < w; ++x) { for (int x = 0; x < w; ++x) {
CONV_BUF_TYPE res = (1 << bits) * src[y * src_stride + x]; CONV_BUF_TYPE res = (1 << bits) * src[y * src_stride + x];
if (conv_params->do_average) if (conv_params->do_average)
dst[y * dst_stride + x] += res; dst[y * dst_stride + x] += res;
...@@ -718,7 +703,6 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, ...@@ -718,7 +703,6 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
InterpFilterParams *filter_params_y, InterpFilterParams *filter_params_y,
const int subpel_x_q4, const int subpel_y_q4, const int subpel_x_q4, const int subpel_y_q4,
ConvolveParams *conv_params) { ConvolveParams *conv_params) {
int x, y, k;
int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE]; int32_t im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
int im_h = h + filter_params_y->taps - 1; int im_h = h + filter_params_y->taps - 1;
int im_stride = w; int im_stride = w;
...@@ -730,10 +714,10 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride, ...@@ -730,10 +714,10 @@ void av1_jnt_convolve_2d_c(const uint8_t *src, int src_stride,
const uint8_t *src_horiz = src - fo_vert * src_stride; const uint8_t *src_horiz = src - fo_vert * src_stride;
const int16_t *x_filter = av1_get_interp_filter_subpel_kernel( const int16_t *x_filter = av1_get_interp_filter_subpel_kernel(