Commit 27e90295 authored by Rupert Swarbrick
Browse files

Pack InterpFilters into a single integer

Before this patch, if CONFIG_DUAL_FILTER was true then an MB_MODE_INFO
stored its filter choices as an array of four numbers, each of which
was between 0 and 10. It also seems that elements 2 and 3 of the array
were always the same as elements 0 and 1 when used.

This patch defines a new typedef called InterpFilters, together with
constructor and extractor functions. When CONFIG_DUAL_FILTER is zero,
InterpFilters is a synonym for InterpFilter and the constructor and
extractor functions should compile away to nothing. When it is
nonzero, InterpFilters is a uint32_t which stores the x filter in the
high part and the y filter in the low part. This looks strange, but it
matches the old array layout, in which element 0 held the y filter and
element 1 held the x filter.

Making this change allows us to get rid of a lot of special-case code
that depended on CONFIG_DUAL_FILTER. The uniform
extract/make/broadcast interface also shortens the code in general.

Change-Id: I6b24a61bac3e4b220d8d46d0b27cfe865dcfba81
parent 26ad0b22
......@@ -378,12 +378,8 @@ typedef struct MB_MODE_INFO {
uint8_t use_intrabc;
#endif // CONFIG_INTRABC
// Only for INTER blocks
#if CONFIG_DUAL_FILTER
InterpFilter interp_filter[4];
#else
InterpFilter interp_filter;
#endif
// Only for INTER blocks
InterpFilters interp_filters;
MV_REFERENCE_FRAME ref_frame[2];
TX_TYPE tx_type;
#if CONFIG_TXK_SEL
......
......@@ -557,34 +557,17 @@ void av1_convolve_2d_scale_c(const uint8_t *src, int src_stride,
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilter *interp_filter,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4, int scaled,
ConvolveParams *conv_params) {
InterpFilters interp_filters, const int subpel_x_q4,
int x_step_q4, const int subpel_y_q4, int y_step_q4,
int scaled, ConvolveParams *conv_params) {
(void)x_step_q4;
(void)y_step_q4;
(void)dst;
(void)dst_stride;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
#if USE_EXTRA_FILTER
if (filter_params_x.interp_filter == MULTITAP_SHARP &&
filter_params_y.interp_filter == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
}
#endif // USE_EXTRA_FILTER
#else
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(*interp_filter);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(*interp_filter);
#endif
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
&filter_params_y);
if (filter_params_y.taps < filter_params_x.taps) {
uint8_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
......@@ -871,7 +854,7 @@ void av1_highbd_convolve_2d_scale_c(const uint16_t *src, int src_stride,
void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
const InterpFilter *interp_filter,
InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
int scaled, ConvolveParams *conv_params,
......@@ -880,26 +863,11 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
(void)y_step_q4;
(void)dst;
(void)dst_stride;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
#if USE_EXTRA_FILTER
if (filter_params_x.interp_filter == MULTITAP_SHARP &&
filter_params_y.interp_filter == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
}
#endif
#else
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(*interp_filter);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(*interp_filter);
#endif
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, 1, &filter_params_x,
&filter_params_y);
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
if (filter_params_y.taps < filter_params_x.taps) {
uint16_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
......@@ -942,66 +910,6 @@ void av1_highbd_convolve_2d_facade(const uint8_t *src8, int src_stride,
conv_params, bd);
}
}
// High-bitdepth 2D scaled convolution entry point (removed by this commit).
//
// Selects the horizontal/vertical interpolation filter parameters from
// interp_filter and forwards to av1_highbd_convolve_2d_scale(). The result is
// written to conv_params->dst (stride conv_params->dst_stride); the dst and
// dst_stride arguments are not used.
//
// src8/src_stride: source pixels; src8 is converted with CONVERT_TO_SHORTPTR
//                  and read as uint16_t samples.
// w, h:            block width and height.
// interp_filter:   with CONFIG_DUAL_FILTER, an array indexed by
//                  conv_params->ref (y filter at 0 + 2 * ref, x filter at
//                  1 + 2 * ref); otherwise a pointer to a single filter used
//                  for both directions.
// subpel_*_qn, *_step_qn: passed through unchanged to the scale kernel.
// bd:              passed through to the scale kernel (presumably the bit
//                  depth -- confirm against av1_highbd_convolve_2d_scale).
void av1_highbd_convolve_2d_facade_scale(const uint8_t *src8, int src_stride,
uint8_t *dst, int dst_stride, int w,
int h,
const InterpFilter *interp_filter,
const int subpel_x_qn, int x_step_qn,
const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params, int bd) {
// Output goes through conv_params->dst, so these arguments are ignored.
(void)dst;
(void)dst_stride;
#if CONFIG_DUAL_FILTER
// Separate filters per direction: x at odd index, y at even index.
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
#if USE_EXTRA_FILTER
if (filter_params_x.interp_filter == MULTITAP_SHARP &&
filter_params_y.interp_filter == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
}
#endif
#else
// Single-filter configuration: the same filter is used for x and y.
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(*interp_filter);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(*interp_filter);
#endif
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
// When the vertical filter has fewer taps than the horizontal one, run the
// kernel on transposed data with the x/y parameters swapped, then transpose
// the result back.
if (filter_params_y.taps < filter_params_x.taps) {
uint16_t tr_src[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) *
(MAX_SB_SIZE + MAX_FILTER_TAP - 1)];
int tr_src_stride = MAX_SB_SIZE + MAX_FILTER_TAP - 1;
CONV_BUF_TYPE tr_dst[MAX_SB_SIZE * MAX_SB_SIZE];
int tr_dst_stride = MAX_SB_SIZE;
// Offsets from the block origin to the first sample each filter reads.
int fo_vert = filter_params_y.taps / 2 - 1;
int fo_horiz = filter_params_x.taps / 2 - 1;
// Transpose the source block together with its filter margins.
// NOTE(review): the copied region size does not depend on x_step_qn /
// y_step_qn -- confirm it is large enough for scaled prediction.
transpose_uint16(
tr_src, tr_src_stride, src - fo_vert * src_stride - fo_horiz,
src_stride, w + filter_params_x.taps - 1, h + filter_params_y.taps - 1);
// The current contents of conv_params->dst are transposed in as well,
// presumably so the kernel sees any existing values -- confirm.
transpose_int32(tr_dst, tr_dst_stride, conv_params->dst,
conv_params->dst_stride, w, h);
// horizontal and vertical parameters are swapped because of the transpose
av1_highbd_convolve_2d_scale(
tr_src + fo_horiz * tr_src_stride + fo_vert, tr_src_stride, tr_dst,
tr_dst_stride, h, w, &filter_params_y, &filter_params_x, subpel_y_qn,
y_step_qn, subpel_x_qn, x_step_qn, conv_params, bd);
// Transpose the h-by-w result back into conv_params->dst.
transpose_int32(conv_params->dst, conv_params->dst_stride, tr_dst,
tr_dst_stride, h, w);
} else {
// No transpose needed: convolve straight into conv_params->dst.
av1_highbd_convolve_2d_scale(
src, src_stride, conv_params->dst, conv_params->dst_stride, w, h,
&filter_params_x, &filter_params_y, subpel_x_qn, x_step_qn, subpel_y_qn,
y_step_qn, conv_params, bd);
}
}
#endif // CONFIG_HIGHBITDEPTH
#endif // CONFIG_CONVOLVE_ROUND
......@@ -1014,11 +922,7 @@ typedef void (*ConvolveFunc)(const uint8_t *src, int src_stride, uint8_t *dst,
static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
const InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params,
......@@ -1026,16 +930,11 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
ConvolveFunc convolve_vert) {
int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
int ignore_vert = y_step_q4 == SUBPEL_SHIFTS && subpel_y_q4 == 0;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
InterpFilterParams filter_params;
#else
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter);
#endif
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
&filter_params_y);
assert(conv_params->round == CONVOLVE_OPT_ROUND);
assert(w <= MAX_BLOCK_WIDTH);
......@@ -1046,18 +945,12 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
} else if (ignore_vert) {
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params_x,
subpel_x_q4, x_step_q4, conv_params);
} else if (ignore_horiz) {
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params_y,
subpel_y_q4, y_step_q4, conv_params);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
......@@ -1068,12 +961,7 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
}
av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
// we do filter with fewer taps first to reduce hardware implementation
// complexity
......@@ -1084,22 +972,21 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_width <= max_intermediate_size);
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
intermediate_width, h, filter_params, subpel_y_q4,
intermediate_width, h, filter_params_y, subpel_y_q4,
y_step_q4, &temp_conv_params);
filter_params = filter_params_x;
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
w, h, filter_params, subpel_x_q4, x_step_q4, conv_params);
w, h, filter_params_x, subpel_x_q4, x_step_q4,
conv_params);
} else
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
{
......@@ -1109,30 +996,22 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
filter_size = filter_params_y.taps;
#else
filter_size = filter_params.taps;
#endif
intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_height <= max_intermediate_size);
(void)max_intermediate_size;
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
temp_stride, w, intermediate_height, filter_params,
temp_stride, w, intermediate_height, filter_params_x,
subpel_x_q4, x_step_q4, &temp_conv_params);
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
dst, dst_stride, w, h, filter_params, subpel_y_q4,
dst, dst_stride, w, h, filter_params_y, subpel_y_q4,
y_step_q4, conv_params);
}
}
......@@ -1140,11 +1019,7 @@ static void convolve_helper(const uint8_t *src, int src_stride, uint8_t *dst,
static void convolve_scale_helper(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
const InterpFilters interp_filters,
const int subpel_x_qn, int x_step_qn,
const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params,
......@@ -1152,16 +1027,11 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
ConvolveFunc convolve_vert) {
int ignore_horiz = x_step_qn == SCALE_SUBPEL_SHIFTS && subpel_x_qn == 0;
int ignore_vert = y_step_qn == SCALE_SUBPEL_SHIFTS && subpel_y_qn == 0;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * conv_params->ref]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * conv_params->ref]);
InterpFilterParams filter_params;
#else
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter);
#endif
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
&filter_params_y);
assert(conv_params->round == CONVOLVE_OPT_ROUND);
assert(w <= MAX_BLOCK_WIDTH);
......@@ -1172,18 +1042,12 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
if (ignore_horiz && ignore_vert) {
convolve_copy(src, src_stride, dst, dst_stride, w, h, conv_params);
} else if (ignore_vert) {
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params,
assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(src, src_stride, dst, dst_stride, w, h, filter_params_x,
subpel_x_qn, x_step_qn, conv_params);
} else if (ignore_horiz) {
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params,
assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(src, src_stride, dst, dst_stride, w, h, filter_params_y,
subpel_y_qn, y_step_qn, conv_params);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
......@@ -1194,12 +1058,7 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
if (interp_filter[0 + 2 * conv_params->ref] == MULTITAP_SHARP &&
interp_filter[1 + 2 * conv_params->ref] == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
}
av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
// we do filter with fewer taps first to reduce hardware implementation
// complexity
......@@ -1210,23 +1069,22 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_qn + subpel_x_qn) >> SCALE_SUBPEL_BITS) +
filter_size;
assert(intermediate_width <= max_intermediate_size);
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(src - (filter_size / 2 - 1), src_stride, temp, temp_stride,
intermediate_width, h, filter_params, subpel_y_qn,
intermediate_width, h, filter_params_y, subpel_y_qn,
y_step_qn, &temp_conv_params);
filter_params = filter_params_x;
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(temp + (filter_size / 2 - 1), temp_stride, dst, dst_stride,
w, h, filter_params, subpel_x_qn, x_step_qn, conv_params);
w, h, filter_params_x, subpel_x_qn, x_step_qn,
conv_params);
} else {
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
int intermediate_height;
......@@ -1235,31 +1093,23 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
temp_conv_params.ref = 0;
temp_conv_params.do_average = 0;
temp_conv_params.round = CONVOLVE_OPT_ROUND;
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
filter_size = filter_params_y.taps;
#else
filter_size = filter_params.taps;
#endif
intermediate_height =
(((h - 1) * y_step_qn + subpel_y_qn) >> SCALE_SUBPEL_BITS) +
filter_size;
assert(intermediate_height <= max_intermediate_size);
(void)max_intermediate_size;
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_x.taps <= MAX_FILTER_TAP);
convolve_horiz(src - src_stride * (filter_size / 2 - 1), src_stride, temp,
temp_stride, w, intermediate_height, filter_params,
temp_stride, w, intermediate_height, filter_params_x,
subpel_x_qn, x_step_qn, &temp_conv_params);
#if CONFIG_DUAL_FILTER
filter_params = filter_params_y;
#endif
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_y.taps <= MAX_FILTER_TAP);
convolve_vert(temp + temp_stride * (filter_size / 2 - 1), temp_stride,
dst, dst_stride, w, h, filter_params, subpel_y_qn,
dst, dst_stride, w, h, filter_params_y, subpel_y_qn,
y_step_qn, conv_params);
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
}
......@@ -1268,44 +1118,29 @@ static void convolve_scale_helper(const uint8_t *src, int src_stride,
}
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
int dst_stride, int w, int h, InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, ConvolveParams *conv_params) {
convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
av1_convolve_horiz_facade, av1_convolve_vert_facade);
}
void av1_convolve_c(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
int dst_stride, int w, int h, InterpFilters interp_filters,
const int subpel_x_q4, int x_step_q4, const int subpel_y_q4,
int y_step_q4, ConvolveParams *conv_params) {
convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
convolve_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x_q4, x_step_q4, subpel_y_q4, y_step_q4, conv_params,
av1_convolve_horiz_facade_c, av1_convolve_vert_facade_c);
}
void av1_convolve_scale(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
const int subpel_x_qn, int x_step_qn,
const int subpel_y_qn, int y_step_qn,
InterpFilters interp_filters, const int subpel_x_qn,
int x_step_qn, const int subpel_y_qn, int y_step_qn,
ConvolveParams *conv_params) {
convolve_scale_helper(src, src_stride, dst, dst_stride, w, h, interp_filter,
convolve_scale_helper(src, src_stride, dst, dst_stride, w, h, interp_filters,
subpel_x_qn, x_step_qn, subpel_y_qn, y_step_qn,
conv_params, av1_convolve_horiz_facade_scale,
av1_convolve_vert_facade_scale);
......@@ -1567,14 +1402,9 @@ void av1_highbd_convolve_vert_facade_scale(
void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
int dst_stride, int w, int h,
#if CONFIG_DUAL_FILTER
const InterpFilter *interp_filter,
#else
const InterpFilter interp_filter,
#endif
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4, int ref_idx,
int bd) {
InterpFilters interp_filters, const int subpel_x_q4,
int x_step_q4, const int subpel_y_q4, int y_step_q4,
int ref_idx, int bd) {
uint16_t *src = CONVERT_TO_SHORTPTR(src8);
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
int ignore_horiz = x_step_q4 == SUBPEL_SHIFTS && subpel_x_q4 == 0;
......@@ -1587,27 +1417,20 @@ void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
if (ignore_horiz && ignore_vert) {
highbd_convolve_copy(src, src_stride, dst, dst_stride, w, h, ref_idx, bd);
} else if (ignore_vert) {
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
#else
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter);
#endif
return;
}
InterpFilterParams filter_params_x, filter_params_y;
av1_get_convolve_filter_params(interp_filters, 0, &filter_params_x,
&filter_params_y);
if (ignore_vert) {
av1_highbd_convolve_horiz_facade(src8, src_stride, dst8, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4,
filter_params_x, subpel_x_q4, x_step_q4,
ref_idx, bd);
} else if (ignore_horiz) {
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
#else
InterpFilterParams filter_params =
av1_get_interp_filter_params(interp_filter);
#endif
av1_highbd_convolve_vert_facade(src8, src_stride, dst8, dst_stride, w, h,
filter_params, subpel_y_q4, y_step_q4,
filter_params_y, subpel_y_q4, y_step_q4,
ref_idx, bd);
} else {
// temp's size is set to a 256 aligned value to facilitate SIMD
......@@ -1618,54 +1441,37 @@ void av1_highbd_convolve(const uint8_t *src8, int src_stride, uint8_t *dst8,
uint8_t *temp8 = CONVERT_TO_BYTEPTR(temp);
int max_intermediate_size = ((MAX_SB_SIZE * 2 + 16) + 16);
int filter_size;
InterpFilterParams filter_params;
#if CONFIG_DUAL_FILTER
InterpFilterParams filter_params_x =
av1_get_interp_filter_params(interp_filter[1 + 2 * ref_idx]);
InterpFilterParams filter_params_y =
av1_get_interp_filter_params(interp_filter[0 + 2 * ref_idx]);
#endif
#if CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
if (interp_filter[0 + 2 * ref_idx] == MULTITAP_SHARP &&
interp_filter[1 + 2 * ref_idx] == MULTITAP_SHARP) {
// Avoid two directions both using 12-tap filter.
// This will reduce hardware implementation cost.
filter_params_y = av1_get_interp_filter_params(EIGHTTAP_SHARP);
}
av1_convolve_filter_params_fixup_1212(&filter_params_x, &filter_params_y);
if (filter_params_y.taps < filter_params_x.taps) {
int intermediate_width;
int temp_stride = max_intermediate_size;
filter_params = filter_params_y;
filter_size = filter_params_x.taps;
intermediate_width =
(((w - 1) * x_step_q4 + subpel_x_q4) >> SUBPEL_BITS) + filter_size;
assert(intermediate_width <= max_intermediate_size);
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_y.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_vert_facade(
src8 - (filter_size / 2 - 1), src_stride, temp8, temp_stride,
intermediate_width, h, filter_params, subpel_y_q4, y_step_q4, 0, bd);
av1_highbd_convolve_vert_facade(src8 - (filter_size / 2 - 1), src_stride,
temp8, temp_stride, intermediate_width, h,
filter_params_y, subpel_y_q4, y_step_q4,
0, bd);
filter_params = filter_params_x;
assert(filter_params.taps <= MAX_FILTER_TAP);
assert(filter_params_x.taps <= MAX_FILTER_TAP);
av1_highbd_convolve_horiz_facade(
temp8 + (filter_size / 2 - 1), temp_stride, dst8, dst_stride, w, h,
filter_params, subpel_x_q4, x_step_q4, ref_idx, bd);
filter_params_x, subpel_x_q4, x_step_q4, ref_idx, bd);
} else
#endif // CONFIG_DUAL_FILTER && USE_EXTRA_FILTER
{
int intermediate_height;
int temp_stride = MAX_SB_SIZE;
#if CONFIG_DUAL_FILTER
filter_params = filter_params_x;
filter_size = filter_params_y.taps;
#else
filter_params = av1_get_interp_filter_params(interp_filter);
filter_size = filter_params.taps;
#endif
intermediate_height =
(((h - 1) * y_step_q4 + subpel_y_q4) >> SUBPEL_BITS) + filter_size;