Commit 7927a97d authored by Angie Chiang's avatar Angie Chiang

Add av1_convolve_2d_facade

When convolve_round is on, av1_convolve_2d_facade will be used for
interpolation rather than av1_convolve. Will remove the experiment
code of convolve_round experiment from av1_convolve in another CL.

So far we use 4-bit rounding in the intermediate stage on top of using
post rounding for compound mode after the last stage.

This gives us roughly a 0.45% gain on lowres, 0.39% on midres and
roughly 0.6-0.7% on hdres.
Altogether, the gain is 1.15% on lowres, 0.74% on midres and roughly
1.7-1.8% on hdres.

Note that there is no restriction on the usage of the 12-tap filter in this CL.
Adding that restriction, we will lose roughly 0.1% again on lowres.

Change-Id: I6332e1d888e28a3b3ddc29711817d66e52cb5cdf
parent a9598cd6
......@@ -199,15 +199,79 @@ void av1_convolve_vert_facade(const uint8_t *src, int src_stride, uint8_t *dst,
#if CONFIG_CONVOLVE_ROUND
// Converts a w x h block of 32-bit intermediate values in src into 8-bit
// pixels in dst. Each value is rounded by a power of two and the result is
// passed through clip_pixel() before being stored.
// NOTE(review): this hunk shows both the pre-change and post-change lines of
// the diff. The post-change form takes the rounding amount as `bits` and uses
// ROUND_POWER_OF_TWO_SIGNED; the pre-change form rounds by FILTER_BITS.
void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h) {
int dst_stride, int w, int h, int bits) {
int r, c;
// src and dst are walked with their own strides; only the top-left w x h
// region of each buffer is touched.
for (r = 0; r < h; ++r) {
for (c = 0; c < w; ++c) {
dst[r * dst_stride + c] =
clip_pixel(ROUND_POWER_OF_TWO(src[r * src_stride + c], FILTER_BITS));
clip_pixel(ROUND_POWER_OF_TWO_SIGNED(src[r * src_stride + c], bits));
}
}
}
// Separable 2D interpolation of a w x h block.
//
// The block is first filtered horizontally into a temporary buffer (rounded
// by conv_params->round_0), then filtered vertically (rounded by
// conv_params->round_1). The vertical result is ADDED to dst rather than
// stored, so dst must already hold the value the caller wants to accumulate
// onto.
//
//   src, src_stride      source pixels; rows above/below and columns
//                        left/right of the block are read for the filter taps
//   dst, dst_stride      accumulation buffer of CONV_BUF_TYPE elements
//   filter_params_x/_y   horizontal / vertical interpolation filter
//   subpel_x_q4/_y_q4    sub-pixel positions; masked with SUBPEL_MASK to pick
//                        the kernel
//   conv_params          supplies round_0 and round_1
void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
                     int dst_stride, int w, int h,
                     InterpFilterParams *filter_params_x,
                     InterpFilterParams *filter_params_y, const int subpel_x_q4,
                     const int subpel_y_q4, ConvolveParams *conv_params) {
  CONV_BUF_TYPE im_block[(MAX_SB_SIZE + MAX_FILTER_TAP - 1) * MAX_SB_SIZE];
  const int taps_x = filter_params_x->taps;
  const int taps_y = filter_params_y->taps;
  const int fo_horiz = taps_x / 2 - 1;
  const int fo_vert = taps_y / 2 - 1;
  const int im_stride = w;
  // The vertical filter needs taps_y - 1 extra rows of horizontally filtered
  // data around the h output rows.
  const int im_rows = h + taps_y - 1;
  int row, col, tap;

  // Horizontal pass: start fo_vert rows above the block so the intermediate
  // buffer covers every row the vertical filter will read.
  const uint8_t *const src_top = src - fo_vert * src_stride;
  const int16_t *const x_kernel = av1_get_interp_filter_subpel_kernel(
      *filter_params_x, subpel_x_q4 & SUBPEL_MASK);
  for (row = 0; row < im_rows; ++row) {
    for (col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (tap = 0; tap < taps_x; ++tap) {
        acc += x_kernel[tap] *
               src_top[row * src_stride + col - fo_horiz + tap];
      }
      im_block[row * im_stride + col] =
          ROUND_POWER_OF_TWO_SIGNED(acc, conv_params->round_0);
    }
  }

  // Vertical pass: output row `row` consumes intermediate rows
  // row .. row + taps_y - 1, because the intermediate buffer already starts
  // fo_vert rows above the block.
  const int16_t *const y_kernel = av1_get_interp_filter_subpel_kernel(
      *filter_params_y, subpel_y_q4 & SUBPEL_MASK);
  for (row = 0; row < h; ++row) {
    for (col = 0; col < w; ++col) {
      CONV_BUF_TYPE acc = 0;
      for (tap = 0; tap < taps_y; ++tap) {
        acc += y_kernel[tap] * im_block[(row + tap) * im_stride + col];
      }
      // Accumulate rather than overwrite.
      dst[row * dst_stride + col] +=
          ROUND_POWER_OF_TWO_SIGNED(acc, conv_params->round_1);
    }
  }
}
// Adapter with the same parameter list shape as the other convolve facades
// that forwards to av1_convolve_2d().
//
// The 8-bit dst/dst_stride and the step arguments are ignored; the result is
// accumulated into conv_params->dst (stride conv_params->dst_stride) instead.
// The filters are looked up from interp_filter using conv_params->ref.
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
                            int dst_stride, int w, int h,
                            const InterpFilter *interp_filter,
                            const int subpel_x_q4, int x_step_q4,
                            const int subpel_y_q4, int y_step_q4,
                            ConvolveParams *conv_params) {
  // Each reference owns two consecutive interp_filter entries: the first is
  // used for the vertical pass, the second for the horizontal pass.
  const int filter_base = 2 * conv_params->ref;
  InterpFilterParams horiz_params =
      av1_get_interp_filter_params(interp_filter[filter_base + 1]);
  InterpFilterParams vert_params =
      av1_get_interp_filter_params(interp_filter[filter_base]);

  // Accepted only to keep the facade signature; not used on this path.
  (void)dst;
  (void)dst_stride;
  (void)x_step_q4;
  (void)y_step_q4;

  av1_convolve_2d(src, src_stride, conv_params->dst, conv_params->dst_stride,
                  w, h, &horiz_params, &vert_params, subpel_x_q4, subpel_y_q4,
                  conv_params);
}
#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
......@@ -294,7 +358,6 @@ void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
filter_params = filter_params_x;
assert(filter_params.taps <= MAX_FILTER_TAP);
av1_convolve_horiz_facade(temp + (filter_size / 2 - 1), temp_stride, dst,
dst_stride, w, h, filter_params, subpel_x_q4,
x_step_q4, conv_params);
......
......@@ -23,11 +23,15 @@ typedef enum CONVOLVE_OPT {
CONVOLVE_OPT_NO_ROUND,
} CONVOLVE_OPT;
typedef int32_t CONV_BUF_TYPE;
// Per-call settings passed to the convolve functions.
// NOTE(review): this hunk shows both the pre-change (int32_t *dst) and
// post-change (CONV_BUF_TYPE *dst) member lines of the diff.
typedef struct ConvolveParams {
// Reference index; av1_convolve_2d_facade() uses it to select this
// reference's entries in the interp_filter array.
int ref;
// Rounding mode; inter_predictor() takes the av1_convolve_2d_facade() path
// when this is CONVOLVE_OPT_NO_ROUND.
CONVOLVE_OPT round;
int32_t *dst;
// Intermediate buffer that av1_convolve_2d() accumulates into, and its
// stride in elements.
CONV_BUF_TYPE *dst;
int dst_stride;
// Rounding applied after the horizontal stage of av1_convolve_2d().
int round_0;
// Rounding applied after the vertical stage of av1_convolve_2d().
int round_1;
} ConvolveParams;
static INLINE ConvolveParams get_conv_params(int ref) {
......@@ -38,18 +42,33 @@ static INLINE ConvolveParams get_conv_params(int ref) {
}
#if CONFIG_CONVOLVE_ROUND
// Separable 2D interpolation of a w x h block: a horizontal pass rounded by
// conv_params->round_0 followed by a vertical pass rounded by
// conv_params->round_1. The result is added to dst, not stored.
void av1_convolve_2d(const uint8_t *src, int src_stride, CONV_BUF_TYPE *dst,
int dst_stride, int w, int h,
InterpFilterParams *filter_params_x,
InterpFilterParams *filter_params_y, const int subpel_x_q4,
const int subpel_y_q4, ConvolveParams *conv_params);
// Facade that looks up the filters for conv_params->ref and calls
// av1_convolve_2d(), writing into conv_params->dst. The dst, dst_stride,
// x_step_q4 and y_step_q4 arguments are ignored.
void av1_convolve_2d_facade(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h,
const InterpFilter *interp_filter,
const int subpel_x_q4, int x_step_q4,
const int subpel_y_q4, int y_step_q4,
ConvolveParams *conv_params);
// Builds the ConvolveParams for the CONVOLVE_OPT_NO_ROUND path: results go to
// the caller-provided intermediate buffer `dst` (stride `dst_stride`) for
// reference `ref`, with stage roundings round_0 = 5 and round_1 = 1.
static INLINE ConvolveParams get_conv_params_no_round(int ref, int32_t *dst,
                                                      int dst_stride) {
  ConvolveParams params;

  // Destination of the un-rounded output.
  params.dst = dst;
  params.dst_stride = dst_stride;

  // Which reference is being predicted, and in which mode.
  params.ref = ref;
  params.round = CONVOLVE_OPT_NO_ROUND;

  // Rounding after the horizontal and vertical stages, respectively.
  params.round_0 = 5;
  params.round_1 = 1;

  return params;
}
// Rounds a w x h block of 32-bit intermediate values in src and writes the
// clipped 8-bit pixels to dst.
// NOTE(review): this hunk shows both the pre-change and post-change parameter
// lines of the diff; the post-change form adds the rounding amount `bits`.
void av1_convolve_rounding(const int32_t *src, int src_stride, uint8_t *dst,
int dst_stride, int w, int h);
int dst_stride, int w, int h, int bits);
#endif // CONFIG_CONVOLVE_ROUND
void av1_convolve(const uint8_t *src, int src_stride, uint8_t *dst,
......
......@@ -918,7 +918,8 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane,
MV32 scaled_mv[2];
SubpelParams subpel_params[2];
#if CONFIG_CONVOLVE_ROUND
int32_t tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE];
DECLARE_ALIGNED(16, int32_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]);
av1_zero(tmp_dst);
#endif // CONFIG_CONVOLVE_ROUND
for (ref = 0; ref < 1 + is_compound; ++ref) {
......@@ -968,15 +969,16 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane,
(scaled_mv[ref].col >> SUBPEL_BITS);
}
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
#if CONFIG_CONVOLVE_ROUND
ConvolveParams conv_params =
get_conv_params_no_round(ref, tmp_dst, MAX_SB_SIZE);
ConvolveParams conv_params =
get_conv_params_no_round(ref, tmp_dst, MAX_SB_SIZE);
#else
ConvolveParams conv_params = get_conv_params(ref);
ConvolveParams conv_params = get_conv_params(ref);
#endif // CONFIG_CONVOLVE_ROUND
for (ref = 0; ref < 1 + is_compound; ++ref) {
const struct scale_factors *const sf = &xd->block_refs[ref]->sf;
struct buf_2d *const pre_buf = &pd->pre[ref];
conv_params.ref = ref;
#if CONFIG_EXT_INTER
if (ref &&
is_masked_compound_type(mi->mbmi.interinter_compound_data.type))
......@@ -1014,7 +1016,9 @@ void build_inter_predictors(MACROBLOCKD *xd, int plane,
#if CONFIG_AOM_HIGHBITDEPTH
if (!(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH))
#endif // CONFIG_AOM_HIGHBITDEPTH
av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h);
av1_convolve_rounding(tmp_dst, MAX_SB_SIZE, dst, dst_buf->stride, w, h,
FILTER_BITS * 2 + is_compound -
conv_params.round_0 - conv_params.round_1);
#endif // CONFIG_CONVOLVE_ROUND
}
}
......
......@@ -64,11 +64,18 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
sf->predict[subpel_x != 0][subpel_y != 0][conv_params->ref](
src, src_stride, dst, dst_stride, kernel_x, xs, kernel_y, ys, w, h);
} else {
// ref_idx > 0 means this is the second reference frame
// first reference frame's prediction result is already in dst
// therefore we need to average the first and second results
av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
subpel_x, xs, subpel_y, ys, conv_params);
// ref_idx > 0 means this is the second reference frame
// first reference frame's prediction result is already in dst
// therefore we need to average the first and second results
#if CONFIG_CONVOLVE_ROUND
if (conv_params->round == CONVOLVE_OPT_NO_ROUND)
av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filter, subpel_x, xs, subpel_y, ys,
conv_params);
else
#endif
av1_convolve(src, src_stride, dst, dst_stride, w, h, interp_filter,
subpel_x, xs, subpel_y, ys, conv_params);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment