Commit 7b517095 authored by Angie Chiang

Integrate convolve_round with compound_segment

This integration only covers low bitdepth mode for now

The coding gain from convolve_round on top of compound_segment
recovers from 0.475% to 0.612% on lowres

Change-Id: I21606c79d0a22c0834966730358267c082d8071e
parent b25ba508
@@ -554,6 +554,9 @@ if (aom_config("CONFIG_AV1") eq "yes") {
#
# Alpha blending with mask
#
if (aom_config("CONFIG_CONVOLVE_ROUND") eq "yes") {
add_proto qw/void aom_blend_a64_d32_mask/, "int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
}
add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx";
add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w";
......
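For reference, the add_proto line above expands to an ordinary C prototype in the generated aom_dsp_rtcd.h. The new d32 variant, with the signature taken directly from the proto string, reads:

void aom_blend_a64_d32_mask(int32_t *dst, uint32_t dst_stride,
                            const int32_t *src0, uint32_t src0_stride,
                            const int32_t *src1, uint32_t src1_stride,
                            const uint8_t *mask, uint32_t mask_stride,
                            int h, int w, int suby, int subx);

Unlike the uint8_t blend functions next to it, this one operates on 32-bit convolve-domain samples, which is why it is gated on CONFIG_CONVOLVE_ROUND.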
@@ -18,6 +18,69 @@
#include "./aom_dsp_rtcd.h"
#if CONFIG_CONVOLVE_ROUND
// Blending with alpha mask. Mask values come from the range [0, 64],
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
// be the same as dst, or dst can be different from both sources.
void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride,
const int32_t *src0, uint32_t src0_stride,
const int32_t *src1, uint32_t src1_stride,
const uint8_t *mask, uint32_t mask_stride, int h,
int w, int subh, int subw) {
int i, j;
assert(IMPLIES(src0 == dst, src0_stride == dst_stride));
assert(IMPLIES(src1 == dst, src1_stride == dst_stride));
assert(h >= 1);
assert(w >= 1);
assert(IS_POWER_OF_TWO(h));
assert(IS_POWER_OF_TWO(w));
if (subw == 0 && subh == 0) {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
const int m = mask[i * mask_stride + j];
dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
src1[i * src1_stride + j]);
}
}
} else if (subw == 1 && subh == 1) {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
const int m = ROUND_POWER_OF_TWO(
mask[(2 * i) * mask_stride + (2 * j)] +
mask[(2 * i + 1) * mask_stride + (2 * j)] +
mask[(2 * i) * mask_stride + (2 * j + 1)] +
mask[(2 * i + 1) * mask_stride + (2 * j + 1)],
2);
dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
src1[i * src1_stride + j]);
}
}
} else if (subw == 1 && subh == 0) {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)],
mask[i * mask_stride + (2 * j + 1)]);
dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
src1[i * src1_stride + j]);
}
}
} else {
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j],
mask[(2 * i + 1) * mask_stride + j]);
dst[i * dst_stride + j] = AOM_BLEND_A64(m, src0[i * src0_stride + j],
src1[i * src1_stride + j]);
}
}
}
}
#endif // CONFIG_CONVOLVE_ROUND
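For reference, AOM_BLEND_A64 used above is the weighted average defined in aom_dsp/blend.h, with AOM_BLEND_A64_MAX_ALPHA equal to 64. A minimal standalone sketch of one blended sample (blend_a64_sample is a hypothetical name, not a library function):

#include <stdint.h>

/* Weight m lies in [0, 64]; the weighted sum of the two d32 samples is
 * scaled back down by 6 bits with rounding, mirroring AOM_BLEND_A64. */
static int32_t blend_a64_sample(int m, int32_t v0, int32_t v1) {
  const int64_t sum = (int64_t)m * v0 + (int64_t)(64 - m) * v1;
  return (int32_t)((sum + 32) >> 6); /* ROUND_POWER_OF_TWO(sum, 6) */
}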
// Blending with alpha mask. Mask values come from the range [0, 64],
// as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can
// be the same as dst, or dst can be different from both sources.
......
@@ -357,6 +357,46 @@ void build_compound_seg_mask_highbd(uint8_t *mask, SEG_MASK_TYPE mask_type,
#elif COMPOUND_SEGMENT_TYPE == 1
#define DIFF_FACTOR 16
#if CONFIG_CONVOLVE_ROUND
static void diffwtd_mask_d32(uint8_t *mask, int which_inverse, int mask_base,
const int32_t *src0, int src0_stride,
const int32_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w,
ConvolveParams *conv_params) {
int round = 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1;
int i, j, m, diff;
int block_stride = block_size_wide[sb_type];
for (i = 0; i < h; ++i) {
for (j = 0; j < w; ++j) {
diff = abs(ROUND_POWER_OF_TWO(
src0[i * src0_stride + j] - src1[i * src1_stride + j], round));
m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
mask[i * block_stride + j] =
which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
}
}
}
static void build_compound_seg_mask_d32(uint8_t *mask, SEG_MASK_TYPE mask_type,
const int32_t *src0, int src0_stride,
const int32_t *src1, int src1_stride,
BLOCK_SIZE sb_type, int h, int w,
ConvolveParams *conv_params) {
switch (mask_type) {
case DIFFWTD_38:
diffwtd_mask_d32(mask, 0, 38, src0, src0_stride, src1, src1_stride,
sb_type, h, w, conv_params);
break;
case DIFFWTD_38_INV:
diffwtd_mask_d32(mask, 1, 38, src0, src0_stride, src1, src1_stride,
sb_type, h, w, conv_params);
break;
default: assert(0);
}
}
#endif // CONFIG_CONVOLVE_ROUND
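To make the d32 scaling concrete, here is a small standalone sketch of one mask sample. FILTER_BITS = 7 matches libaom, but the round_0/round_1 values are assumptions chosen only for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Hypothetical rework of one diffwtd_mask_d32 sample: the d32 sources carry
 * (2 * FILTER_BITS - round_0 - round_1) extra precision bits, so the
 * difference is rounded back to pixel units before the DIFF_FACTOR mapping. */
int main(void) {
  const int filter_bits = 7, round_0 = 5, round_1 = 0;    /* assumed values */
  const int round = 2 * filter_bits - round_0 - round_1;  /* = 9 */
  const int32_t src0 = 60000, src1 = 8800;                /* d32 samples */
  const int diff = abs((src0 - src1 + (1 << (round - 1))) >> round); /* 100 */
  int m = 38 + diff / 16;             /* DIFFWTD_38, DIFF_FACTOR = 16 */
  if (m > 64) m = 64;                 /* clamp to AOM_BLEND_A64_MAX_ALPHA */
  printf("diff=%d m=%d\n", diff, m);  /* prints diff=100 m=44 */
  return 0;
}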
static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
@@ -664,6 +704,21 @@ static void build_masked_compound_wedge_extend_highbd(
}
#endif // CONFIG_HIGHBITDEPTH
#else
#if CONFIG_CONVOLVE_ROUND
static void build_masked_compound_no_round(
CONV_BUF_TYPE *dst, int dst_stride, const CONV_BUF_TYPE *src0,
int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride,
const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
int w) {
// Derive subsampling from h and w passed in. May be refactored to
// pass in subsampling factors directly.
const int subh = (2 << b_height_log2_lookup[sb_type]) == h;
const int subw = (2 << b_width_log2_lookup[sb_type]) == w;
const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
aom_blend_a64_d32_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride,
mask, block_size_wide[sb_type], h, w, subh, subw);
}
#endif // CONFIG_CONVOLVE_ROUND
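The subh/subw derivation in build_masked_compound_no_round works because 2 << b_height_log2_lookup[sb_type] (and the width analogue) equals half the luma block dimension, so the comparison is true exactly when the plane passed in is subsampled. A sketch with hand-filled numbers for BLOCK_16X16 (my reading of the lookup tables, for illustration only):

/* For a 16x16 luma block, b_height_log2_lookup[BLOCK_16X16] is 2, so
 * 2 << 2 == 8. A chroma call with h == 8 then gives subh == 1, and
 * aom_blend_a64_d32_mask averages the 16-row mask two rows at a time. */
const int h = 8;                  /* plane height passed in (chroma) */
const int subh = ((2 << 2) == h); /* 1: vertically subsampled */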
static void build_masked_compound(
uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride,
const uint8_t *src1, int src1_stride,
@@ -733,6 +788,19 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
};
#if CONFIG_HIGHBITDEPTH
#if CONFIG_CONVOLVE_ROUND
DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
int tmp_dst2_stride = MAX_SB_SIZE;
CONV_BUF_TYPE *org_dst = conv_params->dst;
int org_dst_stride = conv_params->dst_stride;
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
memset(tmp_dst2, 0, sizeof(tmp_dst2));
conv_params->dst = tmp_dst2;
conv_params->dst_stride = tmp_dst2_stride;
// Mask compound has its own averaging mechanism.
conv_params->do_average = 0;
}
#endif // CONFIG_CONVOLVE_ROUND
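In outline, the redirection above keeps the two d32 predictions separate until the mask blend combines them (a restatement of the flow, not library code):

/* No-round flow: the first prediction already sits in the caller's 32-bit
 * buffer (saved as org_dst); the second is written to tmp_dst2 with
 * do_average cleared, so nothing is pre-averaged. The compound mask blend
 * below merges the two buffers, and av1_convolve_rounding then produces
 * the final 8-bit pixels. */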
DECLARE_ALIGNED(16, uint8_t, tmp_dst_[2 * MAX_SB_SQUARE]);
uint8_t *tmp_dst = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
? CONVERT_TO_BYTEPTR(tmp_dst_)
@@ -748,14 +816,25 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
xs, ys, xd);
#if CONFIG_COMPOUND_SEGMENT
if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_compound_seg_mask_highbd(comp_data.seg_mask, comp_data.mask_type,
dst, dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.sb_type, h, w, xd->bd);
else
build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.sb_type, h, w);
} else {
#if CONFIG_CONVOLVE_ROUND
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
build_compound_seg_mask_d32(
comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
tmp_dst2, tmp_dst2_stride, mi->mbmi.sb_type, h, w, conv_params);
} else {
#endif // CONFIG_CONVOLVE_ROUND
build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_CONVOLVE_ROUND
}
}
#endif // CONFIG_COMPOUND_SEGMENT
@@ -769,16 +848,45 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE, &comp_data,
mi->mbmi.sb_type, wedge_offset_x, wedge_offset_y, h, w);
#else
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
build_masked_compound_highbd(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h,
w, xd->bd);
else
build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
} else {
#if CONFIG_CONVOLVE_ROUND
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
org_dst_stride, tmp_dst2, tmp_dst2_stride,
&comp_data, mi->mbmi.sb_type, h, w);
av1_convolve_rounding(
org_dst, org_dst_stride, dst, dst_stride, w, h,
FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
conv_params->do_post_rounding = 0;
} else {
#endif // CONFIG_CONVOLVE_ROUND
build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_SUPERTX
#else // CONFIG_HIGHBITDEPTH
#if CONFIG_CONVOLVE_ROUND
DECLARE_ALIGNED(16, CONV_BUF_TYPE, tmp_dst2[MAX_SB_SQUARE]);
int tmp_dst2_stride = MAX_SB_SIZE;
CONV_BUF_TYPE *org_dst = conv_params->dst;
int org_dst_stride = conv_params->dst_stride;
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
memset(tmp_dst2, 0, sizeof(tmp_dst2));
conv_params->dst = tmp_dst2;
conv_params->dst_stride = tmp_dst2_stride;
// Mask compound has its own averaging mechanism.
conv_params->do_average = 0;
}
#endif // CONFIG_CONVOLVE_ROUND
DECLARE_ALIGNED(16, uint8_t, tmp_dst[MAX_SB_SQUARE]);
av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE, subpel_x,
subpel_y, sf, w, h, conv_params, interp_filter,
@@ -790,18 +898,43 @@ void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
#endif
xs, ys, xd);
#if CONFIG_COMPOUND_SEGMENT
if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG)
build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
dst_stride, tmp_dst, MAX_SB_SIZE, mi->mbmi.sb_type,
h, w);
if (!plane && comp_data.interinter_compound_type == COMPOUND_SEG) {
#if CONFIG_CONVOLVE_ROUND
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
build_compound_seg_mask_d32(
comp_data.seg_mask, comp_data.mask_type, org_dst, org_dst_stride,
tmp_dst2, tmp_dst2_stride, mi->mbmi.sb_type, h, w, conv_params);
} else {
#endif // CONFIG_CONVOLVE_ROUND
build_compound_seg_mask(comp_data.seg_mask, comp_data.mask_type, dst,
dst_stride, tmp_dst, MAX_SB_SIZE,
mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_COMPOUND_SEGMENT
#if CONFIG_SUPERTX
build_masked_compound_wedge_extend(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type,
wedge_offset_x, wedge_offset_y, h, w);
#else
build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst, MAX_SB_SIZE,
&comp_data, mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
if (conv_params->round == CONVOLVE_OPT_NO_ROUND) {
build_masked_compound_no_round(org_dst, org_dst_stride, org_dst,
org_dst_stride, tmp_dst2, tmp_dst2_stride,
&comp_data, mi->mbmi.sb_type, h, w);
av1_convolve_rounding(
org_dst, org_dst_stride, dst, dst_stride, w, h,
FILTER_BITS * 2 - conv_params->round_0 - conv_params->round_1);
conv_params->do_post_rounding = 0;
} else {
#endif // CONFIG_CONVOLVE_ROUND
build_masked_compound(dst, dst_stride, dst, dst_stride, tmp_dst,
MAX_SB_SIZE, &comp_data, mi->mbmi.sb_type, h, w);
#if CONFIG_CONVOLVE_ROUND
}
#endif // CONFIG_CONVOLVE_ROUND
#endif // CONFIG_SUPERTX
#endif // CONFIG_HIGHBITDEPTH
#if CONFIG_COMPOUND_SEGMENT
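Both bitdepth branches finish the no-round path with av1_convolve_rounding, which brings the 32-bit blended prediction back to pixels and clears do_post_rounding so the caller does not round a second time. A sketch of that final step as I understand it (clip range assumes 8-bit output; not the library source):

#include <stdint.h>

/* Hypothetical restatement of the d32 -> 8-bit rounding, where bits equals
 * 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1, matching
 * the call sites above. */
static void convolve_rounding_sketch(const int32_t *src, int src_stride,
                                     uint8_t *dst, int dst_stride,
                                     int w, int h, int bits) {
  for (int r = 0; r < h; ++r) {
    for (int c = 0; c < w; ++c) {
      int32_t v = (src[r * src_stride + c] + (1 << (bits - 1))) >> bits;
      if (v < 0) v = 0; /* clip_pixel() equivalent for 8-bit */
      if (v > 255) v = 255;
      dst[r * dst_stride + c] = (uint8_t)v;
    }
  }
}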
@@ -1243,8 +1376,15 @@ void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, int plane,
conv_params.do_average = ref;
#if CONFIG_EXT_INTER
if (is_masked_compound_type(mi->mbmi.interinter_compound_type)) {
// masked compound type has its own average mechanism
conv_params = get_conv_params(ref, 0, plane);
#if CONFIG_HIGHBITDEPTH
// TODO(angiebird): integrate convolve_round and compound_segment in
// highbitdepth mode
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
conv_params = get_conv_params(ref, 0, plane);
else
#endif // CONFIG_HIGHBITDEPTH
// masked compound type has its own average mechanism
conv_params.do_average = 0;
}
if (ref && is_masked_compound_type(mi->mbmi.interinter_compound_type))
......
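The last hunk selects the pipeline per bitdepth; the net behavior, as I read the diff:

/* Masked compound with CONFIG_CONVOLVE_ROUND:
 * - high bitdepth: fall back to rounded conv params via get_conv_params()
 *   (per the TODO, that integration has not landed yet);
 * - low bitdepth: keep the no-round conv_params and clear do_average,
 *   since the mask blend performs its own combining. */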