Commit c3a8ae46 authored by Debargha Mukherjee

Clean up and use do_post_rounding consistently

This patch simplifies how the do_post_rounding flag is set and used,
and makes that handling consistent across all convolve/warp operations.
The flag is now always set to 1 for compound modes, but the masked
blending operation can subsequently turn it off.
To accomplish this, the warp functions now use the conv_params->dst
buffer the same way the non-warp convolves do: the buffer is used
only when is_compound is 1.

Change-Id: If3a37ffa65c3ca75f1df66ca427e6b5df86ac72f
parent 637586ec
...@@ -69,6 +69,8 @@ static INLINE void av1_make_inter_predictor( ...@@ -69,6 +69,8 @@ static INLINE void av1_make_inter_predictor(
// Make sure the selected motion mode is valid for this configuration // Make sure the selected motion mode is valid for this configuration
assert_motion_mode_valid(mi->mbmi.motion_mode, xd->global_motion, xd, mi); assert_motion_mode_valid(mi->mbmi.motion_mode, xd->global_motion, xd, mi);
assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
WarpedMotionParams final_warp_params; WarpedMotionParams final_warp_params;
const int do_warp = const int do_warp =
(w >= 8 && h >= 8 && (w >= 8 && h >= 8 &&
...@@ -86,20 +88,17 @@ static INLINE void av1_make_inter_predictor( ...@@ -86,20 +88,17 @@ static INLINE void av1_make_inter_predictor(
pre_buf->buf0, pre_buf->width, pre_buf->height, pre_buf->buf0, pre_buf->width, pre_buf->height,
pre_buf->stride, dst, p_col, p_row, w, h, dst_stride, pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
pd->subsampling_x, pd->subsampling_y, conv_params); pd->subsampling_x, pd->subsampling_y, conv_params);
assert(IMPLIES(conv_params->dst != NULL, conv_params->do_post_rounding));
assert(IMPLIES(conv_params->dst == NULL, !conv_params->do_post_rounding));
} else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y,
sf, w, h, conv_params, interp_filters, xs, ys, sf, w, h, conv_params, interp_filters, xs, ys,
xd->bd); xd->bd);
assert(IMPLIES(conv_params->is_compound, conv_params->do_post_rounding));
assert(!(conv_params->is_compound && conv_params->dst == NULL));
} else { } else {
inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w,
h, conv_params, interp_filters, xs, ys); h, conv_params, interp_filters, xs, ys);
assert(IMPLIES(conv_params->is_compound, conv_params->do_post_rounding));
assert(!(conv_params->is_compound && conv_params->dst == NULL));
} }
// For compound, do_post_rounding is always 1.
// For masked compound, this flag will be turned off after the blend stage.
conv_params->do_post_rounding = conv_params->is_compound;
} }
#define NSMOOTHERS 1 #define NSMOOTHERS 1
...@@ -1244,8 +1243,6 @@ static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, ...@@ -1244,8 +1243,6 @@ static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
subpel_params[ref].ys, xd); subpel_params[ref].ys, xd);
} }
// if (!is_masked_compound_type(mi->mbmi.interinter_compound_type))
// assert(conv_params.do_post_rounding);
// TODO(angiebird): This part needs optimization // TODO(angiebird): This part needs optimization
if (conv_params.do_post_rounding) { if (conv_params.do_post_rounding) {
assert(!is_masked_compound_type(mi->mbmi.interinter_compound_type)); assert(!is_masked_compound_type(mi->mbmi.interinter_compound_type));
......
...@@ -58,10 +58,6 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, ...@@ -58,10 +58,6 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h, av1_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, 1, interp_filters, subpel_x, xs, subpel_y, ys, 1,
conv_params, sf); conv_params, sf);
if (conv_params->is_compound)
conv_params->do_post_rounding = 1;
else
conv_params->do_post_rounding = 0;
} else { } else {
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters, av1_convolve_scale(src, src_stride, dst, dst_stride, w, h, interp_filters,
...@@ -81,10 +77,6 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride, ...@@ -81,10 +77,6 @@ static INLINE void inter_predictor(const uint8_t *src, int src_stride,
interp_filters, subpel_x, xs, subpel_y, ys, 0, interp_filters, subpel_x, xs, subpel_y, ys, 0,
conv_params, sf); conv_params, sf);
if (conv_params->is_compound)
conv_params->do_post_rounding = 1;
else
conv_params->do_post_rounding = 0;
} else { } else {
assert(conv_params->round == CONVOLVE_OPT_ROUND); assert(conv_params->round == CONVOLVE_OPT_ROUND);
...@@ -132,10 +124,6 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, ...@@ -132,10 +124,6 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h, av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, interp_filters, subpel_x, xs, subpel_y, ys,
1, conv_params, bd); 1, conv_params, bd);
if (conv_params->is_compound)
conv_params->do_post_rounding = 1;
else
conv_params->do_post_rounding = 0;
} else { } else {
av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h, av1_highbd_convolve_scale(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, avg, interp_filters, subpel_x, xs, subpel_y, ys, avg,
...@@ -154,10 +142,6 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, ...@@ -154,10 +142,6 @@ static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride,
av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h, av1_highbd_convolve_2d_facade(src, src_stride, dst, dst_stride, w, h,
interp_filters, subpel_x, xs, subpel_y, ys, interp_filters, subpel_x, xs, subpel_y, ys,
0, conv_params, bd); 0, conv_params, bd);
if (conv_params->is_compound)
conv_params->do_post_rounding = 1;
else
conv_params->do_post_rounding = 0;
} else { } else {
InterpFilterParams filter_params_x, filter_params_y; InterpFilterParams filter_params_x, filter_params_y;
#if CONFIG_SHORT_FILTER #if CONFIG_SHORT_FILTER
......
...@@ -422,21 +422,17 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, ...@@ -422,21 +422,17 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
ConvolveParams *conv_params, int16_t alpha, ConvolveParams *conv_params, int16_t alpha,
int16_t beta, int16_t gamma, int16_t delta) { int16_t beta, int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8]; int32_t tmp[15 * 8];
const int use_conv_params = const int reduce_bits_horiz =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst); conv_params->round_0 +
int reduce_bits_horiz = conv_params->round_0; AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16) const int reduce_bits_vert = conv_params->is_compound
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz; const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1; const int offset_bits_horiz = bd + FILTER_BITS - 1;
const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
(void)max_bits_horiz; (void)max_bits_horiz;
assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
for (int i = p_row; i < p_row + p_height; i += 8) { for (int i = p_row; i < p_row + p_height; i += 8) {
for (int j = p_col; j < p_col + p_width; j += 8) { for (int j = p_col; j < p_col + p_width; j += 8) {
...@@ -500,7 +496,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref, ...@@ -500,7 +496,7 @@ void av1_highbd_warp_affine_c(const int32_t *mat, const uint16_t *ref,
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
} }
if (use_conv_params) { if (conv_params->is_compound) {
CONV_BUF_TYPE *p = CONV_BUF_TYPE *p =
&conv_params &conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride + ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
...@@ -716,19 +712,15 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, ...@@ -716,19 +712,15 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
int16_t gamma, int16_t delta) { int16_t gamma, int16_t delta) {
int32_t tmp[15 * 8]; int32_t tmp[15 * 8];
const int bd = 8; const int bd = 8;
const int use_conv_params =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz = conv_params->round_0; const int reduce_bits_horiz = conv_params->round_0;
const int reduce_bits_vert = use_conv_params const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz; const int max_bits_horiz = bd + FILTER_BITS + 1 - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1; const int offset_bits_horiz = bd + FILTER_BITS - 1;
const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz; const int offset_bits_vert = bd + 2 * FILTER_BITS - reduce_bits_horiz;
if (use_conv_params) {
conv_params->do_post_rounding = 1;
}
(void)max_bits_horiz; (void)max_bits_horiz;
assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
for (int i = p_row; i < p_row + p_height; i += 8) { for (int i = p_row; i < p_row + p_height; i += 8) {
for (int j = p_col; j < p_col + p_width; j += 8) { for (int j = p_col; j < p_col + p_width; j += 8) {
...@@ -798,7 +790,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width, ...@@ -798,7 +790,7 @@ void av1_warp_affine_c(const int32_t *mat, const uint8_t *ref, int width,
sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m]; sum += tmp[(k + m + 4) * 8 + (l + 4)] * coeffs[m];
} }
if (use_conv_params) { if (conv_params->is_compound) {
CONV_BUF_TYPE *p = CONV_BUF_TYPE *p =
&conv_params &conv_params
->dst[(i - p_row + k + 4) * conv_params->dst_stride + ->dst[(i - p_row + k + 4) * conv_params->dst_stride +
......
...@@ -24,19 +24,14 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, ...@@ -24,19 +24,14 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
int comp_avg = conv_params->do_average; int comp_avg = conv_params->do_average;
__m128i tmp[15]; __m128i tmp[15];
int i, j, k; int i, j, k;
const int use_conv_params = const int reduce_bits_horiz =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst); conv_params->round_0 +
int reduce_bits_horiz = conv_params->round_0; AOMMAX(bd + FILTER_BITS - conv_params->round_0 - 14, 0);
if (!use_conv_params && bd + FILTER_BITS + 2 - reduce_bits_horiz > 16) const int reduce_bits_vert = conv_params->is_compound
reduce_bits_horiz += bd + FILTER_BITS - reduce_bits_horiz - 14;
const int reduce_bits_vert = use_conv_params
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1; const int offset_bits_horiz = bd + FILTER_BITS - 1;
if (use_conv_params) { assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
conv_params->do_post_rounding = 1;
}
assert(FILTER_BITS == FILTER_BITS);
assert(!(bd == 12 && reduce_bits_horiz < 5)); assert(!(bd == 12 && reduce_bits_horiz < 5));
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
...@@ -302,7 +297,7 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref, ...@@ -302,7 +297,7 @@ void av1_highbd_warp_affine_sse4_1(const int32_t *mat, const uint16_t *ref,
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
if (use_conv_params) { if (conv_params->is_compound) {
__m128i *const p = __m128i *const p =
(__m128i *)&conv_params (__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j]; ->dst[(i + k + 4) * conv_params->dst_stride + j];
......
...@@ -212,16 +212,13 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, ...@@ -212,16 +212,13 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
__m128i tmp[15]; __m128i tmp[15];
int i, j, k; int i, j, k;
const int bd = 8; const int bd = 8;
const int use_conv_params =
(conv_params->round == CONVOLVE_OPT_NO_ROUND && conv_params->dst);
const int reduce_bits_horiz = conv_params->round_0; const int reduce_bits_horiz = conv_params->round_0;
const int reduce_bits_vert = use_conv_params const int reduce_bits_vert = conv_params->is_compound
? conv_params->round_1 ? conv_params->round_1
: 2 * FILTER_BITS - reduce_bits_horiz; : 2 * FILTER_BITS - reduce_bits_horiz;
const int offset_bits_horiz = bd + FILTER_BITS - 1; const int offset_bits_horiz = bd + FILTER_BITS - 1;
if (use_conv_params) { assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));
conv_params->do_post_rounding = 1;
}
#if CONFIG_JNT_COMP #if CONFIG_JNT_COMP
const int w0 = conv_params->fwd_offset; const int w0 = conv_params->fwd_offset;
const int w1 = conv_params->bck_offset; const int w1 = conv_params->bck_offset;
...@@ -475,7 +472,7 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width, ...@@ -475,7 +472,7 @@ void av1_warp_affine_sse4_1(const int32_t *mat, const uint8_t *ref, int width,
__m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd); __m128i res_lo = _mm_unpacklo_epi32(res_even, res_odd);
__m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd); __m128i res_hi = _mm_unpackhi_epi32(res_even, res_odd);
if (use_conv_params) { if (conv_params->is_compound) {
__m128i *const p = __m128i *const p =
(__m128i *)&conv_params (__m128i *)&conv_params
->dst[(i + k + 4) * conv_params->dst_stride + j]; ->dst[(i + k + 4) * conv_params->dst_stride + j];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment