Commit 007aa7dd authored by Geza Lore

Refactoring in preparation for OBMC optimizations.

- Use int32_t instead of int in vpx_obmc{variance,sad} functions
- Remove weighted_src and obmc mask strides and assume contiguous
  buffers. These inputs can always be packed as contiguous arrays
  (see the packing sketch after the commit header).

Change-Id: I74c09b3fb3337f13d39e13a9cb61e140536f345d
parent f9e38a7b
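
A minimal sketch of the layout the new signatures assume (illustrative only, not part of this commit): the weighted-source and OBMC mask planes are packed row after row with no padding, so the int32_t kernels advance by the block width instead of carrying separate stride arguments. The helper name pack_rows_contiguous and its parameters are hypothetical.

#include <stdint.h>

/* Hypothetical helper: copy a strided int32_t plane into the contiguous,
 * width-strided layout the refactored vpx_obmc_{sad,variance} code expects. */
static void pack_rows_contiguous(int32_t *dst, const int32_t *src,
                                 int src_stride, int width, int height) {
  int r, c;
  for (r = 0; r < height; ++r) {
    for (c = 0; c < width; ++c)
      dst[c] = src[c];   /* sample (r, c) ends up at dst[r * width + c] */
    src += src_stride;   /* the source plane may be padded... */
    dst += width;        /* ...the packed buffer is not */
  }
}

With this layout the kernels in the diff below can step with b += width and m += width per row, which is exactly what the updated obmc_sad/obmc_variance loops do.
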
@@ -1228,19 +1228,16 @@ MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
#define MAKE_OBFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride); \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk); \
} \
static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk) >> 2; \
} \
static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk) >> 4; \
}
#if CONFIG_EXT_PARTITION
@@ -3109,8 +3109,7 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
/* returns subpixel variance error function */
#define DIST(r, c) \
vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \
src_stride, mask, mask_stride, &sse)
vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse)
/* checks if (r, c) has better score than previous best */
#define MVC(r, c) \
@@ -3140,8 +3139,8 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
#define CHECK_BETTER1(v, r, c) \
if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
thismse = upsampled_obmc_pref_error(xd, \
mask, mask_stride, \
vfp, z, src_stride, \
mask, \
vfp, z, \
upre(y, y_stride, r, c), \
y_stride, \
w, h, &sse); \
@@ -3156,14 +3155,12 @@ int vp10_masked_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
v = INT_MAX; \
}
static unsigned int setup_obmc_center_error(const int *mask,
int mask_stride,
static unsigned int setup_obmc_center_error(const int32_t *mask,
const MV *bestmv,
const MV *ref_mv,
int error_per_bit,
const vp10_variance_fn_ptr_t *vfp,
const int *const wsrc,
const int wsrc_stride,
const int32_t *const wsrc,
const uint8_t *const y,
int y_stride,
int offset,
@@ -3171,18 +3168,16 @@ static unsigned int setup_obmc_center_error(const int *mask,
unsigned int *sse1,
int *distortion) {
unsigned int besterr;
besterr = vfp->ovf(y + offset, y_stride, wsrc, wsrc_stride,
mask, mask_stride, sse1);
besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1);
*distortion = besterr;
besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit);
return besterr;
}
static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
const int *mask, int mask_stride,
const int32_t *mask,
const vp10_variance_fn_ptr_t *vfp,
const int *const wsrc,
const int wsrc_stride,
const int32_t *const wsrc,
const uint8_t *const y, int y_stride,
int w, int h, unsigned int *sse) {
unsigned int besterr;
@@ -3191,8 +3186,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]);
vpx_highbd_upsampled_pred(pred16, w, h, y, y_stride);
besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, wsrc_stride,
mask, mask_stride, sse);
besterr = vfp->ovf(CONVERT_TO_BYTEPTR(pred16), w, wsrc, mask, sse);
} else {
DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]);
#else
@@ -3201,7 +3195,7 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
#endif // CONFIG_VP9_HIGHBITDEPTH
vpx_upsampled_pred(pred, w, h, y, y_stride);
besterr = vfp->ovf(pred, w, wsrc, wsrc_stride, mask, mask_stride, sse);
besterr = vfp->ovf(pred, w, wsrc, mask, sse);
#if CONFIG_VP9_HIGHBITDEPTH
}
#endif
@@ -3210,15 +3204,14 @@ static int upsampled_obmc_pref_error(const MACROBLOCKD *xd,
static unsigned int upsampled_setup_obmc_center_error(
const MACROBLOCKD *xd,
const int *mask, int mask_stride,
const int32_t *mask,
const MV *bestmv, const MV *ref_mv,
int error_per_bit, const vp10_variance_fn_ptr_t *vfp,
const int *const wsrc, const int wsrc_stride,
const int32_t *const wsrc,
const uint8_t *const y, int y_stride,
int w, int h, int offset, int *mvjcost, int *mvcost[2],
unsigned int *sse1, int *distortion) {
unsigned int besterr = upsampled_obmc_pref_error(xd, mask, mask_stride, vfp,
wsrc, wsrc_stride,
unsigned int besterr = upsampled_obmc_pref_error(xd, mask, vfp, wsrc,
y + offset, y_stride,
w, h, sse1);
*distortion = besterr;
@@ -3227,8 +3220,8 @@ static unsigned int upsampled_setup_obmc_center_error(
}
int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit,
@@ -3240,7 +3233,6 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
int use_upsampled_ref) {
const int *const z = wsrc;
const int *const src_address = z;
const int src_stride = wsrc_stride;
MACROBLOCKD *xd = &x->e_mbd;
struct macroblockd_plane *const pd = &xd->plane[0];
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
@@ -3292,14 +3284,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
// use_upsampled_ref can be 0 or 1
if (use_upsampled_ref)
besterr = upsampled_setup_obmc_center_error(
xd, mask, mask_stride, bestmv, ref_mv, error_per_bit,
vfp, z, src_stride, y, y_stride,
xd, mask, bestmv, ref_mv, error_per_bit,
vfp, z, y, y_stride,
w, h, (offset << 3),
mvjcost, mvcost, sse1, distortion);
else
besterr = setup_obmc_center_error(
mask, mask_stride, bestmv, ref_mv, error_per_bit,
vfp, z, src_stride, y, y_stride,
mask, bestmv, ref_mv, error_per_bit,
vfp, z, y, y_stride,
offset, mvjcost, mvcost, sse1, distortion);
for (iter = 0; iter < round; ++iter) {
@@ -3313,16 +3305,15 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
vfp, src_address, src_stride,
thismse = upsampled_obmc_pref_error(xd, mask,
vfp, src_address,
pre_address, y_stride,
w, h, &sse);
} else {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride +
(tc >> 3);
thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride,
mask, mask_stride, &sse);
src_address, mask, &sse);
}
cost_array[idx] = thismse +
@@ -3350,15 +3341,14 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
if (use_upsampled_ref) {
const uint8_t *const pre_address = y + tr * y_stride + tc;
thismse = upsampled_obmc_pref_error(xd, mask, mask_stride,
vfp, src_address, src_stride,
thismse = upsampled_obmc_pref_error(xd, mask, vfp, src_address,
pre_address, y_stride,
w, h, &sse);
} else {
const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
thismse = vfp->osvf(pre_address, y_stride, sp(tc), sp(tr),
src_address, src_stride, mask, mask_stride, &sse);
src_address, mask, &sse);
}
cost_array[4] = thismse +
@@ -3422,8 +3412,8 @@ int vp10_find_best_obmc_sub_pixel_tree_up(VP10_COMP *cpi, MACROBLOCK *x,
#undef CHECK_BETTER
static int get_obmc_mvpred_var(const MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
const MV *best_mv, const MV *center_mv,
const vp10_variance_fn_ptr_t *vfp,
int use_mvcost, int is_second) {
@@ -3433,14 +3423,14 @@ static int get_obmc_mvpred_var(const MACROBLOCK *x,
unsigned int unused;
return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride,
wsrc, wsrc_stride, mask, mask_stride, &unused) +
wsrc, mask, &unused) +
(use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost,
x->mvcost, x->errorperbit) : 0);
}
int obmc_refining_search_sad(const MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
MV *ref_mv, int error_per_bit,
int search_range,
const vp10_variance_fn_ptr_t *fn_ptr,
@@ -3450,8 +3440,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
const struct buf_2d *const in_what = &xd->plane[0].pre[is_second];
const MV fcenter_mv = {center_mv->row >> 3, center_mv->col >> 3};
unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv),
in_what->stride,
wsrc, wsrc_stride, mask, mask_stride) +
in_what->stride, wsrc, mask) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit);
int i, j;
@@ -3463,8 +3452,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
ref_mv->col + neighbors[j].col};
if (is_mv_in(x, &mv)) {
unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv),
in_what->stride, wsrc, wsrc_stride,
mask, mask_stride);
in_what->stride, wsrc, mask);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit);
if (sad < best_sad) {
@@ -3487,8 +3475,7 @@ int obmc_refining_search_sad(const MACROBLOCK *x,
int obmc_diamond_search_sad(const MACROBLOCK *x,
const search_site_config *cfg,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc, const int32_t *mask,
MV *ref_mv, MV *best_mv,
int search_param,
int sad_per_bit, int *num00,
@@ -3516,8 +3503,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
*best_mv = *ref_mv;
// Check the starting position
best_sad = fn_ptr->osdf(best_address, in_what->stride,
wsrc, wsrc_stride, mask, mask_stride) +
best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) +
mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit);
i = 1;
@@ -3528,7 +3514,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
best_mv->col + ss[i].mv.col};
if (is_mv_in(x, &mv)) {
int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride,
wsrc, wsrc_stride, mask, mask_stride);
wsrc, mask);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
@@ -3552,8 +3538,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
best_mv->col + ss[best_site].mv.col};
if (is_mv_in(x, &this_mv)) {
int sad = fn_ptr->osdf(best_address + ss[best_site].offset,
in_what->stride, wsrc, wsrc_stride,
mask, mask_stride);
in_what->stride, wsrc, mask);
if (sad < best_sad) {
sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
if (sad < best_sad) {
@@ -3576,8 +3561,7 @@ int obmc_diamond_search_sad(const MACROBLOCK *x,
}
int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc, const int32_t *mask,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
const vp10_variance_fn_ptr_t *fn_ptr,
@@ -3586,13 +3570,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
MV temp_mv;
int thissme, n, num00 = 0;
int bestsme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
wsrc, wsrc_stride,
mask, mask_stride,
wsrc, mask,
mvp_full, &temp_mv,
step_param, sadpb, &n,
fn_ptr, ref_mv, is_second);
if (bestsme < INT_MAX)
bestsme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
bestsme = get_obmc_mvpred_var(x, wsrc, mask,
&temp_mv, ref_mv, fn_ptr, 1, is_second);
*dst_mv = temp_mv;
@@ -3608,13 +3591,12 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
num00--;
} else {
thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg,
wsrc, wsrc_stride,
mask, mask_stride,
wsrc, mask,
mvp_full, &temp_mv,
step_param + n, sadpb, &num00,
fn_ptr, ref_mv, is_second);
if (thissme < INT_MAX)
thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
thissme = get_obmc_mvpred_var(x, wsrc, mask,
&temp_mv, ref_mv, fn_ptr, 1, is_second);
// check to see if refining search is needed.
@@ -3632,11 +3614,11 @@ int vp10_obmc_full_pixel_diamond(const VP10_COMP *cpi, MACROBLOCK *x,
if (do_refine) {
const int search_range = 8;
MV best_mv = *dst_mv;
thissme = obmc_refining_search_sad(x, wsrc, wsrc_stride, mask, mask_stride,
thissme = obmc_refining_search_sad(x, wsrc, mask,
&best_mv, sadpb, search_range,
fn_ptr, ref_mv, is_second);
if (thissme < INT_MAX)
thissme = get_obmc_mvpred_var(x, wsrc, wsrc_stride, mask, mask_stride,
thissme = get_obmc_mvpred_var(x, wsrc, mask,
&best_mv, ref_mv, fn_ptr, 1, is_second);
if (thissme < bestsme) {
bestsme = thissme;
@@ -198,16 +198,16 @@ int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
const vp10_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv,
int is_second);
int vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit,
@@ -6073,8 +6073,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
#if CONFIG_OBMC
static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
const int* wsrc, int wsrc_stride,
const int* mask, int mask_stride,
const int32_t* wsrc, const int32_t* mask,
#if CONFIG_EXT_INTER
int ref_idx,
int mv_idx,
@@ -6173,8 +6172,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
mask, mask_stride,
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, mask,
&mvp_full, step_param, sadpb,
MAX_MVSEARCH_STEPS - 1 - step_param,
1, &cpi->fn_ptr[bsize],
@@ -6188,8 +6186,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis;
vp10_find_best_obmc_sub_pixel_tree_up(cpi, x,
wsrc, wsrc_stride,
mask, mask_stride,
wsrc, mask,
mi_row, mi_col,
&tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
@@ -6796,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
uint8_t *dst_buf1[3], int dst_stride1[3],
uint8_t *dst_buf2[3], int dst_stride2[3],
int *wsrc, int wsrc_strides,
int *mask2d, int mask2d_strides,
const int32_t *const wsrc,
const int32_t *const mask2d,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
int_mv single_newmvs[2][MAX_REF_FRAMES],
@@ -7739,8 +7736,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
pred_mv.as_int = mbmi->mv[0].as_int;
single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col,
wsrc, wsrc_strides,
mask2d, mask2d_strides,
wsrc, mask2d,
#if CONFIG_EXT_INTER
0, mv_idx,
#endif // CONFIG_EXT_INTER
@@ -8494,13 +8490,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]);
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int weighted_src_stride = MAX_SB_SIZE;
int mask2d_stride = MAX_SB_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
@@ -8605,8 +8599,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
calc_target_weighted_pred(cm, x, xd, mi_row, mi_col,
dst_buf1[0], dst_stride1[0],
dst_buf2[0], dst_stride2[0],
mask2d_buf, mask2d_stride,
weighted_src_buf, weighted_src_stride);
mask2d_buf, weighted_src_buf);
#endif // CONFIG_OBMC
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
@@ -9143,8 +9136,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf, weighted_src_stride,
mask2d_buf, mask2d_stride,
weighted_src_buf,
mask2d_buf,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
single_newmvs,
@@ -9258,8 +9251,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf,
weighted_src_stride,
mask2d_buf, mask2d_stride,
mask2d_buf,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
dummy_single_newmvs,
@@ -10970,14 +10962,16 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf, int mask_stride,
int *weighted_src_buf, int weighted_src_stride) {
int32_t *mask_buf,
int32_t *weighted_src_buf) {
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
int row, col, i, mi_step;
int bw = 8 * xd->n8_w;
int bh = 8 * xd->n8_h;
int *dst = weighted_src_buf;
int *mask2d = mask_buf;
const int mask_stride = bw;
const int weighted_src_stride = bw;
int32_t *dst = weighted_src_buf;
int32_t *mask2d = mask_buf;
uint8_t *src;
#if CONFIG_VP9_HIGHBITDEPTH
int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
@@ -11009,11 +11003,11 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int32_t *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int tmp_stride = above_stride;
uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
int32_t *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
const uint8_t *mask1d[2];
setup_obmc_mask(bh, mask1d);
@@ -11078,14 +11072,14 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int bw = overlap >> pd->subsampling_x;
int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf +
int32_t *dst = weighted_src_buf +
(i * MI_SIZE * dst_stride >> pd->subsampling_y);
int tmp_stride = left_stride;
uint8_t *tmp = left_buf +
(i * MI_SIZE * tmp_stride >> pd->subsampling_y);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
int32_t *mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
const uint8_t *mask1d[2];
setup_obmc_mask(bw, mask1d);
@@ -97,8 +97,7 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf, int mask_stride,
int *weighted_src_buf, int weighted_src_stride);
int32_t *mask_buf, int32_t *weighted_src_buf);
#endif // CONFIG_OBMC
#ifdef __cplusplus
@@ -456,21 +456,19 @@ HIGHBD_MASKSADMXN(4, 4)
// b: target weighted prediction (has been *4096 to keep precision)
// m: 2d weights (scaled by 4096)
static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
const int32_t *b,
const int32_t *m,
int width, int height) {
int y, x;
unsigned int sad = 0;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int abs_diff = abs(b[x] - a[x] * m[x]);
sad += (abs_diff + 2048) >> 12;
}
for (x = 0; x < width; x++)
sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
a += a_stride;
b += b_stride;
m += m_stride;
b += width;
m += width;
}
return sad;
@@ -478,9 +476,9 @@ static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
#define OBMCSADMxN(m, n) \
unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return obmc_sad(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride, m, n); \
const int32_t *wsrc, \
const int32_t *msk) { \
return obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
}
#if CONFIG_EXT_PARTITION
@@ -504,22 +502,20 @@ OBMCSADMxN(4, 4)
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
const int32_t *b,
const int32_t *m,
int width, int height) {
int y, x;
unsigned int sad = 0;
const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int abs_diff = abs(b[x] - a[x] * m[x]);
sad += (abs_diff + 2048) >> 12;
}
for (x = 0; x < width; x++)
sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
a += a_stride;
b += b_stride;
m += m_stride;
b += width;
m += width;
}
return sad;
@@ -528,12 +524,9 @@ static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
#define HIGHBD_OBMCSADMXN(m, n) \
unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \
int ref_stride, \
const int *wsrc, \
int wsrc_stride, \
const int *msk, \
int msk_stride) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride, \
msk, msk_stride, m, n); \
const int32_t *wsrc, \
const int32_t *msk) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
}
#if CONFIG_EXT_PARTITION
@@ -1026,8 +1026,8 @@ HIGHBD_MASK_SUBPIX_VAR(128, 128)
#if CONFIG_VP10 && CONFIG_OBMC
void obmc_variance(const uint8_t *a, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
const int32_t *b,
const int32_t *m,
int w, int h, unsigned int *sse, int *sum) {
int i, j;
@@ -1036,26 +1036,24 @@ void obmc_variance(const uint8_t *a, int a_stride,
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
int scaled_diff = b[j] - a[j] * m[j];
int abs_diff = (abs(scaled_diff) + 2048) >> 12;
int diff = (scaled_diff >= 0) ? abs_diff : -abs_diff;
int diff = ROUND_POWER_OF_TWO_SIGNED(b[j] - a[j] * m[j], 12);
*sum += diff;
*sse += diff * diff;
}
a += a_stride;
b += b_stride;
m += m_stride;
b += w;
m += w;
}
}
#define OBMC_VAR(W, H) \
unsigned int vpx_obmc_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const int *b, int b_stride, \
const int *m, int m_stride, \
const int32_t *b, \
const int32_t *m, \
unsigned int *sse) { \
int sum; \