Commit 007aa7dd authored by Geza Lore

Refactoring in preparation for OBMC optimizations.

- Use int32_t instead of int in vpx_obmc_{variance,sad} functions
- Remove weighted_src and obmc mask strides and assume contiguous
  buffers. These inputs can always be packed as contiguous arrays.

Change-Id: I74c09b3fb3337f13d39e13a9cb61e140536f345d
parent f9e38a7b
......@@ -1228,19 +1228,16 @@ MAKE_MBFP_SAD_WRAPPER(vpx_highbd_masked_sad4x4)
#define MAKE_OBFP_SAD_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride); \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk); \
} \
static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 2; \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk) >> 2; \
} \
static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return fnname(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride) >> 4; \
const int32_t *wsrc, const int32_t *msk) { \
return fnname(ref, ref_stride, wsrc, msk) >> 4; \
}
#if CONFIG_EXT_PARTITION
......
This diff is collapsed.
......@@ -198,16 +198,16 @@ int vp10_masked_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
int vp10_obmc_full_pixel_diamond(const struct VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
MV *mvp_full, int step_param,
int sadpb, int further_steps, int do_refine,
const vp10_variance_fn_ptr_t *fn_ptr,
const MV *ref_mv, MV *dst_mv,
int is_second);
int vp10_find_best_obmc_sub_pixel_tree_up(struct VP10_COMP *cpi, MACROBLOCK *x,
const int *wsrc, int wsrc_stride,
const int *mask, int mask_stride,
const int32_t *wsrc,
const int32_t *mask,
int mi_row, int mi_col,
MV *bestmv, const MV *ref_mv,
int allow_hp, int error_per_bit,
......
......@@ -6073,8 +6073,7 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd,
#if CONFIG_OBMC
static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col,
const int* wsrc, int wsrc_stride,
const int* mask, int mask_stride,
const int32_t* wsrc, const int32_t* mask,
#if CONFIG_EXT_INTER
int ref_idx,
int mv_idx,
......@@ -6173,8 +6172,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
mvp_full.col >>= 3;
mvp_full.row >>= 3;
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, wsrc_stride,
mask, mask_stride,
bestsme = vp10_obmc_full_pixel_diamond(cpi, x, wsrc, mask,
&mvp_full, step_param, sadpb,
MAX_MVSEARCH_STEPS - 1 - step_param,
1, &cpi->fn_ptr[bsize],
......@@ -6188,8 +6186,7 @@ static void single_motion_search_obmc(VP10_COMP *cpi, MACROBLOCK *x,
if (bestsme < INT_MAX) {
int dis;
vp10_find_best_obmc_sub_pixel_tree_up(cpi, x,
wsrc, wsrc_stride,
mask, mask_stride,
wsrc, mask,
mi_row, mi_col,
&tmp_mv->as_mv, &ref_mv,
cm->allow_high_precision_mv,
......@@ -6796,8 +6793,8 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
#if CONFIG_OBMC
uint8_t *dst_buf1[3], int dst_stride1[3],
uint8_t *dst_buf2[3], int dst_stride2[3],
int *wsrc, int wsrc_strides,
int *mask2d, int mask2d_strides,
const int32_t *const wsrc,
const int32_t *const mask2d,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
int_mv single_newmvs[2][MAX_REF_FRAMES],
......@@ -7739,8 +7736,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
pred_mv.as_int = mbmi->mv[0].as_int;
single_motion_search_obmc(cpi, x, bsize, mi_row, mi_col,
wsrc, wsrc_strides,
mask2d, mask2d_strides,
wsrc, mask2d,
#if CONFIG_EXT_INTER
0, mv_idx,
#endif // CONFIG_EXT_INTER
......@@ -8494,13 +8490,11 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]);
#endif // CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, int, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int, mask2d_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]);
DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]);
uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE];
int dst_stride1[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int dst_stride2[MAX_MB_PLANE] = {MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE};
int weighted_src_stride = MAX_SB_SIZE;
int mask2d_stride = MAX_SB_SIZE;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
......@@ -8605,8 +8599,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
calc_target_weighted_pred(cm, x, xd, mi_row, mi_col,
dst_buf1[0], dst_stride1[0],
dst_buf2[0], dst_stride2[0],
mask2d_buf, mask2d_stride,
weighted_src_buf, weighted_src_stride);
mask2d_buf, weighted_src_buf);
#endif // CONFIG_OBMC
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
......@@ -9143,8 +9136,8 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
#if CONFIG_OBMC
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf, weighted_src_stride,
mask2d_buf, mask2d_stride,
weighted_src_buf,
mask2d_buf,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
single_newmvs,
......@@ -9258,8 +9251,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
dst_buf1, dst_stride1,
dst_buf2, dst_stride2,
weighted_src_buf,
weighted_src_stride,
mask2d_buf, mask2d_stride,
mask2d_buf,
#endif // CONFIG_OBMC
#if CONFIG_EXT_INTER
dummy_single_newmvs,
......@@ -10970,14 +10962,16 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf, int mask_stride,
int *weighted_src_buf, int weighted_src_stride) {
int32_t *mask_buf,
int32_t *weighted_src_buf) {
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
int row, col, i, mi_step;
int bw = 8 * xd->n8_w;
int bh = 8 * xd->n8_h;
int *dst = weighted_src_buf;
int *mask2d = mask_buf;
const int mask_stride = bw;
const int weighted_src_stride = bw;
int32_t *dst = weighted_src_buf;
int32_t *mask2d = mask_buf;
uint8_t *src;
#if CONFIG_VP9_HIGHBITDEPTH
int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
......@@ -11009,11 +11003,11 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int32_t *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int tmp_stride = above_stride;
uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
int32_t *mask2d = mask_buf + (i * MI_SIZE >> pd->subsampling_x);
const uint8_t *mask1d[2];
setup_obmc_mask(bh, mask1d);
......@@ -11078,14 +11072,14 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int bw = overlap >> pd->subsampling_x;
int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int *dst = weighted_src_buf +
int32_t *dst = weighted_src_buf +
(i * MI_SIZE * dst_stride >> pd->subsampling_y);
int tmp_stride = left_stride;
uint8_t *tmp = left_buf +
(i * MI_SIZE * tmp_stride >> pd->subsampling_y);
int mask2d_stride = mask_stride;
int *mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
int32_t *mask2d = mask_buf +
(i * MI_SIZE * mask2d_stride >> pd->subsampling_y);
const uint8_t *mask1d[2];
setup_obmc_mask(bw, mask1d);
......
......@@ -97,8 +97,7 @@ void calc_target_weighted_pred(VP10_COMMON *cm,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int *mask_buf, int mask_stride,
int *weighted_src_buf, int weighted_src_stride);
int32_t *mask_buf, int32_t *weighted_src_buf);
#endif // CONFIG_OBMC
#ifdef __cplusplus
......
......@@ -456,21 +456,19 @@ HIGHBD_MASKSADMXN(4, 4)
// b: target weighted prediction (has been *4096 to keep precision)
// m: 2d weights (scaled by 4096)
static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
const int32_t *b,
const int32_t *m,
int width, int height) {
int y, x;
unsigned int sad = 0;
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int abs_diff = abs(b[x] - a[x] * m[x]);
sad += (abs_diff + 2048) >> 12;
}
for (x = 0; x < width; x++)
sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
a += a_stride;
b += b_stride;
m += m_stride;
b += width;
m += width;
}
return sad;
......@@ -478,9 +476,9 @@ static INLINE unsigned int obmc_sad(const uint8_t *a, int a_stride,
#define OBMCSADMxN(m, n) \
unsigned int vpx_obmc_sad##m##x##n##_c(const uint8_t *ref, int ref_stride, \
const int *wsrc, int wsrc_stride, \
const int *msk, int msk_stride) { \
return obmc_sad(ref, ref_stride, wsrc, wsrc_stride, msk, msk_stride, m, n); \
const int32_t *wsrc, \
const int32_t *msk) { \
return obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
}
#if CONFIG_EXT_PARTITION
......@@ -504,22 +502,20 @@ OBMCSADMxN(4, 4)
#if CONFIG_VP9_HIGHBITDEPTH
static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
const int *b, int b_stride,
const int *m, int m_stride,
const int32_t *b,
const int32_t *m,
int width, int height) {
int y, x;
unsigned int sad = 0;
const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
for (y = 0; y < height; y++) {
for (x = 0; x < width; x++) {
int abs_diff = abs(b[x] - a[x] * m[x]);
sad += (abs_diff + 2048) >> 12;
}
for (x = 0; x < width; x++)
sad += ROUND_POWER_OF_TWO(abs(b[x] - a[x] * m[x]), 12);
a += a_stride;
b += b_stride;
m += m_stride;
b += width;
m += width;
}
return sad;
......@@ -528,12 +524,9 @@ static INLINE unsigned int highbd_obmc_sad(const uint8_t *a8, int a_stride,
#define HIGHBD_OBMCSADMXN(m, n) \
unsigned int vpx_highbd_obmc_sad##m##x##n##_c(const uint8_t *ref, \
int ref_stride, \
const int *wsrc, \
int wsrc_stride, \
const int *msk, \
int msk_stride) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, wsrc_stride, \
msk, msk_stride, m, n); \
const int32_t *wsrc, \
const int32_t *msk) { \
return highbd_obmc_sad(ref, ref_stride, wsrc, msk, m, n); \
}
#if CONFIG_EXT_PARTITION
......
This diff is collapsed.
......@@ -101,24 +101,18 @@ typedef unsigned int (*vpx_masked_subpixvariance_fn_t)(const uint8_t *src,
#if CONFIG_VP10 && CONFIG_OBMC
typedef unsigned int(*vpx_obmc_sad_fn_t)(const uint8_t *pred,
int pred_stride,
const int *wsrc,
int wsrc_stride,
const int *msk,
int msk_stride);
const int32_t *wsrc,
const int32_t *msk);
typedef unsigned int (*vpx_obmc_variance_fn_t)(const uint8_t *pred,
int pred_stride,
const int *wsrc,
int wsrc_stride,
const int *msk,
int msk_stride,
const int32_t *wsrc,
const int32_t *msk,
unsigned int *sse);
typedef unsigned int (*vpx_obmc_subpixvariance_fn_t)(const uint8_t *pred,
int pred_stride,
int xoffset, int yoffset,
const int *wsrc,
int wsrc_stride,
const int *msk,
int msk_stride,
const int32_t *wsrc,
const int32_t *msk,
unsigned int *sse);
#endif // CONFIG_VP10 && CONFIG_OBMC
......
......@@ -1103,14 +1103,14 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
if (vpx_config("CONFIG_OBMC") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
add_proto qw/unsigned int/, "vpx_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
specialize "vpx_obmc_sad${w}x${h}";
}
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride";
add_proto qw/unsigned int/, "vpx_highbd_obmc_sad${w}x${h}", "const uint8_t *ref_ptr, int ref_stride, const int32_t *wsrc_ptr, const int32_t *mask";
specialize "vpx_highbd_obmc_sad${w}x${h}";
}
}
......@@ -1400,8 +1400,8 @@ if (vpx_config("CONFIG_EXT_INTER") eq "yes") {
if (vpx_config("CONFIG_OBMC") eq "yes") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
specialize "vpx_obmc_variance${w}x${h}";
specialize "vpx_obmc_sub_pixel_variance${w}x${h}";
}
......@@ -1410,8 +1410,8 @@ if (vpx_config("CONFIG_OBMC") eq "yes") {
foreach $bd ("_", "_10_", "_12_") {
foreach (@block_sizes) {
($w, $h) = @$_;
add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int *wsrc_ptr, int wsrc_stride, const int *mask, int mask_stride, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
add_proto qw/unsigned int/, "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}", "const uint8_t *pre_ptr, int pre_stride, int xoffset, int yoffset, const int32_t *wsrc_ptr, const int32_t *mask, unsigned int *sse";
specialize "vpx_highbd${bd}obmc_variance${w}x${h}";
specialize "vpx_highbd${bd}obmc_sub_pixel_variance${w}x${h}";
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment