Commit f2b34c3a authored by Yue Chen's avatar Yue Chen Committed by Gerrit Code Review
Browse files

Merge "Optimize and cleanup obmc predictor and rd search." into nextgenv2

parents ebc2d34c 4c4f04ac
......@@ -1298,97 +1298,63 @@ void vp10_build_inter_predictors_sb_extend(MACROBLOCKD *xd,
#endif // CONFIG_SUPERTX
#if CONFIG_OBMC
// obmc_mask_N[overlap_position]: 64-weight blending mask for a 1-pixel
// overlap (weight for the current predictor; neighbor weight is 64 - mask).
static const uint8_t obmc_mask_1[1] = {
  55
};
// 64-weight blending mask for a 2-pixel overlap.
static const uint8_t obmc_mask_2[2] = {
  45, 62
};
// 64-weight blending mask for a 4-pixel overlap.
static const uint8_t obmc_mask_4[4] = {
  39, 50, 59, 64
};
// 64-weight blending mask for an 8-pixel overlap.
static const uint8_t obmc_mask_8[8] = {
  36, 42, 48, 53, 57, 61, 63, 64
};
// 64-weight blending mask for a 16-pixel overlap.
static const uint8_t obmc_mask_16[16] = {
  34, 37, 40, 43, 46, 49, 52, 54, 56, 58, 60, 61, 63, 64, 64, 64
};
// 64-weight blending mask for a 32-pixel overlap.
static const uint8_t obmc_mask_32[32] = {
  33, 35, 36, 38, 40, 41, 43, 44, 45, 47, 48, 50, 51, 52, 53, 55,
  56, 57, 58, 59, 60, 60, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64
};
#if CONFIG_EXT_PARTITION
// 64-weight blending mask for a 64-pixel overlap (CONFIG_EXT_PARTITION only).
static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
#endif // CONFIG_EXT_PARTITION
// Returns the OBMC blending mask for an overlap of `length` pixels.
// `length` must be a power of two in [1, 32] (or 64 when
// CONFIG_EXT_PARTITION is enabled); any other value asserts and
// returns NULL.
const uint8_t* vp10_get_obmc_mask(int length) {
  switch (length) {
    case 1:
      return obmc_mask_1;
    case 2:
      return obmc_mask_2;
    case 4:
      return obmc_mask_4;
    case 8:
      return obmc_mask_8;
    case 16:
      return obmc_mask_16;
    case 32:
      return obmc_mask_32;
#if CONFIG_EXT_PARTITION
    case 64:
      return obmc_mask_64;
#endif  // CONFIG_EXT_PARTITION
    default:
      assert(0);
      return NULL;
  }
}
......@@ -1398,168 +1364,101 @@ void setup_obmc_mask(int length, const uint8_t *mask[2]) {
// Combines the regular inter prediction already stored in
// xd->plane[].dst.buf with the inter predictors of the above row and left
// column neighbors, using the OBMC vertical/horizontal blending masks.
// `above`/`above_stride` and `left`/`left_stride` hold the per-plane
// neighbor predictions; the blended result is written back in place to
// xd->plane[].dst.buf.
void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
                                      MACROBLOCKD *xd, int mi_row, int mi_col,
                                      uint8_t *above[MAX_MB_PLANE],
                                      int above_stride[MAX_MB_PLANE],
                                      uint8_t *left[MAX_MB_PLANE],
                                      int left_stride[MAX_MB_PLANE]) {
  const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  int plane, i;
#if CONFIG_VP9_HIGHBITDEPTH
  const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // handle above row
  if (xd->up_available) {
    // Overlap height in pixels; halved per plane by chroma subsampling.
    const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
    const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
    const int mi_row_offset = -1;

    assert(miw > 0);

    i = 0;
    do {  // for each mi in the above row
      const int mi_col_offset = i;
      const MB_MODE_INFO *const above_mbmi =
          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
      const int mi_step =
          VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);

      if (is_neighbor_overlappable(above_mbmi)) {
        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
          const struct macroblockd_plane *pd = &xd->plane[plane];
          const int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
          const int bh = overlap >> pd->subsampling_y;
          const int dst_stride = pd->dst.stride;
          uint8_t *const dst =
              &pd->dst.buf[(i * MI_SIZE) >> pd->subsampling_x];
          const int tmp_stride = above_stride[plane];
          const uint8_t *const tmp =
              &above[plane][(i * MI_SIZE) >> pd->subsampling_x];
          // Vertical mask: weight varies with row within the overlap.
          const uint8_t *const mask = vp10_get_obmc_mask(bh);

#if CONFIG_VP9_HIGHBITDEPTH
          if (is_hbd)
            vpx_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride,
                                       tmp, tmp_stride, mask, bh, bw, xd->bd);
          else
#endif  // CONFIG_VP9_HIGHBITDEPTH
            vpx_blend_a64_vmask(dst, dst_stride, dst, dst_stride,
                                tmp, tmp_stride, mask, bh, bw);
        }
      }
      i += mi_step;
    } while (i < miw);
  }

  // handle left column
  if (xd->left_available) {
    // Overlap width in pixels; halved per plane by chroma subsampling.
    const int overlap = num_4x4_blocks_wide_lookup[bsize] * 2;
    const int mih = VPXMIN(xd->n8_h, cm->mi_rows - mi_row);
    const int mi_col_offset = -1;

    assert(mih > 0);

    i = 0;
    do {  // for each mi in the left column
      const int mi_row_offset = i;
      const MB_MODE_INFO *const left_mbmi =
          &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
      const int mi_step =
          VPXMIN(xd->n8_h, num_8x8_blocks_high_lookup[left_mbmi->sb_type]);

      if (is_neighbor_overlappable(left_mbmi)) {
        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
          const struct macroblockd_plane *pd = &xd->plane[plane];
          const int bw = overlap >> pd->subsampling_x;
          const int bh = (mi_step * MI_SIZE) >> pd->subsampling_y;
          const int dst_stride = pd->dst.stride;
          uint8_t *const dst =
              &pd->dst.buf[(i * MI_SIZE * dst_stride) >> pd->subsampling_y];
          const int tmp_stride = left_stride[plane];
          const uint8_t *const tmp =
              &left[plane][(i * MI_SIZE * tmp_stride) >> pd->subsampling_y];
          // Horizontal mask: weight varies with column within the overlap.
          const uint8_t *const mask = vp10_get_obmc_mask(bw);

#if CONFIG_VP9_HIGHBITDEPTH
          if (is_hbd)
            vpx_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride,
                                       tmp, tmp_stride, mask, bh, bw, xd->bd);
          else
#endif  // CONFIG_VP9_HIGHBITDEPTH
            vpx_blend_a64_hmask(dst, dst_stride, dst, dst_stride,
                                tmp, tmp_stride, mask, bh, bw);
        }
      }
      i += mi_step;
    } while (i < mih);
  }
}
#if CONFIG_EXT_INTER
......
......@@ -562,16 +562,13 @@ static INLINE int vp10_is_interp_needed(const MACROBLOCKD *const xd) {
#endif // CONFIG_EXT_INTERP
#if CONFIG_OBMC
// Returns the OBMC blending mask for an overlap of `length` pixels
// (power of two; weights are out of a maximum of 64).
const uint8_t* vp10_get_obmc_mask(int length);
// Blends the inter prediction in xd->plane[].dst.buf with the above-row
// and left-column neighbor predictions (`above`/`left` per-plane buffers
// with their strides), writing the OBMC result back in place.
void vp10_build_obmc_inter_prediction(VP10_COMMON *cm,
                                      MACROBLOCKD *xd, int mi_row, int mi_col,
                                      uint8_t *above[MAX_MB_PLANE],
                                      int above_stride[MAX_MB_PLANE],
                                      uint8_t *left[MAX_MB_PLANE],
                                      int left_stride[MAX_MB_PLANE]);
void vp10_build_prediction_by_above_preds(VP10_COMMON *cm,
MACROBLOCKD *xd,
int mi_row, int mi_col,
......
......@@ -1385,7 +1385,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
dst_buf2, dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
mi_row, mi_col);
// Blend the reconstructed inter prediction with the neighbor predictors.
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                 dst_buf1, dst_stride1,
                                 dst_buf2, dst_stride2);
}
......
......@@ -5091,7 +5091,7 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
dst_stride2);
vp10_setup_dst_planes(xd->plane, get_frame_new_buffer(cm),
mi_row, mi_col);
// Blend the reconstructed inter prediction with the neighbor predictors.
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                 dst_buf1, dst_stride1,
                                 dst_buf2, dst_stride2);
}
......
......@@ -15,6 +15,7 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/blend.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"
......@@ -7780,8 +7781,7 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x,
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#endif // CONFIG_EXT_INTER
}
// Blend the candidate inter prediction with the neighbor predictors.
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                 dst_buf1, dst_stride1,
                                 dst_buf2, dst_stride2);
model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1,
......@@ -8399,6 +8399,18 @@ static void pick_ext_intra_iframe(VP10_COMP *cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
#if CONFIG_OBMC
// Forward declaration: builds the `wsrc_buf` (weighted source) and
// `mask_buf` buffers used by the OBMC motion search; defined later in
// this file.
static void calc_target_weighted_pred(
    const VP10_COMMON *cm,
    const MACROBLOCK *x,
    const MACROBLOCKD *xd,
    int mi_row, int mi_col,
    const uint8_t *above, int above_stride,
    const uint8_t *left, int left_stride,
    int32_t *mask_buf,
    int32_t *wsrc_buf);
#endif // CONFIG_OBMC
void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
TileDataEnc *tile_data,
MACROBLOCK *x,
......@@ -9579,7 +9591,7 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
vp10_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
#if CONFIG_OBMC
if (mbmi->motion_variation == OBMC_CAUSAL)
vp10_build_obmc_inter_prediction(cm, xd, mi_row, mi_col,
                                 dst_buf1, dst_stride1,
                                 dst_buf2, dst_stride2);
#endif // CONFIG_OBMC
......@@ -10980,189 +10992,225 @@ void vp10_rd_pick_inter_mode_sub8x8(struct VP10_COMP *cpi,
}
#if CONFIG_OBMC
void calc_target_weighted_pred(VP10_COMMON *cm,
MACROBLOCK *x,
MACROBLOCKD *xd,
int mi_row, int mi_col,
uint8_t *above_buf, int above_stride,
uint8_t *left_buf, int left_stride,
int32_t *mask_buf,
int32_t *weighted_src_buf) {
BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
int row, col, i, mi_step;
int bw = 8 * xd->n8_w;
int bh = 8 * xd->n8_h;
// This function has a structure similar to vp10_build_obmc_inter_prediction
//
// The OBMC predictor is computed as:
//
// PObmc(x,y) =
// VPX_BLEND_A64(Mh(x),
// VPX_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
// PLeft(x, y))
//
// Scaling up by VPX_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
// rounding, this can be written as:
//
// VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
// Mh(x) * Mv(y) * P(x,y) +
// Mh(x) * Cv(y) * Pabove(x,y) +
// VPX_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// Where :
//
// Cv(y) = VPX_BLEND_A64_MAX_ALPHA - Mv(y)
// Ch(y) = VPX_BLEND_A64_MAX_ALPHA - Mh(y)
//
// This function computes 'wsrc' and 'mask' as:
//
// wsrc(x, y) =
// VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA * src(x, y) -
// Mh(x) * Cv(y) * Pabove(x,y) +
// VPX_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
//
// mask(x, y) = Mh(x) * Mv(y)
//
// These can then be used to efficiently approximate the error for any
// predictor P in the context of the provided neighbouring predictors by
// computing:
//
// error(x, y) =
// wsrc(x, y) - mask(x, y) * P(x, y) / (VPX_BLEND_A64_MAX_ALPHA ** 2)
//
static void calc_target_weighted_pred(
const VP10_COMMON *cm,
const MACROBLOCK *x,
const MACROBLOCKD *xd,
int mi_row, int mi_col,
const uint8_t *above, int above_stride,
const uint8_t *left, int left_stride,
int32_t *mask_buf,
int32_t *wsrc_buf) {
const BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
int row, col, i;
const int bw = 8 * xd->n8_w;
const int bh = 8 * xd->n8_h;
const int wsrc_stride = bw;
const int mask_stride = bw;
const int weighted_src_stride = bw;
int32_t *dst = weighted_src_buf;
int32_t *mask2d = mask_buf;
uint8_t *src;
const int src_scale = VPX_BLEND_A64_MAX_ALPHA * VPX_BLEND_A64_MAX_ALPHA;
#if CONFIG_VP9_HIGHBITDEPTH
int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0;
#else
const int is_hbd = 0;
#endif // CONFIG_VP9_HIGHBITDEPTH
for (row = 0; row < bh; ++row) {
for (col = 0; col < bw; ++col) {
dst[col] = 0;
mask2d[col] = 64;
}
dst += weighted_src_stride;
mask2d += mask_stride;
}
// plane 0 should not be subsampled
assert(xd->plane[0].subsampling_x == 0);
assert(xd->plane[0].subsampling_y == 0);
vp10_zero_array(wsrc_buf, bw * bh);
for (i = 0; i < bw * bh; ++i)
mask_buf[i] = VPX_BLEND_A64_MAX_ALPHA;
// handle above row
if (xd->up_available) {
for (i = 0; i < VPXMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
int mi_row_offset = -1;
int mi_col_offset = i;
MODE_INFO *above_mi = xd->mi[mi_col_offset +
mi_row_offset * xd->mi_stride];
MB_MODE_INFO *above_mbmi = &above_mi->mbmi;
int overlap = num_4x4_blocks_high_lookup[bsize] << 1;
mi_step = VPXMIN(xd->n8_w,
num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
const int overlap = num_4x4_blocks_high_lookup[bsize] * 2;
const int miw = VPXMIN(xd->n8_w, cm->mi_cols - mi_col);
const int mi_row_offset = -1;
const uint8_t *const mask1d = vp10_get_obmc_mask(overlap);
assert(miw > 0);
i = 0;
do { // for each mi in the above row
const int mi_col_offset = i;
const MB_MODE_INFO *const above_mbmi =
&xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi;
const int mi_step =
VPXMIN(xd->n8_w, num_8x8_blocks_wide_lookup[above_mbmi->sb_type]);
const int neighbor_bw = mi_step * MI_SIZE;
if (is_neighbor_overlappable(above_mbmi)) {
const struct macroblockd_plane *pd = &xd->plane[0];
int bw = (mi_step * MI_SIZE) >> pd->subsampling_x;
int bh = overlap >> pd->subsampling_y;
int dst_stride = weighted_src_stride;
int32_t *dst = weighted_src_buf + (i * MI_SIZE >> pd->subsampling_x);
int tmp_stride = above_stride;
uint8_t *tmp = above_buf + (i * MI_SIZE >> pd->subsampling_x);