Commit 61418bbd authored by Jingning Han

Fix conflicts between ext-inter and cb4x4 modes

Resolve the broken coding pipeline in the ext-inter experiment when
cb4x4 mode is enabled. Turn off rectangular inter-intra mode: it
still needs some work to hook up, and since it gives fairly limited
coding performance gains, disable it for the moment.

BUG=aomedia:309

Change-Id: I9b406df6183f75697bfd4eed5125a6e9436d84b0
parent 2f97339c
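The headline change below flips the `USE_RECT_INTERINTRA` build flag from 1 to 0. As a minimal sketch of what that kind of compile-time gating means in practice (the helper and values here are illustrative, not the real libaom code paths):

```c
#include <stdio.h>

/* Mirrors the flag flipped in the first hunk; with 0, rectangular
 * inter-intra prediction is compiled out entirely. */
#define USE_RECT_INTERINTRA 0

/* Illustrative only: may an inter-intra predictor be built for bw x bh? */
static int interintra_allowed(int bw, int bh) {
#if USE_RECT_INTERINTRA
  (void)bw;
  (void)bh;
  return 1;        /* square and rectangular blocks both allowed */
#else
  return bw == bh; /* only square blocks allowed */
#endif
}

int main(void) {
  printf("8x8:  %d\n", interintra_allowed(8, 8));  /* 1 */
  printf("8x16: %d\n", interintra_allowed(8, 16)); /* 0: rect disabled */
  return 0;
}
```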
@@ -40,7 +40,7 @@ extern "C" {
 #if CONFIG_EXT_INTER
 // Should we try rectangular interintra predictions?
-#define USE_RECT_INTERINTRA 1
+#define USE_RECT_INTERINTRA 0
 #if CONFIG_COMPOUND_SEGMENT
...
@@ -521,6 +521,9 @@ static const aom_prob
 #else  // !CONFIG_COMPOUND_SEGMENT
 static const aom_prob default_compound_type_probs[BLOCK_SIZES]
                                                  [COMPOUND_TYPES - 1] = {
+#if CONFIG_CB4X4
+  { 208 }, { 208 }, { 208 },
+#endif
   { 208 }, { 208 }, { 208 },
   { 208 }, { 208 }, { 208 },
   { 216 }, { 216 }, { 216 },
@@ -545,6 +548,9 @@ static const aom_prob
 };
 static const aom_prob default_wedge_interintra_prob[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+  208, 208, 208,
+#endif
   208, 208, 208, 208, 208, 208, 216, 216, 216, 224, 224, 224, 240,
 #if CONFIG_EXT_PARTITION
   208, 208, 208
...
@@ -731,13 +731,13 @@ void av1_update_mv_context(const MACROBLOCKD *xd, MODE_INFO *mi,
   const POSITION *const mv_ref_search = mv_ref_blocks[mi->mbmi.sb_type];
 #endif
   int context_counter = 0;
-  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type] << 3;
-  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type] << 3;
+  const int bw = block_size_wide[mi->mbmi.sb_type];
+  const int bh = block_size_high[mi->mbmi.sb_type];
   const TileInfo *const tile = &xd->tile;
 #if CONFIG_REF_MV
   POSITION mv_ref_search[MVREF_NEIGHBOURS];
-  const int num_8x8_blocks_wide = bw >> 3;
-  const int num_8x8_blocks_high = bh >> 3;
+  const int num_8x8_blocks_wide = mi_size_wide[mi->mbmi.sb_type];
+  const int num_8x8_blocks_high = mi_size_high[mi->mbmi.sb_type];
   mv_ref_search[0].row = num_8x8_blocks_high - 1;
   mv_ref_search[0].col = -1;
   mv_ref_search[1].row = -1;
...
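This substitution recurs throughout the patch, and it matters once cb4x4 allows true sub-8x8 coding blocks: deriving pixel width as `num_8x8_blocks_wide_lookup[bsize] << 3` rounds every block up to a multiple of 8, while the direct `block_size_wide` table returns exact dimensions. A minimal sketch of the difference, assuming a trimmed three-entry subset of the block-size enum:

```c
#include <stdio.h>

/* Trimmed, illustrative subset of the block-size enum. */
enum { BLOCK_4X4, BLOCK_4X8, BLOCK_8X8, BLOCK_SIZES };

/* Number of 8x8 units spanned (rounded up), as the old lookup did. */
static const int num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1 };
/* Exact pixel width, as the new direct table does. */
static const int block_size_wide[BLOCK_SIZES] = { 4, 4, 8 };

int main(void) {
  for (int bs = BLOCK_4X4; bs < BLOCK_SIZES; ++bs) {
    const int old_bw = num_8x8_blocks_wide_lookup[bs] << 3; /* 8, 8, 8 */
    const int new_bw = block_size_wide[bs];                 /* 4, 4, 8 */
    printf("bsize %d: old=%d new=%d\n", bs, old_bw, new_bw);
  }
  return 0;
}
```

For sizes of 8x8 and above the two agree; only the new sub-8x8 sizes expose the rounding error.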
@@ -120,6 +120,11 @@ static const wedge_code_type wedge_codebook_16_heqw[16] = {
 };
 const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+#endif
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
@@ -200,6 +205,11 @@ static const wedge_code_type wedge_codebook_32_heqw[32] = {
 };
 const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+  { 0, NULL, NULL, 0, NULL },
+#endif
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
   { 0, NULL, NULL, 0, NULL },
@@ -224,8 +234,8 @@ const wedge_params_type wedge_params_lookup[BLOCK_SIZES] = {
 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                              BLOCK_SIZE sb_type) {
   const uint8_t *master;
-  const int bh = 4 << b_height_log2_lookup[sb_type];
-  const int bw = 4 << b_width_log2_lookup[sb_type];
+  const int bh = block_size_high[sb_type];
+  const int bw = block_size_wide[sb_type];
   const wedge_code_type *a =
       wedge_params_lookup[sb_type].codebook + wedge_index;
   const int smoother = wedge_params_lookup[sb_type].smoother;
@@ -2314,6 +2324,7 @@ void av1_build_ncobmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd,
 #endif  // CONFIG_MOTION_VAR
 #if CONFIG_EXT_INTER
+/* clang-format off */
 #if CONFIG_EXT_PARTITION
 static const int ii_weights1d[MAX_SB_SIZE] = {
   102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71, 69, 68, 67,
@@ -2324,8 +2335,13 @@ static const int ii_weights1d[MAX_SB_SIZE] = {
   28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, 27, 27, 27, 27, 27, 27,
   27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
 };
-static int ii_size_scales[BLOCK_SIZES] = { 32, 16, 16, 16, 8, 8, 8, 4,
-                                           4, 4, 2, 2, 2, 1, 1, 1 };
+static int ii_size_scales[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+  32, 32, 32,
+#endif
+  32, 16, 16, 16, 8, 8, 8, 4,
+  4, 4, 2, 2, 2, 1, 1, 1,
+};
 #else
 static const int ii_weights1d[MAX_SB_SIZE] = {
   102, 100, 97, 95, 92, 90, 88, 86, 84, 82, 80, 78, 76, 74, 73, 71,
@@ -2333,8 +2349,14 @@ static const int ii_weights1d[MAX_SB_SIZE] = {
   51, 50, 49, 48, 47, 47, 46, 45, 45, 44, 43, 43, 42, 41, 41, 40,
   40, 39, 39, 38, 38, 38, 37, 37, 36, 36, 36, 35, 35, 35, 34, 34,
 };
-static int ii_size_scales[BLOCK_SIZES] = { 16, 8, 8, 8, 4, 4, 4,
-                                           2, 2, 2, 1, 1, 1 };
+static int ii_size_scales[BLOCK_SIZES] = {
+#if CONFIG_CB4X4
+  16, 16, 16,
+#endif
+  16, 8, 8, 8, 4, 4, 4,
+  2, 2, 2, 1, 1, 1,
+};
+/* clang-format on */
 #endif  // CONFIG_EXT_PARTITION
 static void combine_interintra(INTERINTRA_MODE mode, int use_wedge_interintra,
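The new `ii_size_scales` entries extend inter-intra weighting to the sub-8x8 sizes cb4x4 introduces. The underlying idea, sketched below under assumptions (the blend loop and the `ii_scale` helper are simplified illustrations, not the library's `combine_interintra`): one 1-D weight ramp is sized for the largest block, and each block size strides through it by a per-size scale so every size traverses the full ramp.

```c
#include <stdio.h>

#define MAX_SB_SIZE 64

/* Shared 1-D weight ramp sized for the largest block (values trimmed;
 * unlisted entries default to 0 in this sketch). */
static const int ii_weights1d[MAX_SB_SIZE] = {
  102, 100, 97, 95, 92, 90, 88, 86,
};

/* Per-size stride into the ramp: a 4-tap block samples every 16th
 * weight of the 64-entry table, a 64-tap block samples every weight.
 * This reproduces the table pattern 16, 8, ..., 1 (or 32, ..., 1 with
 * ext-partition's 128-wide superblocks). */
static int ii_scale(int size) { return MAX_SB_SIZE / size; }

/* Simplified vertical inter-intra blend over one column of bh pixels;
 * weights are out of 128, favoring the intra predictor near the edge. */
static void blend_column(const int *inter, const int *intra, int *out,
                         int bh) {
  const int scale = ii_scale(bh);
  for (int i = 0; i < bh; ++i) {
    const int w = ii_weights1d[i * scale];
    out[i] = (w * intra[i] + (128 - w) * inter[i]) >> 7;
  }
}

int main(void) {
  const int inter[4] = { 100, 100, 100, 100 };
  const int intra[4] = { 50, 50, 50, 50 };
  int out[4];
  blend_column(inter, intra, out, 4);
  for (int i = 0; i < 4; ++i) printf("%d ", out[i]);
  printf("\n");
  return 0;
}
```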
@@ -2574,12 +2596,12 @@ static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
                                                   BLOCK_SIZE bsize, int plane) {
   struct macroblockd_plane *const pd = &xd->plane[plane];
   BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]);
-  const int bwl = b_width_log2_lookup[plane_bsize];
-  const int bhl = b_height_log2_lookup[plane_bsize];
+  const int bwl = block_size_wide[plane_bsize];
+  const int bhl = block_size_high[plane_bsize];
   TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
 #if USE_RECT_INTERINTRA
-  const int pxbw = 4 << bwl;
-  const int pxbh = 4 << bhl;
+  const int pxbw = block_size_wide[plane_bsize];
+  const int pxbh = block_size_high[plane_bsize];
 #if CONFIG_AOM_HIGHBITDEPTH
   uint16_t tmp16[MAX_SB_SIZE];
 #endif
@@ -2614,7 +2636,8 @@ static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
   }
 #endif
   av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, src_2,
-                          ref_stride, dst_2, dst_stride, 0, 1 << bwl, plane);
+                          ref_stride, dst_2, dst_stride, 0,
+                          mi_size_wide[plane_bsize], plane);
 #if CONFIG_AOM_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
@@ -2649,7 +2672,8 @@ static void build_intra_predictors_for_interintra(MACROBLOCKD *xd, uint8_t *ref,
   }
 #endif
   av1_predict_intra_block(xd, pd->width, pd->height, max_tx_size, mode, src_2,
-                          ref_stride, dst_2, dst_stride, 1 << bhl, 0, plane);
+                          ref_stride, dst_2, dst_stride,
+                          mi_size_high[plane_bsize], 0, plane);
 #if CONFIG_AOM_HIGHBITDEPTH
   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
     uint16_t *src_216 = CONVERT_TO_SHORTPTR(src_2);
@@ -2833,7 +2857,7 @@ void av1_build_inter_predictors_for_planes_single_buf(
   const int bw = block_size_wide[plane_bsize];
   const int bh = block_size_high[plane_bsize];
-  if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+  if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
     int x, y;
     assert(bsize == BLOCK_8X8);
     for (y = 0; y < num_4x4_h; ++y)
@@ -2919,7 +2943,7 @@ void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
   const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
   const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
-  if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8) {
+  if (xd->mi[0]->mbmi.sb_type < BLOCK_8X8 && !CONFIG_CB4X4) {
     int x, y;
     assert(bsize == BLOCK_8X8);
     for (y = 0; y < num_4x4_h; ++y)
@@ -2928,8 +2952,8 @@ void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize,
           xd, plane, 4 * x, 4 * y, 4, 4, ext_dst0[plane],
           ext_dst_stride0[plane], ext_dst1[plane], ext_dst_stride1[plane]);
   } else {
-    const int bw = 4 * num_4x4_w;
-    const int bh = 4 * num_4x4_h;
+    const int bw = block_size_wide[plane_bsize];
+    const int bh = block_size_high[plane_bsize];
     build_wedge_inter_predictor_from_buf(
         xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane],
         ext_dst1[plane], ext_dst_stride1[plane]);
...
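The `!CONFIG_CB4X4` added to these runtime conditions relies on libaom config macros expanding to 0 or 1, so the compiler folds the dead branch away. A standalone sketch of the idiom (the macro value, enum ordinal, and helper are illustrative):

```c
#include <stdio.h>

/* Config macros in libaom expand to 0 or 1, so they can appear inside
 * ordinary if-conditions; the dead branch is eliminated at compile time. */
#define CONFIG_CB4X4 1
#define BLOCK_8X8 3 /* placeholder ordinal for the enum value */

static const char *predict_path(int sb_type) {
  /* With CONFIG_CB4X4 enabled, sub-8x8 blocks are real coding blocks,
   * so the legacy "split an 8x8 into 4x4 quarters" path must not run. */
  if (sb_type < BLOCK_8X8 && !CONFIG_CB4X4)
    return "per-4x4 sub-block loop";
  return "whole-block predictor";
}

int main(void) {
  printf("4x4 block -> %s\n", predict_path(0));         /* whole-block */
  printf("8x8 block -> %s\n", predict_path(BLOCK_8X8)); /* whole-block */
  return 0;
}
```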
@@ -1638,7 +1638,8 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
   }
 #if CONFIG_EXT_INTER
-  if (is_compound && bsize >= BLOCK_8X8 && mbmi->mode != ZERO_ZEROMV) {
+  if (is_compound && (bsize >= BLOCK_8X8 || unify_bsize) &&
+      mbmi->mode != ZERO_ZEROMV) {
 #else
   if (is_compound && (bsize >= BLOCK_8X8 || unify_bsize) &&
       mbmi->mode != NEWMV && mbmi->mode != ZEROMV) {
...
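This brings the ext-inter branch in line with the non-ext-inter branch below it, which already relaxed its size guard. A toy illustration of the relaxed guard, assuming (as in the cb4x4 work) that `unify_bsize` is effectively `CONFIG_CB4X4` and using placeholder enum ordinals:

```c
#include <stdio.h>

#define CONFIG_CB4X4 1

enum { BLOCK_4X4 = 0, BLOCK_8X8 = 3 };

static int reads_compound_mode(int is_compound, int bsize) {
  /* Assumption for this sketch: unify_bsize mirrors CONFIG_CB4X4. */
  const int unify_bsize = CONFIG_CB4X4;
  /* With unify_bsize set, sub-8x8 blocks share the normal mode-info
   * path, so the size guard must not filter them out. */
  return is_compound && (bsize >= BLOCK_8X8 || unify_bsize);
}

int main(void) {
  printf("compound 4x4: %d\n", reads_compound_mode(1, BLOCK_4X4)); /* 1 */
  printf("compound 8x8: %d\n", reads_compound_mode(1, BLOCK_8X8)); /* 1 */
  return 0;
}
```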
@@ -5391,8 +5391,12 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
   DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]);
 #endif  // CONFIG_AOM_HIGHBITDEPTH
+#if CONFIG_EXT_INTER && CONFIG_CB4X4
+  (void)ref_mv_sub8x8;
+#endif
   for (ref = 0; ref < 2; ++ref) {
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && !CONFIG_CB4X4
     if (bsize < BLOCK_8X8 && ref_mv_sub8x8 != NULL)
       ref_mv[ref].as_int = ref_mv_sub8x8[ref]->as_int;
     else
...
@@ -5569,13 +5573,13 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
 #if CONFIG_REF_MV
     av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx);
 #endif
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && !CONFIG_CB4X4
     if (bsize >= BLOCK_8X8)
 #endif  // CONFIG_EXT_INTER
       *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                   &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
-#if CONFIG_EXT_INTER
+#if CONFIG_EXT_INTER && !CONFIG_CB4X4
     else
       *rate_mv += av1_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                   &ref_mv_sub8x8[ref]->as_mv, x->nmvjointcost,
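With the sub-8x8 path compiled out under cb4x4, `ref_mv_sub8x8` goes unused, hence the `(void)ref_mv_sub8x8;` added earlier in this function. A compact sketch of that idiom (flag value and function are illustrative):

```c
/* When preprocessor config compiles out every use of a parameter,
 * casting it to void silences -Wunused-parameter without changing
 * behavior. */
#define CONFIG_CB4X4 1

static int motion_cost(int base_cost, const int *ref_mv_sub8x8) {
#if CONFIG_CB4X4
  (void)ref_mv_sub8x8; /* sub-8x8 path compiled out; parameter unused */
  return base_cost;
#else
  return base_cost + (ref_mv_sub8x8 ? ref_mv_sub8x8[0] : 0);
#endif
}

int main(void) { return motion_cost(0, 0); }
```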
@@ -7146,8 +7150,8 @@ static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
   const uint8_t *src = p->src.buf;
   int src_stride = p->src.stride;
   const int f_index = bsize - BLOCK_8X8;
-  const int bw = 4 << (b_width_log2_lookup[bsize]);
-  const int bh = 4 << (b_height_log2_lookup[bsize]);
+  const int bw = block_size_wide[bsize];
+  const int bh = block_size_high[bsize];
   uint32_t esq[2][4], var;
   int64_t tl, br;
...
@@ -7340,14 +7344,25 @@ static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x,
                  (int64_t)aom_sum_squares_i16(r1, N)) *
                 (1 << WEDGE_WEIGHT_BITS) / 2;
-  av1_wedge_compute_delta_squares(ds, r0, r1, N);
+  if (N < 64)
+    av1_wedge_compute_delta_squares_c(ds, r0, r1, N);
+  else
+    av1_wedge_compute_delta_squares(ds, r0, r1, N);
   for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
     mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize);
-    wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
+    // TODO(jingning): Make sse2 functions support N = 16 case
+    if (N < 64)
+      wedge_sign = av1_wedge_sign_from_residuals_c(ds, mask, N, sign_limit);
+    else
+      wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit);
     mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
-    sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
+    if (N < 64)
+      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
+    else
+      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
     sse = ROUND_POWER_OF_TWO(sse, bd_round);
     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
@@ -7405,7 +7420,10 @@ static int64_t pick_wedge_fixed_sign(
   for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) {
     mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
-    sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
+    if (N < 64)
+      sse = av1_wedge_sse_from_residuals_c(r1, d10, mask, N);
+    else
+      sse = av1_wedge_sse_from_residuals(r1, d10, mask, N);
     sse = ROUND_POWER_OF_TWO(sse, bd_round);
     model_rd_from_sse(cpi, xd, bsize, 0, sse, &rate, &dist);
...
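The repeated `if (N < 64)` guards work around SIMD kernels that assume `N % 64 == 0`; for the new 4x4 wedge case, N = 16, so the plain C implementation is called directly, per the TODO. A generic sketch of this dispatch-with-scalar-fallback pattern (the function names are illustrative, not the libaom RTCD symbols):

```c
#include <stdint.h>
#include <stdio.h>

/* Scalar reference: handles any N. */
static uint64_t sum_sq_c(const int16_t *v, int n) {
  uint64_t acc = 0;
  for (int i = 0; i < n; i++) acc += (int64_t)v[i] * v[i];
  return acc;
}

/* Stand-in for a SIMD kernel that processes 64 elements per pass and
 * is only valid when n is a multiple of 64. */
static uint64_t sum_sq_simd(const int16_t *v, int n) {
  /* In the real codec this would be an SSE2 routine. */
  return sum_sq_c(v, n);
}

/* Dispatch: small blocks (e.g. 4x4 -> n == 16) take the scalar path. */
static uint64_t sum_sq(const int16_t *v, int n) {
  return (n < 64) ? sum_sq_c(v, n) : sum_sq_simd(v, n);
}

int main(void) {
  int16_t r[64];
  for (int i = 0; i < 64; i++) r[i] = (int16_t)(i - 32);
  printf("n=16: %llu\n", (unsigned long long)sum_sq(r, 16));
  printf("n=64: %llu\n", (unsigned long long)sum_sq(r, 64));
  return 0;
}
```

Consistent with this, the final three hunks drop the `assert(N % 64 == 0)` guards from the `_c` implementations, which now legitimately run with N = 16.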
@@ -53,7 +53,7 @@ uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d,
                                         const uint8_t *m, int N) {
   uint64_t csse = 0;
   int i;
-  assert(N % 64 == 0);
   for (i = 0; i < N; i++) {
     int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i];
     t = clamp(t, INT16_MIN, INT16_MAX);
@@ -97,8 +97,6 @@ int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N,
                                     int64_t limit) {
   int64_t acc = 0;
-  assert(N % 64 == 0);
   do {
     acc += *ds++ * *m++;
   } while (--N);
@@ -122,8 +120,6 @@ void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a,
                                        const int16_t *b, int N) {
   int i;
-  assert(N % 64 == 0);
   for (i = 0; i < N; i++)
     d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX);
 }