Commit 5c31fd5c, authored by Jingning Han and committed by Gerrit Code Review
Browse files

Merge "Enable sub8x8 inter block search for RTC coding mode"

parents 32d88c22 dad89d5c
......@@ -1410,6 +1410,11 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
const int pred_ctx = vp9_get_pred_context_switchable_interp(xd);
++td->counts->switchable_interp[pred_ctx][mbmi->interp_filter];
}
if (mbmi->sb_type < BLOCK_8X8) {
mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
}
}
if (cm->use_prev_frame_mvs) {
......@@ -2705,9 +2710,12 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi,
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
else if (vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
else
else if (bsize >= BLOCK_8X8)
vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
rd_cost, bsize, ctx);
else
vp9_pick_inter_mode_sub8x8(cpi, x, tile_data, mi_row, mi_col,
rd_cost, bsize, ctx);
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
......@@ -3312,9 +3320,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
// Set the partition type of the 64X64 block
switch (sf->partition_search_type) {
case VAR_BASED_PARTITION:
// TODO(jingning) Only key frame coding supports sub8x8 block at this
// point. To be continued to enable sub8x8 block mode decision for
// P frames.
// TODO(jingning, marpan): The mode decision and encoding process
// support both intra and inter sub8x8 block coding for RTC mode.
// Tune the thresholds accordingly to use sub8x8 block coding for
// coding performance improvement.
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
......
......@@ -947,3 +947,253 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
*rd_cost = best_rdc;
}
// Inter mode decision for a sub8x8 block (bsize < BLOCK_8X8) in the non-RD
// coding path.
//
// For each of LAST_FRAME and GOLDEN_FRAME that is enabled, not scaled and
// permitted by the segment's reference-frame feature, every sub-block of the
// 8x8 area is searched over the modes NEARESTMV..NEWMV. A model-based
// rate/distortion cost (model_rd_for_sb_y) ranks the candidates. The cheapest
// mode/mv per sub-block is kept in bsi[ref_frame][], the per-sub-block costs
// are summed per reference frame, and the reference frame with the lowest
// total is selected.
//
// On return:
//   - xd->mi[0].bmi[] holds the chosen per-sub-block mode and motion vector,
//   - mbmi->ref_frame[0], mbmi->mode and mbmi->tx_size (TX_4X4) are set,
//   - ctx->mic is a copy of the mode info, ctx->skip / ctx->skip_txfm[0] = 0,
//   - rd_cost->rdcost carries the best accumulated cost (the only field of
//     rd_cost written here).
//
// NOTE(review): if every reference frame is skipped, best_ref_frame stays
// NONE and is then used to index bsi[]; callers presumably guarantee that at
// least one usable reference exists - confirm.
void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
                                TileDataEnc *tile_data,
                                int mi_row, int mi_col, RD_COST *rd_cost,
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi;
  const struct segmentation *const seg = &cm->seg;
  MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE;
  MV_REFERENCE_FRAME best_ref_frame = NONE;
  unsigned char segment_id = mbmi->segment_id;
  int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
  struct buf_2d yv12_mb[4][MAX_MB_PLANE];
  static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
                                    VP9_ALT_FLAG };
  int64_t best_rd = INT64_MAX;
  b_mode_info bsi[MAX_REF_FRAMES][4];
  int ref_frame_skip_mask = 0;
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;

  x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  ctx->pred_pixel_ready = 0;

  // Per-reference setup: prediction buffers and block-level mv candidates.
  // References that are not enabled for this frame are masked out.
  for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
    frame_mv[ZEROMV][ref_frame].as_int = 0;

    if (cpi->ref_frame_flags & flag_list[ref_frame]) {
      const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
      int_mv *const candidates = mbmi->ref_mvs[ref_frame];
      // Named ref_sf so it does not shadow the speed-feature pointer `sf`.
      const struct scale_factors *const ref_sf =
          &cm->frame_refs[ref_frame - 1].sf;

      vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col,
                           ref_sf, ref_sf);
      vp9_find_mv_refs(cm, xd, tile_info, xd->mi[0].src_mi, ref_frame,
                       candidates, mi_row, mi_col);
      vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates,
                            &frame_mv[NEARESTMV][ref_frame],
                            &frame_mv[NEARMV][ref_frame]);
    } else {
      ref_frame_skip_mask |= (1 << ref_frame);
    }
  }

  mbmi->sb_type = bsize;
  mbmi->tx_size = TX_4X4;
  mbmi->uv_mode = DC_PRED;
  mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE;
  mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP
                                                        : cm->interp_filter;

  for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
    int64_t this_rd = 0;
    int plane;

    if (ref_frame_skip_mask & (1 << ref_frame))
      continue;

    // TODO(jingning, agrange): Scaling reference frame not supported for
    // sub8x8 blocks. Is this supported now?
    if (ref_frame > INTRA_FRAME &&
        vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
      continue;

    // If the segment reference frame feature is enabled, skip any reference
    // frame other than the one the segment mandates.
    if (vp9_segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        vp9_get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
      continue;

    mbmi->ref_frame[0] = ref_frame;
    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (plane = 0; plane < MAX_MB_PLANE; plane++)
      xd->plane[plane].pre[0] = yv12_mb[ref_frame][plane];

    // Walk the sub-blocks of the 8x8 area. The loop steps follow the block
    // shape, so one block index is visited per coded sub-block.
    for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
      for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
        // Per-sub-block candidates; distinct from the block-level frame_mv.
        int_mv blk_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
        int64_t b_best_rd = INT64_MAX;
        const int i = idy * 2 + idx;
        PREDICTION_MODE this_mode;
        int b_rate = 0;
        int64_t b_dist = 0;
        RD_COST this_rdc;
        unsigned int var_y, sse_y;
        struct macroblock_plane *p = &x->plane[0];
        struct macroblockd_plane *pd = &xd->plane[0];
        const struct buf_2d orig_src = p->src;
        const struct buf_2d orig_dst = pd->dst;
        struct buf_2d orig_pre[2];
        vpx_memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre));

        // Set buffer pointers for sub8x8 motion search.
        p->src.buf =
            &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
        pd->dst.buf =
            &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i,
                                                 pd->dst.stride)];
        pd->pre[0].buf =
            &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8,
                                                    i, pd->pre[0].stride)];

        blk_mv[ZEROMV][ref_frame].as_int = 0;
        blk_mv[NEWMV][ref_frame].as_int = INVALID_MV;
        // The NEARESTMV/NEARMV slots are handed to the sub8x8 mv candidate
        // helper for sub-block i.
        vp9_append_sub8x8_mvs_for_idx(cm, xd, tile_info, i, 0, mi_row, mi_col,
                                      &blk_mv[NEARESTMV][ref_frame],
                                      &blk_mv[NEARMV][ref_frame]);

        for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
          xd->mi[0].bmi[i].as_mv[0].as_int =
              blk_mv[this_mode][ref_frame].as_int;

          if (this_mode == NEWMV) {
            const int step_param = cpi->sf.mv.fullpel_search_step_param;
            MV mvp_full;
            MV tmp_mv;
            int cost_list[5];
            const int tmp_col_min = x->mv_col_min;
            const int tmp_col_max = x->mv_col_max;
            const int tmp_row_min = x->mv_row_min;
            const int tmp_row_max = x->mv_row_max;
            int dummy_dist;

            // Full-pel search start: the nearest mv for the first sub-block,
            // otherwise the mv already chosen for sub-block 0.
            if (i == 0) {
              mvp_full.row = blk_mv[NEARESTMV][ref_frame].as_mv.row >> 3;
              mvp_full.col = blk_mv[NEARESTMV][ref_frame].as_mv.col >> 3;
            } else {
              mvp_full.row = xd->mi[0].bmi[0].as_mv[0].as_mv.row >> 3;
              mvp_full.col = xd->mi[0].bmi[0].as_mv[0].as_mv.col >> 3;
            }

            // Bound the search around the same reference mv that is used for
            // the search and mv costing below (ref_mvs[ref_frame][0]); row 0
            // of ref_mvs is the INTRA_FRAME row and is not populated here.
            vp9_set_mv_search_range(x, &mbmi->ref_mvs[ref_frame][0].as_mv);

            vp9_full_pixel_search(
                cpi, x, bsize, &mvp_full, step_param, x->sadperbit4,
                cond_cost_list(cpi, cost_list),
                &mbmi->ref_mvs[ref_frame][0].as_mv, &tmp_mv,
                INT_MAX, 0);

            // Restore the mv limits changed for the search.
            x->mv_col_min = tmp_col_min;
            x->mv_col_max = tmp_col_max;
            x->mv_row_min = tmp_row_min;
            x->mv_row_max = tmp_row_max;

            // Calculate the bit cost of the motion vector (full-pel result
            // converted to 1/8-pel units).
            mvp_full.row = tmp_mv.row * 8;
            mvp_full.col = tmp_mv.col * 8;

            b_rate += vp9_mv_bit_cost(&mvp_full,
                                      &mbmi->ref_mvs[ref_frame][0].as_mv,
                                      x->nmvjointcost, x->mvcost,
                                      MV_COST_WEIGHT);

            b_rate += cpi->inter_mode_cost[mbmi->mode_context[ref_frame]]
                                          [INTER_OFFSET(NEWMV)];

            // The rate alone already exceeds the best cost so far: give up on
            // NEWMV before the sub-pel refinement.
            if (RDCOST(x->rdmult, x->rddiv, b_rate, 0) > b_best_rd)
              continue;

            cpi->find_fractional_mv_step(x, &tmp_mv,
                                         &mbmi->ref_mvs[ref_frame][0].as_mv,
                                         cpi->common.allow_high_precision_mv,
                                         x->errorperbit,
                                         &cpi->fn_ptr[bsize],
                                         cpi->sf.mv.subpel_force_stop,
                                         cpi->sf.mv.subpel_iters_per_step,
                                         cond_cost_list(cpi, cost_list),
                                         x->nmvjointcost, x->mvcost,
                                         &dummy_dist,
                                         &x->pred_sse[ref_frame], NULL, 0, 0);

            xd->mi[0].bmi[i].as_mv[0].as_mv = tmp_mv;
          }

          vp9_build_inter_predictor(pd->pre[0].buf, pd->pre[0].stride,
                                    pd->dst.buf, pd->dst.stride,
                                    &xd->mi[0].bmi[i].as_mv[0].as_mv,
                                    &xd->block_refs[0]->sf,
                                    4 * num_4x4_blocks_wide,
                                    4 * num_4x4_blocks_high, 0,
                                    vp9_get_interp_kernel(mbmi->interp_filter),
                                    MV_PRECISION_Q3,
                                    mi_col * MI_SIZE + 4 * (i & 0x01),
                                    mi_row * MI_SIZE + 4 * (i >> 1));

          model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
                            &var_y, &sse_y);

          this_rdc.rate += b_rate;
          this_rdc.dist += b_dist;
          this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                                   this_rdc.rate, this_rdc.dist);

          if (this_rdc.rdcost < b_best_rd) {
            b_best_rd = this_rdc.rdcost;
            bsi[ref_frame][i].as_mode = this_mode;
            bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0].bmi[i].as_mv[0].as_mv;
          }
        }  // mode search

        // Restore source and prediction buffer pointers.
        p->src = orig_src;
        pd->pre[0] = orig_pre[0];
        pd->dst = orig_dst;
        this_rd += b_best_rd;

        // Publish the winner for this sub-block and replicate it over the
        // 4x4 positions it covers.
        xd->mi[0].bmi[i] = bsi[ref_frame][i];
        if (num_4x4_blocks_wide > 1)
          xd->mi[0].bmi[i + 1] = xd->mi[0].bmi[i];
        if (num_4x4_blocks_high > 1)
          xd->mi[0].bmi[i + 2] = xd->mi[0].bmi[i];
      }
    }  // loop through sub8x8 blocks

    // Keep the cheapest reference frame. best_rd must be updated here so
    // that later reference frames compete against the best cost so far.
    if (this_rd < best_rd) {
      best_rd = this_rd;
      best_ref_frame = ref_frame;
    }
  }  // reference frames

  mbmi->tx_size = TX_4X4;
  mbmi->ref_frame[0] = best_ref_frame;

  // Copy the winning reference frame's per-sub-block decisions back into the
  // mode info, replicated over the covered 4x4 positions.
  for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
    for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
      const int block = idy * 2 + idx;
      xd->mi[0].bmi[block] = bsi[best_ref_frame][block];
      if (num_4x4_blocks_wide > 1)
        xd->mi[0].bmi[block + 1] = bsi[best_ref_frame][block];
      if (num_4x4_blocks_high > 1)
        xd->mi[0].bmi[block + 2] = bsi[best_ref_frame][block];
    }
  }

  // The block-level mode mirrors the last (bottom-right) sub-block.
  mbmi->mode = xd->mi[0].bmi[3].as_mode;
  ctx->mic = *(xd->mi[0].src_mi);
  ctx->skip_txfm[0] = 0;
  ctx->skip = 0;

  // Dummy assignment for speed -5. No effect in speed -6.
  rd_cost->rdcost = best_rd;
}
......@@ -26,6 +26,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
// Inter mode selection for blocks smaller than 8x8 in the non-RD coding path.
// The chosen mode info is stored through ctx and the resulting cost is
// reported through rd_cost.
void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x,
TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize,
PICK_MODE_CONTEXT *ctx);
#ifdef __cplusplus
} // extern "C"
#endif
......
......@@ -516,6 +516,20 @@ void vp9_setup_pred_block(const MACROBLOCKD *xd,
}
}
// Converts a raster-order 4x4 block index inside a plane block of size
// plane_bsize into an element offset for a buffer with the given stride.
// The low b_width_log2_lookup[plane_bsize] bits of raster_block select the
// 4x4 column, the remaining bits select the 4x4 row.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
                            int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int column_mask = (1 << width_log2) - 1;
  const int row_px = 4 * (raster_block >> width_log2);
  const int col_px = 4 * (raster_block & column_mask);

  return row_px * stride + col_px;
}
// Returns a pointer to the first sample of 4x4 block `raster_block` inside a
// densely packed int16_t buffer whose row stride is the pixel width implied
// by plane_bsize (4 * num_4x4_blocks_wide_lookup[plane_bsize]).
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                       int raster_block, int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, row_stride);

  return base + offset;
}
const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
int ref_frame) {
const VP9_COMMON *const cm = &cpi->common;
......
......@@ -141,6 +141,12 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
int vp9_get_switchable_rate(const struct VP9_COMP *cpi,
const MACROBLOCKD *const xd);
// Returns the element offset of the 4x4 block with raster index
// `raster_block` within a block of size `plane_bsize`, for a buffer with row
// stride `stride`.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
int raster_block, int stride);
// Returns `base` advanced to the 4x4 block with raster index `raster_block`,
// using a row stride derived from the width of `plane_bsize`.
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
int raster_block, int16_t *base);
const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
int ref_frame);
......
......@@ -129,19 +129,6 @@ static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
{{INTRA_FRAME, NONE}},
};
// File-local helper: maps a raster-order 4x4 block index to an element
// offset for a buffer with the given stride. The block width (log2, from
// b_width_log2_lookup) splits the index into a 4x4 row and a 4x4 column.
static int raster_block_offset(BLOCK_SIZE plane_bsize,
                               int raster_block, int stride) {
  const int log2_w = b_width_log2_lookup[plane_bsize];
  const int blk_row = raster_block >> log2_w;
  const int blk_col = raster_block & ((1 << log2_w) - 1);
  const int y_px = 4 * blk_row;
  const int x_px = 4 * blk_col;

  return y_px * stride + x_px;
}
// File-local helper: advances `base` to the given 4x4 block, using a row
// stride of 4 * num_4x4_blocks_wide_lookup[plane_bsize] elements.
static int16_t* raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                          int raster_block, int16_t *base) {
  const int packed_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int delta =
      raster_block_offset(plane_bsize, raster_block, packed_stride);

  return base + delta;
}
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
int m, int n, int min_plane, int max_plane) {
int i;
......@@ -773,10 +760,10 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
struct macroblockd_plane *pd = &xd->plane[0];
const int src_stride = p->src.stride;
const int dst_stride = pd->dst.stride;
const uint8_t *src_init = &p->src.buf[raster_block_offset(BLOCK_8X8, ib,
src_stride)];
uint8_t *dst_init = &pd->dst.buf[raster_block_offset(BLOCK_8X8, ib,
dst_stride)];
const uint8_t *src_init = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
src_stride)];
uint8_t *dst_init = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, ib,
dst_stride)];
ENTROPY_CONTEXT ta[2], tempa[2];
ENTROPY_CONTEXT tl[2], templ[2];
......@@ -820,8 +807,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
const int block = ib + idy * 2 + idx;
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
p->src_diff);
int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8,
block,
p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0].src_mi->bmi[block].as_mode = mode;
vp9_predict_intra_block(xd, block, 1,
......@@ -920,8 +908,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib,
const int block = ib + idy * 2 + idx;
const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
int16_t *const src_diff = raster_block_offset_int16(BLOCK_8X8, block,
p->src_diff);
int16_t *const src_diff =
vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
xd->mi[0].src_mi->bmi[block].as_mode = mode;
vp9_predict_intra_block(xd, block, 1,
......@@ -1353,10 +1341,10 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
int idx, idy;
const uint8_t *const src = &p->src.buf[raster_block_offset(BLOCK_8X8, i,
p->src.stride)];
uint8_t *const dst = &pd->dst.buf[raster_block_offset(BLOCK_8X8, i,
pd->dst.stride)];
const uint8_t *const src =
&p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
uint8_t *const dst = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->dst.stride)];
int64_t thisdistortion = 0, thissse = 0;
int thisrate = 0, ref;
const scan_order *so = &vp9_default_scan_orders[TX_4X4];
......@@ -1364,7 +1352,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
const InterpKernel *kernel = vp9_get_interp_kernel(mi->mbmi.interp_filter);
for (ref = 0; ref < 1 + is_compound; ++ref) {
const uint8_t *pre = &pd->pre[ref].buf[raster_block_offset(BLOCK_8X8, i,
const uint8_t *pre = &pd->pre[ref].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[ref].stride)];
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
......@@ -1398,17 +1386,17 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
vp9_highbd_subtract_block(
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride, xd->bd);
height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
} else {
vp9_subtract_block(
height, width, raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride);
height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride);
}
#else
vp9_subtract_block(height, width,
raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8,
src, p->src.stride, dst, pd->dst.stride);
vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
8, src, p->src.stride, dst, pd->dst.stride);
#endif // CONFIG_VP9_HIGHBITDEPTH
k = i;
......@@ -1419,7 +1407,7 @@ static int64_t encode_inter_mb_segment(VP9_COMP *cpi,
k += (idy * 2 + idx);
coeff = BLOCK_OFFSET(p->coeff, k);
x->fwd_txm4x4(raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
x->fwd_txm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
coeff, 8);
vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
......@@ -1492,13 +1480,14 @@ static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
struct macroblock_plane *const p = &x->plane[0];
struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
p->src.buf = &p->src.buf[raster_block_offset(BLOCK_8X8, i, p->src.stride)];
p->src.buf = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i,
p->src.stride)];
assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
pd->pre[0].buf = &pd->pre[0].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[0].stride)];
pd->pre[0].buf = &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[0].stride)];
if (has_second_ref(mbmi))
pd->pre[1].buf = &pd->pre[1].buf[raster_block_offset(BLOCK_8X8, i,
pd->pre[1].stride)];
pd->pre[1].buf = &pd->pre[1].buf[vp9_raster_block_offset(BLOCK_8X8, i,
pd->pre[1].stride)];
}
static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment