Commit d5be4a17, authored by Jacky Chen, committed by Gerrit Code Review

Merge "vp9: Encoding cycle reduction for speed 8."

parents 19a4ce90 f9c05872
......@@ -146,9 +146,9 @@ struct macroblock {
uint8_t sb_is_skin;
// Used to save the status of whether a block has a low variance in
// choose_partitioning. 0 for 64x64, 1 2 for 64x32, 3 4 for 32x64, 5~8 for
// 32x32.
uint8_t variance_low[9];
// choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for
// 32x32, 9~24 for 16x16.
uint8_t variance_low[25];
void (*fwd_txm4x4)(const int16_t *input, tran_low_t *output, int stride);
void (*itxm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob);
......
......@@ -773,7 +773,7 @@ static int choose_partitioning(VP9_COMP *cpi,
}
}
for (i = 0; i < 9; i++) {
for (i = 0; i < 25; i++) {
x->variance_low[i] = 0;
}
......@@ -1083,28 +1083,53 @@ static int choose_partitioning(VP9_COMP *cpi,
}
if (cpi->sf.short_circuit_low_temp_var) {
// Set low variance flag, only for blocks >= 32x32 and if LAST_FRAME was
// selected.
if (ref_frame_partition == LAST_FRAME) {
int mv_thr = cm->width > 640 ? 8 : 4;
// Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected
// and int_pro mv is small. If the temporal variance is small set the
// variance_low flag for the block. The variance threshold can be adjusted,
// the higher the more aggressive.
if (ref_frame_partition == LAST_FRAME &&
(cpi->sf.short_circuit_low_temp_var == 1 ||
(xd->mi[0]->mv[0].as_mv.col < mv_thr &&
xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
xd->mi[0]->mv[0].as_mv.row < mv_thr &&
xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
if (xd->mi[0]->sb_type == BLOCK_64X64 &&
vt.part_variances.none.variance < (thresholds[0] >> 1)) {
x->variance_low[0] = 1;
} else if (xd->mi[0]->sb_type == BLOCK_64X32) {
if (vt.part_variances.horz[0].variance < (thresholds[0] >> 2))
x->variance_low[1] = 1;
if (vt.part_variances.horz[1].variance < (thresholds[0] >> 2))
x->variance_low[2] = 1;
for (j = 0; j < 2; j++) {
if (vt.part_variances.horz[j].variance < (thresholds[0] >> 2))
x->variance_low[j + 1] = 1;
}
} else if (xd->mi[0]->sb_type == BLOCK_32X64) {
if (vt.part_variances.vert[0].variance < (thresholds[0] >> 2))
x->variance_low[3] = 1;
if (vt.part_variances.vert[1].variance < (thresholds[0] >> 2))
x->variance_low[4] = 1;
for (j = 0; j < 2; j++) {
if (vt.part_variances.vert[j].variance < (thresholds[0] >> 2))
x->variance_low[j + 3] = 1;
}
} else {
// 32x32
for (i = 0; i < 4; i++) {
if (!force_split[i + 1] &&
vt.split[i].part_variances.none.variance < (thresholds[1] >> 1))
x->variance_low[i + 5] = 1;
if (!force_split[i + 1]) {
// 32x32
if (vt.split[i].part_variances.none.variance <
(thresholds[1] >> 1))
x->variance_low[i + 5] = 1;
} else if (cpi->sf.short_circuit_low_temp_var == 2) {
int idx[4] = {0, 4, xd->mi_stride << 2, (xd->mi_stride << 2) + 4};
const int idx_str = cm->mi_stride * mi_row + mi_col + idx[i];
MODE_INFO **this_mi = cm->mi_grid_visible + idx_str;
// For 32x16 and 16x32 blocks, the flag is set on each 16x16 block
// inside.
if ((*this_mi)->sb_type == BLOCK_16X16 ||
(*this_mi)->sb_type == BLOCK_32X16 ||
(*this_mi)->sb_type == BLOCK_16X32) {
for (j = 0; j < 4; j++) {
if (vt.split[i].split[j].part_variances.none.variance <
(thresholds[2] >> 8))
x->variance_low[(i << 2) + j + 9] = 1;
}
}
}
}
}
}
......
......@@ -40,6 +40,14 @@ typedef struct {
int in_use;
} PRED_BUFFER;
// Maps a 16x16 block's position to its slot in a variance_low[] flag array.
// The first index is the block's row offset and the second its column offset
// (each 0..3). Slots are numbered in groups of four per 2x2 corner of the
// grid: 9..12 top-left, 13..16 top-right, 17..20 bottom-left and 21..24
// bottom-right, in raster order inside each group.
static const int pos_shift_16x16[4][4] = {
  {  9, 10,   13, 14 },  // row 0: top halves of the two upper groups
  { 11, 12,   15, 16 },  // row 1: bottom halves of the two upper groups
  { 17, 18,   21, 22 },  // row 2: top halves of the two lower groups
  { 19, 20,   23, 24 },  // row 3: bottom halves of the two lower groups
};
static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm,
const MACROBLOCK *x,
const MACROBLOCKD *xd,
......@@ -1274,6 +1282,8 @@ static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
int mi_row, int mi_col,
BLOCK_SIZE bsize) {
int force_skip_low_temp_var = 0;
int i = (mi_row & 0x7) >> 1;
int j = (mi_col & 0x7) >> 1;
// Set force_skip_low_temp_var based on the block size and block offset.
if (bsize == BLOCK_64X64) {
force_skip_low_temp_var = variance_low[0];
......@@ -1299,6 +1309,19 @@ static INLINE int set_force_skip_low_temp_var(uint8_t *variance_low,
} else if ((mi_col & 0x7) && (mi_row & 0x7)) {
force_skip_low_temp_var = variance_low[8];
}
} else if (bsize == BLOCK_16X16) {
force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]];
} else if (bsize == BLOCK_32X16) {
// The col shift index for the second 16x16 block.
int j2 = ((mi_col + 2) & 0x7) >> 1;
// Only if each 16x16 block inside has low temporal variance.
force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
variance_low[pos_shift_16x16[i][j2]];
} else if (bsize == BLOCK_16X32) {
// The row shift index for the second 16x16 block.
int i2 = ((mi_row + 2) & 0x7) >> 1;
force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] &&
variance_low[pos_shift_16x16[i2][j]];
}
return force_skip_low_temp_var;
}
......@@ -1503,6 +1526,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
continue;
}
if (cpi->sf.short_circuit_low_temp_var == 2 &&
force_skip_low_temp_var && ref_frame == LAST_FRAME &&
this_mode == NEWMV) {
continue;
}
if (cpi->use_svc) {
if (svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
......@@ -1842,8 +1871,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
// Perform intra prediction search, if the best SAD is above a certain
// threshold. Skip intra prediction if force_skip_low_temp_var is set.
if (!force_skip_low_temp_var && perform_intra_pred &&
// threshold.
if ((!force_skip_low_temp_var || bsize < BLOCK_32X32) &&
perform_intra_pred &&
(best_rdc.rdcost == INT64_MAX ||
(!x->skip && best_rdc.rdcost > inter_mode_thresh &&
bsize <= cpi->sf.max_intra_bsize))) {
......
......@@ -429,7 +429,7 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.pass == 0 &&
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
content != VP9E_CONTENT_SCREEN) {
// Enable short circuit for low temporal variance.
sf->short_circuit_low_temp_var = 1;
......@@ -450,6 +450,17 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf,
sf->adaptive_rd_thresh = 4;
sf->mv.subpel_force_stop = (content == VP9E_CONTENT_SCREEN) ? 3 : 2;
sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
// Only keep INTRA_DC mode for speed 8.
if (!is_keyframe) {
int i = 0;
for (i = 0; i < BLOCK_SIZES; ++i)
sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
}
if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
content != VP9E_CONTENT_SCREEN) {
// More aggressive short circuit for speed 8.
sf->short_circuit_low_temp_var = 2;
}
}
}
......
......@@ -449,6 +449,10 @@ typedef struct SPEED_FEATURES {
// Skip a number of expensive mode evaluations for blocks with very low
// temporal variance.
// 1: Skip golden non-zeromv and ALL INTRA for bsize >= 32x32.
// 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL
// INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and
// 32x16.
int short_circuit_low_temp_var;
} SPEED_FEATURES;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment