Commit 74593c1e authored by Pengchong Jin's avatar Pengchong Jin

Directly split the block in partition search

This patch allows the encoder to directly split the block
in partition search, therefore skip searching NONE. It
computes a score which measures whether 16x16 motion vectors
from the first pass in the current block are consistent with
each others. If they are inconsistent and we have enough Q
to encode, split the block directly, and skip searching NONE.

This feature is under flag CONFIG_FP_MB_STATS. In speed 2,
it further gives a speedup of 3-8% on sample yt clips as
compared to the previous version under the same flag. Overall,
the features under the flag will give 7-15% on typical yt
clips at up to 6000kbps data rate. The speedup at very high
data rate is not significant.

For hard stdhd clips:
park_joy_1080p @ 15000kbps:       504541ms -> 506293ms (-0.35%)
pedestrian_area_1080p @ 2000kbps: 326610ms -> 290090ms (+11.2%)

The compression performance using the features under the flag:
derf: -0.068%
yt:   -0.189%
hd:   -0.318%
stdhd:-0.183%

To use the feature, set CONFIG_FP_MB_STATS and turn on
cpi->use_fp_mb_stats.

Change-Id: Iad58a2966515c8861aa9eb211565b1864048d47f
parent 5971e898
...@@ -1953,6 +1953,42 @@ const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = ...@@ -1953,6 +1953,42 @@ const int num_16x16_blocks_high_lookup[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4}; {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4};
const int qindex_skip_threshold_lookup[BLOCK_SIZES] = const int qindex_skip_threshold_lookup[BLOCK_SIZES] =
{0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120}; {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120};
const int qindex_split_threshold_lookup[BLOCK_SIZES] =
{0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120};
const int complexity_16x16_blocks_threshold[BLOCK_SIZES] =
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6};
typedef enum {
MV_ZERO = 0,
MV_LEFT = 1,
MV_UP = 2,
MV_RIGHT = 3,
MV_DOWN = 4,
MV_INVALID
} MOTION_DIRECTION;
static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
if (fp_byte & FPMB_MOTION_ZERO_MASK) {
return MV_ZERO;
} else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
return MV_LEFT;
} else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
return MV_RIGHT;
} else if (fp_byte & FPMB_MOTION_UP_MASK) {
return MV_UP;
} else {
return MV_DOWN;
}
}
static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
MOTION_DIRECTION that_mv) {
if (this_mv == that_mv) {
return 0;
} else {
return abs(this_mv - that_mv) == 2 ? 2 : 1;
}
}
#endif #endif
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
...@@ -1978,6 +2014,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -1978,6 +2014,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
int64_t sum_rd = 0; int64_t sum_rd = 0;
int do_split = bsize >= BLOCK_8X8; int do_split = bsize >= BLOCK_8X8;
int do_rect = 1; int do_rect = 1;
// Override skipping rectangular partition operations for edge blocks // Override skipping rectangular partition operations for edge blocks
const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
...@@ -1987,6 +2024,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -1987,6 +2024,11 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
BLOCK_SIZE min_size = cpi->sf.min_partition_size; BLOCK_SIZE min_size = cpi->sf.min_partition_size;
BLOCK_SIZE max_size = cpi->sf.max_partition_size; BLOCK_SIZE max_size = cpi->sf.max_partition_size;
#if CONFIG_FP_MB_STATS
unsigned int src_diff_var = UINT_MAX;
int none_complexity = 0;
#endif
int partition_none_allowed = !force_horz_split && !force_vert_split; int partition_none_allowed = !force_horz_split && !force_vert_split;
int partition_horz_allowed = !force_vert_split && yss <= xss && int partition_horz_allowed = !force_vert_split && yss <= xss &&
bsize >= BLOCK_8X8; bsize >= BLOCK_8X8;
...@@ -2038,6 +2080,65 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -2038,6 +2080,65 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
} }
} }
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
set_offsets(cpi, tile, mi_row, mi_col, bsize);
src_diff_var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src,
mi_row, mi_col, bsize);
}
#endif
#if CONFIG_FP_MB_STATS
// Decide whether we shall split directly and skip searching NONE by using
// the first pass block statistics
if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
partition_none_allowed && src_diff_var > 4 &&
cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
int mb_row = mi_row >> 1;
int mb_col = mi_col >> 1;
int mb_row_end =
MIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
int mb_col_end =
MIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
int r, c;
// compute a complexity measure, basically measure inconsistency of motion
// vectors obtained from the first pass in the current block
for (r = mb_row; r < mb_row_end ; r++) {
for (c = mb_col; c < mb_col_end; c++) {
const int mb_index = r * cm->mb_cols + c;
MOTION_DIRECTION this_mv;
MOTION_DIRECTION right_mv;
MOTION_DIRECTION bottom_mv;
this_mv =
get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
// to its right
if (c != mb_col_end - 1) {
right_mv = get_motion_direction_fp(
cpi->twopass.this_frame_mb_stats[mb_index + 1]);
none_complexity += get_motion_inconsistency(this_mv, right_mv);
}
// to its bottom
if (r != mb_row_end - 1) {
bottom_mv = get_motion_direction_fp(
cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
}
// do not count its left and top neighbors to avoid double counting
}
}
if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
partition_none_allowed = 0;
}
}
#endif
// PARTITION_NONE // PARTITION_NONE
if (partition_none_allowed) { if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize, rd_pick_sb_modes(cpi, tile, mi_row, mi_col, &this_rate, &this_dist, bsize,
...@@ -2048,6 +2149,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -2048,6 +2149,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
this_rate += cpi->partition_cost[pl][PARTITION_NONE]; this_rate += cpi->partition_cost[pl][PARTITION_NONE];
} }
sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist); sum_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_dist);
if (sum_rd < best_rd) { if (sum_rd < best_rd) {
int64_t stop_thresh = 4096; int64_t stop_thresh = 4096;
int64_t stop_thresh_rd; int64_t stop_thresh_rd;
...@@ -2078,7 +2180,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -2078,7 +2180,6 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
// stop further splitting in RD optimization // stop further splitting in RD optimization
if (cpi->use_fp_mb_stats && do_split != 0 && if (cpi->use_fp_mb_stats && do_split != 0 &&
cm->base_qindex > qindex_skip_threshold_lookup[bsize]) { cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
VP9_COMMON *cm = &cpi->common;
int mb_row = mi_row >> 1; int mb_row = mi_row >> 1;
int mb_col = mi_col >> 1; int mb_col = mi_col >> 1;
int mb_row_end = int mb_row_end =
...@@ -2104,11 +2205,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -2104,11 +2205,12 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
} }
} }
if (skip) { if (skip) {
unsigned int var; if (src_diff_var == UINT_MAX) {
set_offsets(cpi, tile, mi_row, mi_col, bsize); set_offsets(cpi, tile, mi_row, mi_col, bsize);
var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src, src_diff_var = get_sby_perpixel_diff_variance(
mi_row, mi_col, bsize); cpi, &cpi->mb.plane[0].src, mi_row, mi_col, bsize);
if (var < 8) { }
if (src_diff_var < 8) {
do_split = 0; do_split = 0;
do_rect = 0; do_rect = 0;
} }
...@@ -2171,6 +2273,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -2171,6 +2273,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
pl = partition_plane_context(xd, mi_row, mi_col, bsize); pl = partition_plane_context(xd, mi_row, mi_col, bsize);
sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT]; sum_rate += cpi->partition_cost[pl][PARTITION_SPLIT];
sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist);
if (sum_rd < best_rd) { if (sum_rd < best_rd) {
best_rate = sum_rate; best_rate = sum_rate;
best_dist = sum_dist; best_dist = sum_dist;
...@@ -2283,6 +2386,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, ...@@ -2283,6 +2386,7 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile,
} }
restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize);
} }
// TODO(jbb): This code added so that we avoid static analysis // TODO(jbb): This code added so that we avoid static analysis
// warning related to the fact that best_rd isn't used after this // warning related to the fact that best_rd isn't used after this
// point. This code should be refactored so that the duplicate // point. This code should be refactored so that the duplicate
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment