Commit 2e7d3277, authored by Yunqing Wang and committed by Gerrit Code Review
Browse files

Merge "Use source frame difference to make partition decision"

parents a30ff345 4e66293f
......@@ -380,6 +380,10 @@ specialize qw/vp9_variance64x64/, "$sse2_x86inc", "$avx2_x86inc";
add_proto qw/unsigned int vp9_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x16 mmx/, "$sse2_x86inc", "$avx2_x86inc";
# vp9_get_sse_sum_16x16 returns nothing; it reports through the separate
# `sse` and `sum` out-pointers. Its sse2 specialization is aliased to the
# symbol vp9_get16x16var_sse2 rather than to a function of its own name.
add_proto qw/void vp9_get_sse_sum_16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
specialize qw/vp9_get_sse_sum_16x16 sse2/;
$vp9_get_sse_sum_16x16_sse2=vp9_get16x16var_sse2;
add_proto qw/unsigned int vp9_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_variance16x8 mmx/, "$sse2_x86inc";
......
......@@ -1446,6 +1446,126 @@ static void copy_partitioning(VP9_COMMON *cm, MODE_INFO **mi_8x8,
}
}
// Offsets of the sixteen 16x16 blocks inside a 64x64 superblock, measured in
// 8x8 mode-info units from the superblock's top-left corner. Entries are
// grouped four at a time by the 32x32 quadrant that contains them, so entry
// (q * 4 + k) is the k-th 16x16 block of 32x32 quadrant q, and entry (q * 4)
// is the top-left corner of quadrant q itself.
const struct {
  int row;
  int col;
} coord_lookup[16] = {
  { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 },  // 32x32 quadrant 0 (top-left)
  { 0, 4 }, { 0, 6 }, { 2, 4 }, { 2, 6 },  // 32x32 quadrant 1 (top-right)
  { 4, 0 }, { 4, 2 }, { 6, 0 }, { 6, 2 },  // 32x32 quadrant 2 (bottom-left)
  { 4, 4 }, { 4, 6 }, { 6, 4 }, { 6, 6 },  // 32x32 quadrant 3 (bottom-right)
};
// Picks the block sizes for one 64x64 superblock from the difference between
// the current source frame and the previous source frame (cpi->Last_Source),
// and writes the result into the mode-info grid.
//
//   cpi             encoder instance; reads sf.source_var_thresh and
//                   sf.search_type_check_frequency, updates
//                   use_large_partition_rate.
//   tile            tile containing the superblock; bounds the visible area.
//   mi_8x8          grid entry of the superblock's top-left 8x8 unit; entries
//                   are pointed at the matching MODE_INFO in cm->mi.
//   mi_row, mi_col  superblock position in 8x8 units.
//
// Superblock fully inside the tile: every 16x16 block gets BLOCK_16X16. If
// all four 16x16 blocks of a 32x32 quadrant have a source-difference variance
// below source_var_thresh, the quadrant's first entry is promoted to
// BLOCK_32X32. If all four quadrants were promoted and each 32x32 variance is
// below twice the threshold, the first entry is promoted to BLOCK_64X64.
//
// Superblock crossing the tile edge: no variance is measured; the grid is
// filled with the size find_partition_size() returns, starting from
// BLOCK_16X16.
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MODE_INFO **mi_8x8,
                                           int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *x = &cpi->mb;
  // Same stride the rest of this file uses to index the mode-info grids
  // (the caller derives mi_8x8 with cm->mi_stride).
  const int mis = cm->mi_stride;
  int row8x8_remaining = tile->mi_row_end - mi_row;
  int col8x8_remaining = tile->mi_col_end - mi_col;
  int r, c;
  MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  // In-image SB64
  if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
      (row8x8_remaining >= MI_BLOCK_SIZE)) {
    const int src_stride = x->plane[0].src.stride;
    const int pre_stride = cpi->Last_Source->y_stride;
    const uint8_t *src = x->plane[0].src.buf;
    const int pre_offset = (mi_row * MI_SIZE) * pre_stride +
                           (mi_col * MI_SIZE);
    const uint8_t *pre_src = cpi->Last_Source->y_buffer + pre_offset;
    const int thr_32x32 = cpi->sf.source_var_thresh;
    const int thr_64x64 = thr_32x32 << 1;
    int i, j;
    int index;
    diff d32[4];
    int use16x16 = 0;

    for (i = 0; i < 4; i++) {
      diff d16[4];

      for (j = 0; j < 4; j++) {
        int b_mi_row = coord_lookup[i * 4 + j].row;
        int b_mi_col = coord_lookup[i * 4 + j].col;
        // Each buffer is stepped with its own stride: the current source and
        // the previous source are separate buffers, so a shared offset would
        // address the wrong rows whenever their strides differ.
        int src_b_offset = b_mi_row * MI_SIZE * src_stride +
                           b_mi_col * MI_SIZE;
        int pre_b_offset = b_mi_row * MI_SIZE * pre_stride +
                           b_mi_col * MI_SIZE;

        vp9_get_sse_sum_16x16(src + src_b_offset,
                              src_stride,
                              pre_src + pre_b_offset,
                              pre_stride, &d16[j].sse, &d16[j].sum);

        // 16x16 = 256 samples, hence the shift by 8.
        d16[j].var = d16[j].sse -
            (((uint32_t)d16[j].sum * d16[j].sum) >> 8);

        index = b_mi_row * mis + b_mi_col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->mbmi.sb_type = BLOCK_16X16;

        // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition
        // size to further improve quality.
      }

      if (d16[0].var < thr_32x32 && d16[1].var < thr_32x32 &&
          d16[2].var < thr_32x32 && d16[3].var < thr_32x32) {
        // Merge the four 16x16 measurements into one for the 32x32 quadrant.
        d32[i].sse = d16[0].sse;
        d32[i].sum = d16[0].sum;

        for (j = 1; j < 4; j++) {
          d32[i].sse += d16[j].sse;
          d32[i].sum += d16[j].sum;
        }

        // 32x32 = 1024 samples, hence the shift by 10; the squared sum needs
        // 64 bits at this block size.
        d32[i].var = d32[i].sse - (((int64_t)d32[i].sum * d32[i].sum) >> 10);

        index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col;
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->mbmi.sb_type = BLOCK_32X32;

        // Count 32x32 selections only on the frames where
        // (current_video_frame - 1) is a multiple of the check frequency.
        if (!((cm->current_video_frame - 1) %
            cpi->sf.search_type_check_frequency))
          cpi->use_large_partition_rate += 1;
      } else {
        use16x16 = 1;
      }
    }

    // d32[] is fully populated only when every quadrant was promoted, so it
    // is read only in that case.
    if (!use16x16) {
      if (d32[0].var < thr_64x64 && d32[1].var < thr_64x64 &&
          d32[2].var < thr_64x64 && d32[3].var < thr_64x64) {
        mi_8x8[0] = mi_upper_left;
        mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
      }
    }
  } else {   // partial in-image SB64
    BLOCK_SIZE bsize = BLOCK_16X16;
    int bh = num_8x8_blocks_high_lookup[bsize];
    int bw = num_8x8_blocks_wide_lookup[bsize];

    // bsize, bh and bw are deliberately carried from one iteration to the
    // next: find_partition_size() updates them in place and the loop steps
    // use the updated values.
    for (r = 0; r < MI_BLOCK_SIZE; r += bh) {
      for (c = 0; c < MI_BLOCK_SIZE; c += bw) {
        int index = r * mis + c;
        // Find a partition size that fits
        bsize = find_partition_size(bsize,
                                    (row8x8_remaining - r),
                                    (col8x8_remaining - c), &bh, &bw);
        mi_8x8[index] = mi_upper_left + index;
        mi_8x8[index]->mbmi.sb_type = bsize;
      }
    }
  }
}
static int sb_has_motion(const VP9_COMMON *cm, MODE_INFO **prev_mi_8x8) {
const int mis = cm->mi_stride;
int block_row, block_col;
......@@ -3069,10 +3189,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
const int idx_str = cm->mi_stride * mi_row + mi_col;
MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str;
MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str;
BLOCK_SIZE bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
cpi->sf.always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
BLOCK_SIZE bsize;
cpi->mb.source_variance = UINT_MAX;
vp9_zero(cpi->mb.pred_mv);
......@@ -3084,8 +3201,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, const TileInfo *const tile,
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
break;
case SOURCE_VAR_BASED_PARTITION:
set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64);
set_source_var_based_partition(cpi, tile, mi_8x8, mi_row, mi_col);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
break;
case VAR_BASED_FIXED_PARTITION:
case FIXED_PARTITION:
bsize = cpi->sf.partition_search_type == FIXED_PARTITION ?
cpi->sf.always_this_block_size :
get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col);
set_fixed_partitioning(cpi, tile, mi_8x8, mi_row, mi_col, bsize);
nonrd_use_partition(cpi, tile, mi_8x8, tp, mi_row, mi_col, BLOCK_64X64,
1, &dummy_rate, &dummy_dist);
......@@ -3158,6 +3284,29 @@ static void encode_frame_internal(VP9_COMP *cpi) {
p[i].eobs = ctx->eobs_pbuf[i][0];
}
vp9_zero(x->zcoeff_blk);
if (cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION &&
cm->current_video_frame > 0) {
int check_freq = cpi->sf.search_type_check_frequency;
if ((cm->current_video_frame - 1) % check_freq == 0) {
cpi->use_large_partition_rate = 0;
}
if ((cm->current_video_frame - 1) % check_freq == 1) {
const int mbs_in_b32x32 = 1 << ((b_width_log2_lookup[BLOCK_32X32] -
b_width_log2_lookup[BLOCK_16X16]) +
(b_height_log2_lookup[BLOCK_32X32] -
b_height_log2_lookup[BLOCK_16X16]));
cpi->use_large_partition_rate = cpi->use_large_partition_rate * 100 *
mbs_in_b32x32 / cm->MBs;
}
if ((cm->current_video_frame - 1) % check_freq >= 1) {
if (cpi->use_large_partition_rate < 15)
cpi->sf.partition_search_type = FIXED_PARTITION;
}
}
}
{
......
......@@ -20,6 +20,12 @@ struct macroblock;
struct yv12_buffer_config;
struct VP9_COMP;
// Per-block measurement of the difference between two co-located pixel
// blocks, as used by the source-variance based partitioning.
typedef struct {
  unsigned int sse;  // filled through the `sse` output of the sse/sum helper
  int sum;           // filled through the `sum` output; may be negative
  unsigned int var;  // derived by the caller: sse - (sum * sum) / pixel count
} diff;
void vp9_setup_src_planes(struct macroblock *x,
const struct yv12_buffer_config *src,
int mi_row, int mi_col);
......
......@@ -496,6 +496,8 @@ typedef struct VP9_COMP {
SVC svc;
int use_large_partition_rate;
#if CONFIG_MULTIPLE_ARF
// ARF tracking variables.
int multi_arf_enabled;
......
......@@ -265,7 +265,11 @@ static void set_rt_speed_feature(VP9_COMMON *cm, SPEED_FEATURES *sf,
}
if (speed >= 6) {
sf->partition_search_type = VAR_BASED_FIXED_PARTITION;
// Adaptively switch between SOURCE_VAR_BASED_PARTITION and FIXED_PARTITION.
sf->partition_search_type = SOURCE_VAR_BASED_PARTITION;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 360;
sf->use_nonrd_pick_mode = 1;
sf->search_method = FAST_DIAMOND;
}
......@@ -338,6 +342,8 @@ void vp9_set_speed_features(VP9_COMP *cpi) {
// This setting only takes effect when partition_search_type is set
// to FIXED_PARTITION.
sf->always_this_block_size = BLOCK_16X16;
sf->search_type_check_frequency = 50;
sf->source_var_thresh = 100;
// Recode loop tolerance %.
sf->recode_tolerance = 25;
......
......@@ -110,7 +110,10 @@ typedef enum {
// Use an arbitrary partitioning scheme based on source variance within
// a 64X64 SB
VAR_BASED_PARTITION
VAR_BASED_PARTITION,
// Use non-fixed partitions based on source variance
SOURCE_VAR_BASED_PARTITION
} PARTITION_SEARCH_TYPE;
typedef enum {
......@@ -335,6 +338,13 @@ typedef struct {
// used in inter frames.
// TODO(aconverse): Fold this into one of the other many mode skips
BLOCK_SIZE max_intra_bsize;
// The frequency that we check if SOURCE_VAR_BASED_PARTITION or
// FIXED_PARTITION search type should be used.
int search_type_check_frequency;
// The threshold used in SOURCE_VAR_BASED_PARTITION search type.
int source_var_thresh;
} SPEED_FEATURES;
struct VP9_COMP;
......
......@@ -417,6 +417,12 @@ unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
return (var - (((int64_t)avg * avg) >> 10));
}
// Plain-C version of vp9_get_sse_sum_16x16. Forwards the source and reference
// blocks to variance() with a fixed 16x16 block size, handing the caller's
// `sse` and `sum` pointers straight through (presumably for variance() to
// fill in; its definition is outside this hunk).
void vp9_get_sse_sum_16x16_c(const uint8_t *src_ptr, int source_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sse, int *sum) {
  const int block_w = 16;
  const int block_h = 16;

  variance(src_ptr, source_stride, ref_ptr, ref_stride,
           block_w, block_h, sse, sum);
}
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment