Commit a0043c6d authored by Yunqing Wang
Browse files

Enhance the transform skipping decision-making in non-rd mode

For large partition blocks (block_size > 32x32), the variance
calculation is modified so that every 8x8 block's variance
is stored during the calculation, which is used in the
following transform skipping test. Also, the variance for
every tx block is calculated. The skipping test checks all tx
blocks in the partition, and sets the skip flag only if all tx
blocks are skippable. If the skip flag of Y plane is 1, a
quick evaluation is done on UV planes. If the current partition
block is skippable in YUV planes, the mode search checks fewer
inter modes and doesn't check intra modes.

The rtc set borg test (at speed 6) showed that:
Overall psnr: -0.527%; Avg psnr: -0.510%; ssim: -0.573%.
Average single-thread speedup on rtc set was 3.5%.
For 720p clips, more speedups were seen.
gipsrecmotion: 13%
gipsrestat: 12%
vidyo: 5 - 9%
dark: 15%
niklas: 6%

Change-Id: I8d8ebec0cb305f1de016516400bf007c3042666e
parent fc981147
......@@ -202,6 +202,248 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
return rv;
}
// Walks a w x h region in steps of block_size and, for every step, calls
// vp9_get8x8var() on the co-located src/ref positions.
//
// Per-step results are written, in raster order, to sse8x8[] and sum8x8[];
// var8x8[] receives sse - sum^2 / 64 for the same step. The totals over the
// region are accumulated into *sse and *sum, which are reset to zero on entry.
//
// The three output arrays must have room for one entry per step.
static void block_variance(const uint8_t *src, int src_stride,
                           const uint8_t *ref, int ref_stride,
                           int w, int h, unsigned int *sse, int *sum,
                           int block_size, unsigned int *sse8x8,
                           int *sum8x8, unsigned int *var8x8) {
  int row, col;
  int n = 0;  // Raster index of the current step in the output arrays.

  *sse = 0;
  *sum = 0;

  for (row = 0; row < h; row += block_size) {
    const uint8_t *const src_row = src + src_stride * row;
    const uint8_t *const ref_row = ref + ref_stride * row;

    for (col = 0; col < w; col += block_size, ++n) {
      unsigned int blk_sse;
      int blk_sum;

      vp9_get8x8var(src_row + col, src_stride, ref_row + col, ref_stride,
                    &sse8x8[n], &sum8x8[n]);
      blk_sse = sse8x8[n];
      blk_sum = sum8x8[n];

      *sse += blk_sse;
      *sum += blk_sum;

      // variance = sse - sum^2 / 64; the product is taken in unsigned
      // arithmetic before the shift.
      var8x8[n] = blk_sse - (((unsigned int)blk_sum * blk_sum) >> 6);
    }
  }
}
// Merges per-unit statistics into the next larger unit size.
//
// sse_i[] / sum_i[] hold one entry per input unit of size tx_size, stored in
// raster order with nw entries per row (nw and nh are derived from the
// partition's bw / bh and the input unit size). Every 2x2 group of input
// units is combined into one output entry:
//   sse_o[k] = sum of the four input SSEs
//   sum_o[k] = sum of the four input sums
//   var_o[k] = sse_o[k] - sum_o[k]^2 / (pixels in the merged unit)
// Output entries are written in raster order, (nw / 2) per row.
//
// The squared sum is computed in 64 bits: a 32-bit product wraps once
// |sum_o[k]| >= 65536, which would silently corrupt the variance used by the
// transform skipping test.
static void calculate_variance(int bw, int bh, TX_SIZE tx_size,
                               unsigned int *sse_i, int *sum_i,
                               unsigned int *var_o, unsigned int *sse_o,
                               int *sum_o) {
  const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size];
  const int nw = 1 << (bw - b_width_log2_lookup[unit_size]);
  const int nh = 1 << (bh - b_height_log2_lookup[unit_size]);
  // Shift that divides sum^2 by the size of one merged (2x2) output unit.
  const int merged_shift = b_width_log2_lookup[unit_size] +
                           b_height_log2_lookup[unit_size] + 6;
  int i, j, k = 0;

  for (i = 0; i < nh; i += 2) {
    for (j = 0; j < nw; j += 2) {
      const int top = i * nw + j;
      const int bottom = (i + 1) * nw + j;

      sse_o[k] = sse_i[top] + sse_i[top + 1] +
                 sse_i[bottom] + sse_i[bottom + 1];
      sum_o[k] = sum_i[top] + sum_i[top + 1] +
                 sum_i[bottom] + sum_i[bottom + 1];
      var_o[k] = sse_o[k] -
                 (unsigned int)(((int64_t)sum_o[k] * sum_o[k]) >> merged_shift);
      k++;
    }
  }
}
// Models the Y-plane rate and distortion of a partition block and runs a
// transform skipping test over every transform block inside it.
//
// Results:
//   *var_y, *sse_y   - variance and SSE of the whole block, measured between
//                      plane 0's src buffer and dst buffer.
//   mbmi.tx_size     - transform size selected for the block (>= TX_8X8).
//   x->skip_txfm[0]  - 0 by default; 2 when every transform block passes the
//                      AC test; 1 when every transform block passes both the
//                      AC and the DC test.
//   *out_rate_sum,
//   *out_dist_sum    - modeled rate and distortion.
//   *early_term      - set to 1 only when x->skip_txfm[0] == 1 and both
//                      chroma planes pass their own AC/DC tests; it is never
//                      cleared here, so the caller must initialize it.
//
// The per-8x8 statistics live in 64-entry stack arrays, so the block may
// contain at most 64 8x8 units.
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
                                    MACROBLOCK *x, MACROBLOCKD *xd,
                                    int *out_rate_sum, int64_t *out_dist_sum,
                                    unsigned int *var_y, unsigned int *sse_y,
                                    int mi_row, int mi_col, int *early_term) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  struct macroblock_plane *const y_src = &x->plane[0];
  struct macroblockd_plane *const y_pd = &xd->plane[0];
  const uint32_t dc_q = y_pd->dequant[0];
  const uint32_t ac_q = y_pd->dequant[1];
  const int64_t dc_thr = dc_q * dc_q >> 6;
  const int64_t ac_thr = ac_q * ac_q >> 6;
  const int bw = b_width_log2_lookup[bsize];
  const int bh = b_height_log2_lookup[bsize];
  const int num8x8 = 1 << (bw + bh - 2);
  // Largest transform allowed by both the block size and the frame tx mode.
  const TX_SIZE largest_tx =
      MIN(max_txsize_lookup[bsize],
          tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
  unsigned int sse8x8[64] = {0};
  int sum8x8[64] = {0};
  unsigned int var8x8[64] = {0};
  unsigned int y_sse;
  unsigned int y_var;
  int y_sum;
  int rate;
  int64_t dist;
  int skip_dc = 0;
  int quant_shift;
  TX_SIZE tx_size;
  int plane, n;

  // Calculate variance for whole partition, and also save 8x8 blocks' variance
  // to be used in following transform skipping test.
  block_variance(y_src->src.buf, y_src->src.stride,
                 y_pd->dst.buf, y_pd->dst.stride,
                 4 << bw, 4 << bh, &y_sse, &y_sum, 8,
                 sse8x8, sum8x8, var8x8);
  y_var = y_sse - (((int64_t)y_sum * y_sum) >> (bw + bh + 4));

  *var_y = y_var;
  *sse_y = y_sse;

  // Transform size selection. Outside TX_MODE_SELECT the largest allowed
  // size is used as is.
  tx_size = largest_tx;
  if (cpi->common.tx_mode == TX_MODE_SELECT) {
    // Fall back to 8x8 unless the SSE exceeds four times the variance.
    if (y_sse <= (y_var << 2))
      tx_size = TX_8X8;

    if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) {
      if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
          cyclic_refresh_segment_id_boosted(
              xd->mi[0].src_mi->mbmi.segment_id))
        tx_size = TX_8X8;
      else if (tx_size > TX_16X16)
        tx_size = TX_16X16;
    }
  }

  assert(tx_size >= TX_8X8);
  xd->mi[0].src_mi->mbmi.tx_size = tx_size;

  // Evaluate if the partition block is a skippable block in Y plane.
  {
    unsigned int sse16x16[16] = {0};
    int sum16x16[16] = {0};
    unsigned int var16x16[16] = {0};
    unsigned int sse32x32[4] = {0};
    int sum32x32[4] = {0};
    unsigned int var32x32[4] = {0};
    const unsigned int *sse_tx;
    const unsigned int *var_tx;
    int num_tx;
    int ac_ok = 1;
    int dc_ok = 1;

    // Build the statistics for transform blocks larger than 8x8 by merging
    // the next smaller level.
    if (tx_size >= TX_16X16)
      calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16,
                         sum16x16);
    if (tx_size == TX_32X32)
      calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32,
                         sse32x32, sum32x32);

    // Pick the statistics that match the selected transform size.
    if (tx_size == TX_8X8) {
      num_tx = num8x8;
      sse_tx = sse8x8;
      var_tx = var8x8;
    } else if (tx_size == TX_16X16) {
      num_tx = num8x8 >> 2;
      sse_tx = sse16x16;
      var_tx = var16x16;
    } else {
      num_tx = num8x8 >> 4;
      sse_tx = sse32x32;
      var_tx = var32x32;
    }

    // Skipping test
    x->skip_txfm[0] = 0;

    // Check if all ac coefficients can be quantized to zero. A zero
    // whole-block variance passes unconditionally.
    if (y_var != 0) {
      for (n = 0; n < num_tx; ++n) {
        if (var_tx[n] >= ac_thr) {
          ac_ok = 0;
          break;
        }
      }
    }

    // Check if dc coefficient can be quantized to zero. sse == var for the
    // whole block passes unconditionally.
    if (y_sse != y_var) {
      for (n = 0; n < num_tx; ++n) {
        if (sse_tx[n] - var_tx[n] >= dc_thr) {
          dc_ok = 0;
          break;
        }
      }
    }

    if (ac_ok)
      x->skip_txfm[0] = dc_ok ? 1 : 2;
    else if (dc_ok)
      skip_dc = 1;
  }

  if (x->skip_txfm[0] == 1) {
    int num_uv_skippable = 0;

    *out_rate_sum = 0;
    *out_dist_sum = y_sse << 4;

    // Transform skipping test in UV planes. Both planes are always
    // predicted and measured, even if the first one fails the test.
    for (plane = 1; plane <= 2; ++plane) {
      struct macroblock_plane *const uv_src = &x->plane[plane];
      struct macroblockd_plane *const uv_pd = &xd->plane[plane];
      const TX_SIZE uv_tx_size =
          get_uv_tx_size(&xd->mi[0].src_mi->mbmi, uv_pd);
      const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size];
      // NOTE(review): sf is derived from the luma dimensions bw/bh, while
      // the variance below is measured over the chroma plane block `bs`.
      // Confirm this scaling is intended for subsampled chroma.
      const int sf = (bw - b_width_log2_lookup[unit_size]) +
                     (bh - b_height_log2_lookup[unit_size]);
      const BLOCK_SIZE bs = get_plane_block_size(bsize, uv_pd);
      const uint32_t uv_dc_thr =
          uv_pd->dequant[0] * uv_pd->dequant[0] >> (6 - sf);
      const uint32_t uv_ac_thr =
          uv_pd->dequant[1] * uv_pd->dequant[1] >> (6 - sf);
      unsigned int uv_var;
      unsigned int uv_sse;

      vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, plane);
      uv_var = cpi->fn_ptr[bs].vf(uv_src->src.buf, uv_src->src.stride,
                                  uv_pd->dst.buf, uv_pd->dst.stride, &uv_sse);

      if ((uv_var < uv_ac_thr || uv_var == 0) &&
          (uv_sse - uv_var < uv_dc_thr || uv_sse == uv_var))
        ++num_uv_skippable;
    }

    // If the transform in YUV planes are skippable, the mode search checks
    // fewer inter modes and doesn't check intra modes.
    if (num_uv_skippable == 2)
      *early_term = 1;

    return;
  }

  // Shift that scales the dequantizer step before it goes to the model.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
    quant_shift = xd->bd - 5;
  else
    quant_shift = 3;
#else
  quant_shift = 3;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // DC part: modeled from (sse - var) unless every transform block passed
  // the DC test, in which case it costs no rate.
  if (!skip_dc) {
    vp9_model_rd_from_var_lapndz(y_sse - y_var, num_pels_log2_lookup[bsize],
                                 dc_q >> quant_shift, &rate, &dist);
    *out_rate_sum = rate >> 1;
    *out_dist_sum = dist << 3;
  } else {
    *out_rate_sum = 0;
    *out_dist_sum = (y_sse - y_var) << 4;
  }

  // AC part: modeled from the whole-block variance and added on top.
  vp9_model_rd_from_var_lapndz(y_var, num_pels_log2_lookup[bsize],
                               ac_q >> quant_shift, &rate, &dist);
  *out_rate_sum += rate;
  *out_dist_sum += dist << 4;
}
static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
......@@ -799,6 +1041,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
int ref_frame_skip_mask = 0;
int idx;
int best_pred_sad = INT_MAX;
int best_early_term = 0;
int ref_frame_cost[MAX_REF_FRAMES];
vp9_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
vp9_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
......@@ -906,6 +1149,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
PREDICTION_MODE this_mode = ref_mode_set[idx].pred_mode;
int64_t this_sse;
int is_skippable;
int this_early_term = 0;
if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode)))
continue;
......@@ -1066,8 +1310,18 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
} else {
mbmi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref;
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
&var_y, &sse_y);
// For large partition blocks, extra testing is done.
if (bsize > BLOCK_32X32 && xd->mi[0].src_mi->mbmi.segment_id != 1 &&
cm->base_qindex) {
model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate,
&this_rdc.dist, &var_y, &sse_y, mi_row, mi_col,
&this_early_term);
} else {
model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
&var_y, &sse_y);
}
this_rdc.rate +=
cm->interp_filter == SWITCHABLE ?
vp9_get_switchable_rate(cpi, xd) : 0;
......@@ -1143,6 +1397,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
best_tx_size = mbmi->tx_size;
best_ref_frame = ref_frame;
best_mode_skip_txfm = x->skip_txfm[0];
best_early_term = this_early_term;
if (reuse_inter_pred) {
free_pred_buffer(best_pred);
......@@ -1155,6 +1410,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
if (x->skip)
break;
// If early termination flag is 1 and at least 2 modes are checked,
// the mode search is terminated.
if (best_early_term && idx > 0) {
x->skip = 1;
break;
}
}
mbmi->mode = best_mode;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment