Commit a517343c authored by Jingning Han
Browse files

Enable per transformed block zero coeffs forcing

This commit enables forcing all coefficients of a transform block to
zero when doing so gives a lower rate-distortion cost than regular
coefficient quantization.

The overall performance improvement (including the parent patch, which
calculates the RD cost per transform block) at speed 1:
derf:  0.298%
yt:    0.452%
hd:    0.741%
stdhd: 0.006%

Change-Id: I66005fe0fd7af192c3eba32e02fd6d77952accb5
parent 78fbb106
......@@ -34,6 +34,7 @@ typedef struct {
typedef struct {
MODE_INFO mic;
PARTITION_INFO partition_info;
unsigned char zcoeff_blk[256];
int skip;
int_mv best_ref_mv;
int_mv second_best_ref_mv;
......@@ -136,6 +137,7 @@ struct macroblock {
int mv_row_min;
int mv_row_max;
unsigned char zcoeff_blk[TX_SIZES][256];
int skip;
int encode_breakout;
......
......@@ -390,6 +390,9 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx,
}
x->skip = ctx->skip;
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
sizeof(ctx->zcoeff_blk));
if (!output_enabled)
return;
......@@ -2744,7 +2747,6 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled,
setup_pre_planes(xd, 1, second_ref_fb, mi_row, mi_col,
&xd->scale_factor[1]);
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8));
}
......
......@@ -482,6 +482,14 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
int16_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
uint8_t *const dst = raster_block_offset_uint8(plane_bsize, raster_block,
pd->dst.buf, pd->dst.stride);
// TODO(jingning): per-transform-block zero forcing is currently enabled
// only for the luma component; integrate the chroma components as well.
if (x->zcoeff_blk[tx_size][block] && plane == 0) {
pd->eobs[block] = 0;
return;
}
vp9_xform_quant(plane, block, plane_bsize, tx_size, arg);
if (x->optimize)
......
......@@ -624,7 +624,12 @@ static void block_yrd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
rate_block(plane, block, plane_bsize, tx_size, args);
rd1 = RDCOST(x->rdmult, x->rddiv, args->rate[block], args->dist[block]);
rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse[block]);
// TODO(jingning): temporarily enabled only for luma component
rd = MIN(rd1, rd2);
if (plane == 0)
x->zcoeff_blk[tx_size][block] = rd1 > rd2;
args->this_rate += args->rate[block];
args->this_dist += args->dist[block];
args->this_sse += args->sse[block];
......@@ -2234,6 +2239,9 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
ctx->comp_pred_diff = (int)comp_pred_diff[COMP_PREDICTION_ONLY];
ctx->hybrid_pred_diff = (int)comp_pred_diff[HYBRID_PREDICTION];
vpx_memcpy(ctx->zcoeff_blk, x->zcoeff_blk[xd->this_mi->mbmi.tx_size],
sizeof(ctx->zcoeff_blk));
// FIXME(rbultje) does this memcpy the whole array? I believe sizeof()
// doesn't actually work this way
memcpy(ctx->tx_rd_diff, tx_size_diff, sizeof(ctx->tx_rd_diff));
......@@ -3153,8 +3161,11 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
const int bws = num_8x8_blocks_wide_lookup[bsize] / 2;
const int bhs = num_8x8_blocks_high_lookup[bsize] / 2;
int best_skip2 = 0;
unsigned char best_zcoeff_blk[256] = { 0 };
x->skip_encode = cpi->sf.skip_encode_frame && xd->q_index < QIDX_SKIP_THRESH;
vpx_memset(x->zcoeff_blk, 0, sizeof(x->zcoeff_blk));
vpx_memset(ctx->zcoeff_blk, 0, sizeof(ctx->zcoeff_blk));
for (i = 0; i < 4; i++) {
int j;
......@@ -3826,6 +3837,8 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
best_mbmode = *mbmi;
best_skip2 = this_skip2;
best_partition = *x->partition_info;
vpx_memcpy(best_zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
sizeof(best_zcoeff_blk));
if (this_mode == RD_I4X4_PRED || this_mode == RD_SPLITMV)
for (i = 0; i < 4; i++)
......@@ -4021,6 +4034,9 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x,
mbmi->mv[1].as_int = xd->this_mi->bmi[3].as_mv[1].as_int;
}
vpx_memcpy(x->zcoeff_blk[mbmi->tx_size], best_zcoeff_blk,
sizeof(best_zcoeff_blk));
for (i = 0; i < NB_PREDICTION_TYPES; ++i) {
if (best_pred_rd[i] == INT64_MAX)
best_pred_diff[i] = INT_MIN;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment