/*
 * Reviewer summary of this patch. This text sits before the first
 * "diff --git" header; the patch content below it is unchanged.
 *
 * vp9/encoder/vp9_encodeframe.c
 *   - encode_rd_sb_row(): removes the assignment
 *     cpi->mb.source_variance = UINT_MAX.
 *     NOTE(review): confirm nothing later on this path relied on that reset.
 *   - encode_frame_internal(): encode_nonrd_sb_row() is now called only when
 *     sf.use_nonrd_pick_mode is set and cm->frame_type is not KEY_FRAME;
 *     every other case calls encode_rd_sb_row().
 *
 * vp9/encoder/vp9_pickmode.c
 *   - Adds static model_rd_for_sb_y(). It reads plane 0 only: it calls
 *     cpi->fn_ptr[bs].vf() on the source and destination buffers, discards
 *     the return value and keeps the sse output, then passes that sse, the
 *     pixel count (1 << num_pels_log2_lookup[bs]) and pd->dequant[1] >> 3 to
 *     vp9_model_rd_from_var_lapndz(). The rate is written out cast to int;
 *     the distortion is written out shifted left by 4.
 *   - vp9_pick_inter_mode(): the fixed cost[] table is removed, and so is the
 *     distortion taken from fn_ptr[bsize].sdf() together with the reuse of
 *     cached x->mode_sad values (zero vector, or vector equal to the
 *     NEARESTMV vector). Each inter mode now always sets mbmi->mode and
 *     mbmi->mv[0], calls vp9_build_inter_predictors_sby(), gets rate and dist
 *     from model_rd_for_sb_y(), adds rate_mv and
 *     x->inter_mode_cost[mbmi->mode_context[ref_frame]][INTER_OFFSET(mode)]
 *     to the rate, and computes this_rd with
 *     RDCOST(x->rdmult, x->rddiv, rate, dist).
 *   - In the later hunk (where this_rd + intra_mode_cost is compared with
 *     best_rd) the sdf() call is replaced the same way, with
 *     x->mbmode_cost[this_mode] added to the rate.
 *     NOTE(review): intra_mode_cost (50) used to be added to the sdf()
 *     result and is now added to an RDCOST value; confirm the constant
 *     still gives the intended bias.
 *   - rate_mv is now declared and zeroed inside the per-mode loop instead of
 *     the per-reference-frame loop; rate and dist become function-scope
 *     locals initialised to INT_MAX and INT64_MAX.
 *
 * vp9/encoder/vp9_rdopt.c
 *   - vp9_initialize_rd_consts(): the section that calls fill_token_costs()
 *     and the section that calls fill_mode_costs() now also run when
 *     cm->frame_type == KEY_FRAME, even with sf.use_nonrd_pick_mode set.
 *   - model_rd_from_var_lapndz() loses "static" and is renamed
 *     vp9_model_rd_from_var_lapndz(); the two existing call sites in this
 *     file are updated to the new name.
 *
 * vp9/encoder/vp9_rdopt.h
 *   - Declares vp9_model_rd_from_var_lapndz() so vp9_pickmode.c can call it.
 */
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index e29fbd64ad0ada1ae74d91e8dc4953d57acc7eee..435abddbe936e4d31d7286518bafcfd9d0226ff7 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2010,7 +2010,6 @@ static void encode_rd_sb_row(VP9_COMP *cpi, const TileInfo *const tile, const int idx_str = cm->mode_info_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; MODE_INFO **prev_mi_8x8 = cm->prev_mi_grid_visible + idx_str; - cpi->mb.source_variance = UINT_MAX; if (cpi->sf.partition_search_type == FIXED_PARTITION) { set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); @@ -2492,7 +2491,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { vp9_tile_init(&tile, cm, tile_row, tile_col); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { - if (cpi->sf.use_nonrd_pick_mode) + if (cpi->sf.use_nonrd_pick_mode && cm->frame_type != KEY_FRAME) encode_nonrd_sb_row(cpi, &tile, mi_row, &tp); else encode_rd_sb_row(cpi, &tile, mi_row, &tp); diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 75122bc6fe5777eab3a9a8f9818a3c7fe43dde62..4df48998816f881fd094629f1a0e579488c9eb18 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -187,6 +187,38 @@ static void sub_pixel_motion_search(VP9_COMP *cpi, MACROBLOCK *x, } } +static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, + MACROBLOCK *x, MACROBLOCKD *xd, + int *out_rate_sum, int64_t *out_dist_sum) { + // Note our transform coeffs are 8 times an orthogonal transform. + // Hence quantizer step is also 8 times. To get effective quantizer + // we need to divide by 8 before sending to modeling function. 
+ int64_t rate_sum = 0; + int64_t dist_sum = 0; + unsigned int sse; + + + struct macroblock_plane *const p = &x->plane[0]; + struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); + + (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, + pd->dst.buf, pd->dst.stride, &sse); + + { + int rate; + int64_t dist; + vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); + rate_sum += rate; + dist_sum += dist; + } + + + *out_rate_sum = (int)rate_sum; + *out_dist_sum = dist_sum << 4; +} + // TODO(jingning) placeholder for inter-frame non-RD mode decision. // this needs various further optimizations. to be continued.. int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, @@ -208,11 +240,13 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, VP9_ALT_FLAG }; int64_t best_rd = INT64_MAX; int64_t this_rd = INT64_MAX; - static const int cost[4]= { 0, 2, 4, 6 }; const int64_t inter_mode_thresh = 300; const int64_t intra_mode_cost = 50; + int rate = INT_MAX; + int64_t dist = INT64_MAX; + x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; x->skip = 0; @@ -245,7 +279,6 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } for (ref_frame = LAST_FRAME; ref_frame <= LAST_FRAME ; ++ref_frame) { - int rate_mv = 0; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; @@ -258,9 +291,8 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, mbmi->ref_frame[0] = ref_frame; for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { - int rate = cost[INTER_OFFSET(this_mode)] - << (num_pels_log2_lookup[bsize] - 4); - int64_t dist; + int rate_mv = 0; + if (cpi->sf.disable_inter_mode_mask[bsize] & (1 << INTER_OFFSET(this_mode))) continue; @@ -280,22 +312,15 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &frame_mv[NEWMV][ref_frame].as_mv); } - if (frame_mv[this_mode][ref_frame].as_int == 0) { - dist = 
x->mode_sad[ref_frame][INTER_OFFSET(ZEROMV)]; - } else if (this_mode != NEARESTMV && - frame_mv[NEARESTMV][ref_frame].as_int == - frame_mv[this_mode][ref_frame].as_int) { - dist = x->mode_sad[ref_frame][INTER_OFFSET(NEARESTMV)]; - } else { - mbmi->mode = this_mode; - mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; - vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - dist = x->mode_sad[ref_frame][INTER_OFFSET(this_mode)] = - cpi->fn_ptr[bsize].sdf(p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride, INT_MAX); - } + mbmi->mode = this_mode; + mbmi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int; + vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); - this_rd = rate + dist; + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + rate += rate_mv; + rate += x->inter_mode_cost[mbmi->mode_context[ref_frame]] + [INTER_OFFSET(this_mode)]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd < best_rd) { best_rd = this_rd; @@ -319,10 +344,9 @@ int64_t vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, &p->src.buf[0], p->src.stride, &pd->dst.buf[0], pd->dst.stride, 0, 0, 0); - this_rd = cpi->fn_ptr[bsize].sdf(p->src.buf, - p->src.stride, - pd->dst.buf, - pd->dst.stride, INT_MAX); + model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist); + rate += x->mbmode_cost[this_mode]; + this_rd = RDCOST(x->rdmult, x->rddiv, rate, dist); if (this_rd + intra_mode_cost < best_rd) { best_rd = this_rd; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index e1230cabce3aef98ddada33382786bdc2e7657e8..37390e41ab22981eb8232b9f6c592f0c02712a5f 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -287,7 +287,7 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi) { set_block_thresholds(cpi); - if (!cpi->sf.use_nonrd_pick_mode) { + if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) { fill_token_costs(x->token_costs, cm->fc.coef_probs); for (i = 0; i < PARTITION_CONTEXTS; i++) @@ -295,7 +295,8 @@ void 
vp9_initialize_rd_consts(VP9_COMP *cpi) { vp9_partition_tree); } - if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1) { + if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || + cm->frame_type == KEY_FRAME) { fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { @@ -394,9 +395,9 @@ static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } -static void model_rd_from_var_lapndz(unsigned int var, unsigned int n, - unsigned int qstep, int *rate, - int64_t *dist) { +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist) { // This function models the rate and distortion for a Laplacian // source with given variance when quantized with a uniform quantizer // with given stepsize. The closed form expressions are in: @@ -458,8 +459,8 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, } else { int rate; int64_t dist; - model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], - pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, 1 << num_pels_log2_lookup[bs], + pd->dequant[1] >> 3, &rate, &dist); rate_sum += rate; dist_sum += dist; } @@ -506,7 +507,8 @@ static void model_rd_for_sb_y_tx(VP9_COMP *cpi, BLOCK_SIZE bsize, &pd->dst.buf[j * pd->dst.stride + k], pd->dst.stride, &sse); // sse works better than var, since there is no dc prediction used - model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, &rate, &dist); + vp9_model_rd_from_var_lapndz(sse, t * t, pd->dequant[1] >> 3, + &rate, &dist); rate_sum += rate; dist_sum += dist; *out_skip &= (rate < 1024); diff --git a/vp9/encoder/vp9_rdopt.h b/vp9/encoder/vp9_rdopt.h index b5baa337d0331b692f7c8d2e2d65621778f5cd5e..7b88e582b20790c5c97cf2d07c46f038b2e3f61b 100644 --- a/vp9/encoder/vp9_rdopt.h +++ b/vp9/encoder/vp9_rdopt.h @@ -41,6 +41,10 @@ void vp9_initialize_rd_consts(VP9_COMP *cpi); void 
vp9_initialize_me_consts(VP9_COMP *cpi, int qindex); +void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, + unsigned int qstep, int *rate, + int64_t *dist); + void vp9_setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, const TileInfo *const tile, MV_REFERENCE_FRAME ref_frame,