From 14fc50452d5be0c416e6f5466a90f98013d8ad80 Mon Sep 17 00:00:00 2001 From: Luc Trudeau Date: Fri, 16 Jun 2017 12:40:29 -0400 Subject: [PATCH] [CFL] RDO Loop Rework CfL performs an extra loop iteration during luma mode selection. Recent changes have broken the extra iteration. Remove the previous approach. The new approach adds the extra iteration right before uv parameter selection. Interesting fact: if the best luma intra mode already has worse RD performance than the best inter mode found so far (if any), then the entire chroma intra search is skipped, including the extra iteration. Results on Subset1 (compared to 3e18e4a with CfL) PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000 -0.3090 | -2.7271 | -2.3521 | -0.3369 | -0.3463 | -0.3525 | -1.1868 Change-Id: If67b0badd2c8ea25c61685483d39d622c1729b18 --- av1/encoder/encodeframe.c | 10 +++++----- av1/encoder/rdopt.c | 38 +++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index 7964e0349..1a9add153 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c @@ -1341,10 +1341,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, x->pvq_speed = 1; x->pvq_coded = 0; #endif -#if CONFIG_CFL - // Don't store luma during RDO (we will store the best mode later). - x->cfl_store_y = 0; -#endif set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; @@ -1353,6 +1349,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, mbmi->mi_row = mi_row; mbmi->mi_col = mi_col; #endif +#if CONFIG_CFL + // Don't store luma during RDO. Only store luma when best luma is known + x->cfl_store_y = 0; +#endif #if CONFIG_SUPERTX // We set tx_size here as skip blocks would otherwise not set it. 
// tx_size needs to be set at this point as supertx_enable in @@ -5653,7 +5653,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, x->pvq_coded = (dry_run == OUTPUT_ENABLED) ? 1 : 0; #endif #if CONFIG_CFL - x->cfl_store_y = (dry_run == OUTPUT_ENABLED) ? 1 : 0; + x->cfl_store_y = 1; #endif if (!is_inter) { diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 4695f26af..34e439625 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c @@ -3871,16 +3871,6 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, od_encode_rollback(&x->daala_enc, &post_buf); #endif // CONFIG_PVQ -#if CONFIG_CFL - // Perform one extra txfm_rd_in_plane() call, this time with the best value so - // we can store reconstructed luma values - RD_STATS this_rd_stats; - x->cfl_store_y = 1; - txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize, - mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing); - x->cfl_store_y = 0; -#endif - #if CONFIG_PALETTE if (try_palette) { rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx, @@ -8718,6 +8708,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; struct macroblockd_plane *const pd = xd->plane; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip = 0; @@ -8726,11 +8717,11 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, const int unify_bsize = CONFIG_CB4X4; ctx->skip = 0; - xd->mi[0]->mbmi.ref_frame[0] = INTRA_FRAME; - xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME; + mbmi->ref_frame[0] = INTRA_FRAME; + mbmi->ref_frame[1] = NONE_FRAME; #if CONFIG_INTRABC - xd->mi[0]->mbmi.use_intrabc = 0; - xd->mi[0]->mbmi.mv[0].as_int = 0; + mbmi->use_intrabc = 0; + mbmi->mv[0].as_int = 0; #endif // CONFIG_INTRABC const int64_t intra_yrd = @@ -8741,9 
+8732,22 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, &dist_y, &y_skip, best_rd); if (intra_yrd < best_rd) { - max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size] - [pd[1].subsampling_x][pd[1].subsampling_y]; - init_sbuv_mode(&xd->mi[0]->mbmi); +#if CONFIG_CFL + // Perform one extra txfm_rd_in_plane() call, this time with the best value + // so we can store reconstructed luma values + RD_STATS this_rd_stats; + + x->cfl_store_y = 1; + + txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y, + mbmi->sb_type, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + + x->cfl_store_y = 0; +#endif + max_uv_tx_size = uv_txsize_lookup[bsize][mbmi->tx_size][pd[1].subsampling_x] + [pd[1].subsampling_y]; + init_sbuv_mode(mbmi); #if CONFIG_CB4X4 if (!x->skip_chroma_rd) rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, -- GitLab