From b05eeaef6843c8d089fbeb960e3a89a9805ce9e8 Mon Sep 17 00:00:00 2001 From: Luc Trudeau Date: Fri, 18 Aug 2017 15:14:30 -0400 Subject: [PATCH] [CFL] Store Reconstructed Luma for Intra In Inter As with intra blocks in intra frames, an extra call to txfm_rd_in_plane is added to the RDO of intra blocks in inter frames. This extra call is performed using the best parameters found during RDO and the reconstructed luma pixels are stored. Results on objective-1-fast (compared to CfL on Intra frames only) PSNR | PSNR Cb | PSNR Cr | PSNR HVS | SSIM | MS SSIM | CIEDE 2000 -0.2497 | -3.5526 | -3.5048 | -0.2456 | -0.2392 | -0.2508 | -1.4811 https://arewecompressedyet.com/?job=cfl-no-inter%402017-09-13&job=cfl-inter%402017-09-13T14%3A13%3A13.918Z Change-Id: I70ea2c01859b6c55d7c3eb9680d492c0bfc2aad4 --- av1/common/cfl.c | 146 +++++++++++++++++++++++--------------- av1/common/cfl.h | 6 +- av1/decoder/decodeframe.c | 39 +++++----- av1/decoder/decodemv.c | 20 +++--- av1/encoder/encodeframe.c | 31 +++++--- av1/encoder/encodemb.c | 26 +++---- av1/encoder/rdopt.c | 77 +++++++++++--------- 7 files changed, 197 insertions(+), 148 deletions(-) diff --git a/av1/common/cfl.c b/av1/common/cfl.c index 298820088..60c5118c8 100644 --- a/av1/common/cfl.c +++ b/av1/common/cfl.c @@ -276,58 +276,11 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, } } -void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, - int col, TX_SIZE tx_size, BLOCK_SIZE bsize) { - const int tx_width = tx_size_wide[tx_size]; - const int tx_height = tx_size_high[tx_size]; +static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input, + int input_stride, int row, int col, int width, + int height) { const int tx_off_log2 = tx_size_wide_log2[0]; -#if CONFIG_CHROMA_SUB8X8 - if (bsize < BLOCK_8X8) { - // Transform cannot be smaller than - assert(tx_width >= 4); - assert(tx_height >= 4); - - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - - // 
For chroma_sub8x8, the CfL prediction for prediction blocks smaller than - // 8X8 uses non chroma reference reconstructed luma pixels. To do so, we - // combine the 4X4 non chroma reference into the CfL pixel buffers based on - // their row and column index. - - // The following code is adapted from the is_chroma_reference() function. - if ((cfl->mi_row & - 0x01) // Increment the row index for odd indexed 4X4 blocks - && (bh == 4) // But not for 4X8 blocks - && cfl->subsampling_y) { // And only when chroma is subsampled - assert(row == 0); - row++; - } - - if ((cfl->mi_col & - 0x01) // Increment the col index for odd indexed 4X4 blocks - && (bw == 4) // But not for 8X4 blocks - && cfl->subsampling_x) { // And only when chroma is subsampled - assert(col == 0); - col++; - } -#if CONFIG_DEBUG - for (int unit_r = 0; unit_r < tx_size_high_unit[tx_size]; unit_r++) { - assert(row + unit_r < CFL_SUB8X8_VAL_MI_SIZE); - int row_off = (row + unit_r) * CFL_SUB8X8_VAL_MI_SIZE; - for (int unit_c = 0; unit_c < tx_size_wide_unit[tx_size]; unit_c++) { - assert(col + unit_c < CFL_SUB8X8_VAL_MI_SIZE); - assert(cfl->sub8x8_val[row_off + col + unit_c] == 0); - cfl->sub8x8_val[row_off + col + unit_c] = 1; - } - } -#endif // CONFIG_DEBUG - } -#else - (void)bsize; -#endif // CONFIG_CHROMA_SUB8X8 - // Invalidate current parameters cfl->are_parameters_computed = 0; @@ -335,29 +288,104 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, // can manage chroma overrun (e.g. 
when the chroma surfaces goes beyond the // frame boundary) if (col == 0 && row == 0) { - cfl->y_width = tx_width; - cfl->y_height = tx_height; + cfl->y_width = width; + cfl->y_height = height; } else { - cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width); - cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height); + cfl->y_width = OD_MAXI((col << tx_off_log2) + width, cfl->y_width); + cfl->y_height = OD_MAXI((row << tx_off_log2) + height, cfl->y_height); } // Check that we will remain inside the pixel buffer. - assert((row << tx_off_log2) + tx_height <= MAX_SB_SIZE); - assert((col << tx_off_log2) + tx_width <= MAX_SB_SIZE); + assert((row << tx_off_log2) + height <= MAX_SB_SIZE); + assert((col << tx_off_log2) + width <= MAX_SB_SIZE); // Store the input into the CfL pixel buffer uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2]; // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store - for (int j = 0; j < tx_height; j++) { - for (int i = 0; i < tx_width; i++) { + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { y_pix[i] = input[i]; } y_pix += MAX_SB_SIZE; input += input_stride; } } +#if CONFIG_CHROMA_SUB8X8 +// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced +// and non-chroma-referenced blocks are stored together in the CfL buffer. +static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out, + int *col_out) { + // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s. + if ((cfl->mi_row & 0x01) && cfl->subsampling_y) { + assert(*row_out == 0); + (*row_out)++; + } + + // Increment col index for right: 4x8, 4x16 or both right 4x4s. 
+ if ((cfl->mi_col & 0x01) && cfl->subsampling_x) { + assert(*col_out == 0); + (*col_out)++; + } +} +#if CONFIG_DEBUG +static INLINE void sub8x8_set_val(CFL_CTX *cfl, int row, int col, int val_high, + int val_wide) { + for (int val_r = 0; val_r < val_high; val_r++) { + assert(row + val_r < CFL_SUB8X8_VAL_MI_SIZE); + int row_off = (row + val_r) * CFL_SUB8X8_VAL_MI_SIZE; + for (int val_c = 0; val_c < val_wide; val_c++) { + assert(col + val_c < CFL_SUB8X8_VAL_MI_SIZE); + assert(cfl->sub8x8_val[row_off + col + val_c] == 0); + cfl->sub8x8_val[row_off + col + val_c]++; + } + } +} +#endif // CONFIG_DEBUG +#endif // CONFIG_CHROMA_SUB8X8 + +void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, + BLOCK_SIZE bsize) { + CFL_CTX *const cfl = xd->cfl; + struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y]; + uint8_t *dst = + &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]]; + (void)bsize; +#if CONFIG_CHROMA_SUB8X8 + + if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) { + // Only dimensions of size 4 can have an odd offset. 
+ assert(!((col & 1) && tx_size_wide[tx_size] != 4)); + assert(!((row & 1) && tx_size_high[tx_size] != 4)); + sub8x8_adjust_offset(cfl, &row, &col); +#if CONFIG_DEBUG + sub8x8_set_val(cfl, row, col, tx_size_high_unit[tx_size], + tx_size_wide_unit[tx_size]); +#endif // CONFIG_DEBUG + } +#endif + cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size], + tx_size_high[tx_size]); +} + +void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) { + CFL_CTX *const cfl = xd->cfl; + struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y]; + int row = 0; + int col = 0; +#if CONFIG_CHROMA_SUB8X8 + bsize = AOMMAX(BLOCK_4X4, bsize); + if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) { + sub8x8_adjust_offset(cfl, &row, &col); +#if CONFIG_DEBUG + sub8x8_set_val(cfl, row, col, mi_size_high[bsize], mi_size_wide[bsize]); +#endif // CONFIG_DEBUG + } +#endif // CONFIG_CHROMA_SUB8X8 + const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size); + const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size); + cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height); +} void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) { CFL_CTX *const cfl = xd->cfl; @@ -393,7 +421,7 @@ void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) { assert(cfl->y_width <= cfl->uv_width << cfl->subsampling_x); assert(cfl->y_height <= cfl->uv_height << cfl->subsampling_y); } -#endif +#endif // CONFIG_DEBUG // Compute block-level DC_PRED for both chromatic planes. // DC_PRED replaces beta in the linear model. 
diff --git a/av1/common/cfl.h b/av1/common/cfl.h index 0cefd0614..7a56a4946 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h @@ -22,8 +22,10 @@ static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) { void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, int row, int col, TX_SIZE tx_size, int plane); -void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, - int col, TX_SIZE tx_size, BLOCK_SIZE bsize); +void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size); + +void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size, + BLOCK_SIZE bsize); void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size); diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c index 0080c2d19..949a69d7c 100644 --- a/av1/decoder/decodeframe.c +++ b/av1/decoder/decodeframe.c @@ -519,20 +519,9 @@ static void predict_and_reconstruct_intra_block( } #if CONFIG_CFL if (plane == AOM_PLANE_Y && xd->cfl->store_y) { - struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_CHROMA_SUB8X8 - const BLOCK_SIZE plane_bsize = - AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd)); -#else - const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd); -#endif // CONFIG_CHROMA_SUB8X8 - uint8_t *dst = - &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]]; - // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is - // intra predicted. 
- cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size, plane_bsize); - } -#endif // CONFIG_CFL + cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type); + } +#endif // CONFIG_CFL } #if CONFIG_VAR_TX && !CONFIG_COEF_INTERLEAVE @@ -1769,6 +1758,11 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis); MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; +#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 + CFL_CTX *const cfl = xd->cfl; + cfl->is_chroma_reference = is_chroma_reference( + mi_row, mi_col, bsize, cfl->subsampling_x, cfl->subsampling_y); +#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 #if CONFIG_DELTA_Q if (cm->delta_q_present_flag) { @@ -1966,11 +1960,6 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, } } } -#if CONFIG_CFL && CONFIG_CB4X4 && CONFIG_DEBUG - if (xd->cfl->is_chroma_reference) { - cfl_clear_sub8x8_val(xd->cfl); - } -#endif // CONFIG_CFL && CONFIG_CB4X4 && CONFIG_DEBUG } else { int ref; @@ -2103,6 +2092,18 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, } } } +#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 + if (mbmi->uv_mode != UV_CFL_PRED) { +#if CONFIG_DEBUG + if (cfl->is_chroma_reference) { + cfl_clear_sub8x8_val(cfl); + } +#endif + if (!cfl->is_chroma_reference && is_inter_block(mbmi)) { + cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size); + } + } +#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 #endif // CONFIG_COEF_INTERLEAVE int reader_corrupted_flag = aom_reader_has_error(r); diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c index be62400c9..81e1e064a 100644 --- a/av1/decoder/decodemv.c +++ b/av1/decoder/decodemv.c @@ -1214,9 +1214,7 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm, #if CONFIG_CFL if (mbmi->uv_mode == UV_CFL_PRED) { mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, &mbmi->cfl_alpha_signs); - // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra - // frame). 
- xd->cfl->store_y = cm->frame_type == KEY_FRAME; + xd->cfl->store_y = 1; } else { xd->cfl->store_y = 0; } @@ -1228,9 +1226,7 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm, mbmi->uv_mode = UV_DC_PRED; #if CONFIG_CFL xd->cfl->is_chroma_reference = 0; - // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra - // frame). - xd->cfl->store_y = cm->frame_type == KEY_FRAME; + xd->cfl->store_y = 1; #endif } #endif @@ -1811,15 +1807,20 @@ static void read_intra_block_mode_info(AV1_COMMON *const cm, const int mi_row, if (mbmi->uv_mode == UV_CFL_PRED) { mbmi->cfl_alpha_idx = read_cfl_alphas(xd->tile_ctx, r, &mbmi->cfl_alpha_signs); - // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra - // frame). - xd->cfl->store_y = cm->frame_type == KEY_FRAME; + xd->cfl->store_y = 1; } else { xd->cfl->store_y = 0; } #endif // CONFIG_CFL #if CONFIG_CB4X4 + } else { + // Avoid decoding angle_info if there is no chroma prediction + mbmi->uv_mode = UV_DC_PRED; +#if CONFIG_CFL + xd->cfl->is_chroma_reference = 0; + xd->cfl->store_y = 1; +#endif } #endif @@ -2275,6 +2276,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi, assert(NELEMENTS(mode_2_counter) == MB_MODE_COUNT); + mbmi->uv_mode = UV_DC_PRED; mbmi->palette_mode_info.palette_size[0] = 0; mbmi->palette_mode_info.palette_size[1] = 0; diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c index b4d5a7ebf..04d2f79e1 100644 --- a/av1/encoder/encodeframe.c +++ b/av1/encoder/encodeframe.c @@ -4109,9 +4109,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #endif // CONFIG_SUPERTX #if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG - if (sum_rdc.rdcost >= best_rdc.rdcost) { - cfl_clear_sub8x8_val(xd->cfl); - } + cfl_clear_sub8x8_val(xd->cfl); #endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG if (sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_HORZ]; @@ -4272,9 +4270,7 @@ static void 
rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #endif // CONFIG_SUPERTX #if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG - if (sum_rdc.rdcost >= best_rdc.rdcost) { - cfl_clear_sub8x8_val(xd->cfl); - } + cfl_clear_sub8x8_val(xd->cfl); #endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG if (sum_rdc.rdcost < best_rdc.rdcost) { @@ -6105,9 +6101,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, if (!is_inter) { #if CONFIG_CFL - // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra - // frame). - xd->cfl->store_y = cm->frame_type == KEY_FRAME; + xd->cfl->store_y = 1; #endif // CONFIG_CFL int plane; mbmi->skip = 1; @@ -6117,13 +6111,13 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, } #if CONFIG_CFL xd->cfl->store_y = 0; -#if CONFIG_CB4X4 && CONFIG_DEBUG +#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG if (is_chroma_reference(mi_row, mi_col, bsize, xd->cfl->subsampling_x, xd->cfl->subsampling_y) && !xd->cfl->are_parameters_computed) { cfl_clear_sub8x8_val(xd->cfl); } -#endif // CONFIG_CB4X4 && CONFIG_DEBUG +#endif // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG #endif // CONFIG_CFL if (!dry_run) { sum_intra_stats(td->counts, xd, mi, xd->above_mi, xd->left_mi, @@ -6334,6 +6328,21 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, (mbmi->skip || seg_skip), xd); } #endif // CONFIG_VAR_TX +#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 + CFL_CTX *const cfl = xd->cfl; +#if CONFIG_DEBUG + if (is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x, + cfl->subsampling_y) && + !cfl->are_parameters_computed) { + cfl_clear_sub8x8_val(cfl); + } +#endif // CONFIG_DEBUG + if (is_inter_block(mbmi) && + !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x, + cfl->subsampling_y)) { + cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size); + } +#endif // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 } #if CONFIG_SUPERTX diff --git 
a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 565f420cd..ada6b8b9f 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c @@ -741,27 +741,29 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, if (p->eobs[block]) *(args->skip) = 0; - if (p->eobs[block] == 0) return; + if (p->eobs[block] != 0) #else (void)ctx; if (!x->pvq_skip[plane]) *(args->skip) = 0; - if (x->pvq_skip[plane]) return; + if (!x->pvq_skip[plane]) #endif + { #if CONFIG_LGT - PREDICTION_MODE mode = xd->mi[0]->mbmi.mode; + PREDICTION_MODE mode = xd->mi[0]->mbmi.mode; #endif // CONFIG_LGT - TX_TYPE tx_type = - av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size); - av1_inverse_transform_block(xd, dqcoeff, + TX_TYPE tx_type = + av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size); + av1_inverse_transform_block(xd, dqcoeff, #if CONFIG_LGT - mode, + mode, #endif // CONFIG_LGT #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK - mrc_mask, + mrc_mask, #endif // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK - tx_type, tx_size, dst, pd->dst.stride, - p->eobs[block]); + tx_type, tx_size, dst, pd->dst.stride, + p->eobs[block]); + } } #if CONFIG_VAR_TX @@ -1113,9 +1115,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, #endif #if CONFIG_CFL if (plane == AOM_PLANE_Y && xd->cfl->store_y) { - // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is - // intra predicted. 
- cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); + cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); } #endif // CONFIG_CFL } diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index c6dd0766d..acee6de11 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c @@ -2081,15 +2081,14 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, } #if CONFIG_CFL if (plane == AOM_PLANE_Y && xd->cfl->store_y) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int dst_stride = pd->dst.stride; - uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is - // intra predicted. - cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize); +#if CONFIG_CHROMA_SUB8X8 + assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8); +#else + assert(!is_inter_block(mbmi)); +#endif // CONFIG_CHROMA_SUB8X8 + cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); } -#endif +#endif // CONFIG_CFL rd = RDCOST(x->rdmult, 0, this_rd_stats.dist); if (args->this_rd + rd > args->best_rd) { args->exit_early = 1; @@ -6027,18 +6026,11 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, mbmi->uv_mode = mode; #if CONFIG_CFL - const AV1_COMMON *const cm = &cpi->common; int cfl_alpha_rate = 0; if (mode == UV_CFL_PRED) { assert(!is_directional_mode); - // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra - // frame). 
- if (cm->frame_type == KEY_FRAME) { - const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]); - cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size); - } else { - continue; - } + const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]); + cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size); } #endif #if CONFIG_EXT_INTRA @@ -6124,9 +6116,11 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, UV_PREDICTION_MODE *mode_uv) { + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. - init_sbuv_mode(&x->e_mbd.mi[0]->mbmi); + init_sbuv_mode(mbmi); #if CONFIG_CB4X4 #if !CONFIG_CHROMA_2X2 if (x->skip_chroma_rd) { @@ -6137,15 +6131,34 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, *mode_uv = UV_DC_PRED; return; } - bsize = scale_chroma_bsize(bsize, x->e_mbd.plane[AOM_PLANE_U].subsampling_x, - x->e_mbd.plane[AOM_PLANE_U].subsampling_y); + bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x, + xd->plane[AOM_PLANE_U].subsampling_y); #endif // !CONFIG_CHROMA_2X2 +#if CONFIG_CFL + // Only store reconstructed luma when there's chroma RDO. When there's no + // chroma RDO, the reconstructed luma will be stored in encode_superblock(). + xd->cfl->store_y = !x->skip_chroma_rd; +#endif // CONFIG_CFL #else bsize = bsize < BLOCK_8X8 ? 
BLOCK_8X8 : bsize; +#if CONFIG_CFL + xd->cfl->store_y = 1; +#endif // CONFIG_CFL #endif // CONFIG_CB4X4 +#if CONFIG_CFL + if (xd->cfl->store_y) { + // Perform one extra call to txfm_rd_in_plane(), with the values chosen + // during luma RDO, so we can store reconstructed luma values + RD_STATS this_rd_stats; + txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y, + mbmi->sb_type, mbmi->tx_size, + cpi->sf.use_fast_coef_costing); + xd->cfl->store_y = 0; + } +#endif // CONFIG_CFL rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize, max_tx_size); - *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode; + *mode_uv = mbmi->uv_mode; } static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode, @@ -9933,23 +9946,17 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, if (intra_yrd < best_rd) { #if CONFIG_CFL - // Perform one extra txfm_rd_in_plane() call, this time with the best value - // so we can store reconstructed luma values - RD_STATS this_rd_stats; - #if CONFIG_CB4X4 - // Don't store the luma value if no chroma is associated. - // Don't worry, we will store this reconstructed luma in the following - // encode dry-run the chroma plane will never know. - // TODO(ltrudeau) Delete frame type check (only used to test key-frame only - // CfL) - xd->cfl->store_y = !x->skip_chroma_rd && cm->frame_type == KEY_FRAME; -#else - // TODO(ltrudeau) Delete frame type check (only used to test key-frame only - // CfL) - xd->cfl->store_y = cm->frame_type == KEY_FRAME; + // Only store reconstructed luma when there's chroma RDO. When there's no + // chroma RDO, the reconstructed luma will be stored in encode_superblock(). 
+ xd->cfl->store_y = !x->skip_chroma_rd; +#else + xd->cfl->store_y = 1; #endif // CONFIG_CB4X4 if (xd->cfl->store_y) { + // Perform one extra call to txfm_rd_in_plane(), with the values chosen + // during luma RDO, so we can store reconstructed luma values + RD_STATS this_rd_stats; txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y, mbmi->sb_type, mbmi->tx_size, cpi->sf.use_fast_coef_costing); -- GitLab