diff --git a/av1/common/cfl.c b/av1/common/cfl.c
index 29882008860122357b8d0a8abab6d1c01ba61d56..60c5118c8e605e3312a989b592e4c11ef22a6529 100644
--- a/av1/common/cfl.c
+++ b/av1/common/cfl.c
@@ -276,58 +276,11 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
   }
 }
 
-void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
-               int col, TX_SIZE tx_size, BLOCK_SIZE bsize) {
-  const int tx_width = tx_size_wide[tx_size];
-  const int tx_height = tx_size_high[tx_size];
+static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input,
+                             int input_stride, int row, int col, int width,
+                             int height) {
   const int tx_off_log2 = tx_size_wide_log2[0];
 
-#if CONFIG_CHROMA_SUB8X8
-  if (bsize < BLOCK_8X8) {
-    // Transform cannot be smaller than
-    assert(tx_width >= 4);
-    assert(tx_height >= 4);
-
-    const int bw = block_size_wide[bsize];
-    const int bh = block_size_high[bsize];
-
-    // For chroma_sub8x8, the CfL prediction for prediction blocks smaller than
-    // 8X8 uses non chroma reference reconstructed luma pixels. To do so, we
-    // combine the 4X4 non chroma reference into the CfL pixel buffers based on
-    // their row and column index.
-
-    // The following code is adapted from the is_chroma_reference() function.
-    if ((cfl->mi_row &
-         0x01)        // Increment the row index for odd indexed 4X4 blocks
-        && (bh == 4)  // But not for 4X8 blocks
-        && cfl->subsampling_y) {  // And only when chroma is subsampled
-      assert(row == 0);
-      row++;
-    }
-
-    if ((cfl->mi_col &
-         0x01)        // Increment the col index for odd indexed 4X4 blocks
-        && (bw == 4)  // But not for 8X4 blocks
-        && cfl->subsampling_x) {  // And only when chroma is subsampled
-      assert(col == 0);
-      col++;
-    }
-#if CONFIG_DEBUG
-    for (int unit_r = 0; unit_r < tx_size_high_unit[tx_size]; unit_r++) {
-      assert(row + unit_r < CFL_SUB8X8_VAL_MI_SIZE);
-      int row_off = (row + unit_r) * CFL_SUB8X8_VAL_MI_SIZE;
-      for (int unit_c = 0; unit_c < tx_size_wide_unit[tx_size]; unit_c++) {
-        assert(col + unit_c < CFL_SUB8X8_VAL_MI_SIZE);
-        assert(cfl->sub8x8_val[row_off + col + unit_c] == 0);
-        cfl->sub8x8_val[row_off + col + unit_c] = 1;
-      }
-    }
-#endif  // CONFIG_DEBUG
-  }
-#else
-  (void)bsize;
-#endif  // CONFIG_CHROMA_SUB8X8
-
   // Invalidate current parameters
   cfl->are_parameters_computed = 0;
 
@@ -335,29 +288,104 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
   // can manage chroma overrun (e.g. when the chroma surfaces goes beyond the
   // frame boundary)
   if (col == 0 && row == 0) {
-    cfl->y_width = tx_width;
-    cfl->y_height = tx_height;
+    cfl->y_width = width;
+    cfl->y_height = height;
   } else {
-    cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width);
-    cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height);
+    cfl->y_width = OD_MAXI((col << tx_off_log2) + width, cfl->y_width);
+    cfl->y_height = OD_MAXI((row << tx_off_log2) + height, cfl->y_height);
   }
 
   // Check that we will remain inside the pixel buffer.
-  assert((row << tx_off_log2) + tx_height <= MAX_SB_SIZE);
-  assert((col << tx_off_log2) + tx_width <= MAX_SB_SIZE);
+  assert((row << tx_off_log2) + height <= MAX_SB_SIZE);
+  assert((col << tx_off_log2) + width <= MAX_SB_SIZE);
 
   // Store the input into the CfL pixel buffer
   uint8_t *y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << tx_off_log2];
 
   // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
-  for (int j = 0; j < tx_height; j++) {
-    for (int i = 0; i < tx_width; i++) {
+  for (int j = 0; j < height; j++) {
+    for (int i = 0; i < width; i++) {
       y_pix[i] = input[i];
     }
     y_pix += MAX_SB_SIZE;
     input += input_stride;
   }
 }
+
+#if CONFIG_CHROMA_SUB8X8
+// Adjust the row and column of blocks smaller than 8X8, as chroma-referenced
+// and non-chroma-referenced blocks are stored together in the CfL buffer.
+static INLINE void sub8x8_adjust_offset(const CFL_CTX *cfl, int *row_out,
+                                        int *col_out) {
+  // Increment row index for bottom: 8x4, 16x4 or both bottom 4x4s.
+  if ((cfl->mi_row & 0x01) && cfl->subsampling_y) {
+    assert(*row_out == 0);
+    (*row_out)++;
+  }
+
+  // Increment col index for right: 4x8, 4x16 or both right 4x4s.
+  if ((cfl->mi_col & 0x01) && cfl->subsampling_x) {
+    assert(*col_out == 0);
+    (*col_out)++;
+  }
+}
+#if CONFIG_DEBUG
+static INLINE void sub8x8_set_val(CFL_CTX *cfl, int row, int col, int val_high,
+                                  int val_wide) {
+  for (int val_r = 0; val_r < val_high; val_r++) {
+    assert(row + val_r < CFL_SUB8X8_VAL_MI_SIZE);
+    int row_off = (row + val_r) * CFL_SUB8X8_VAL_MI_SIZE;
+    for (int val_c = 0; val_c < val_wide; val_c++) {
+      assert(col + val_c < CFL_SUB8X8_VAL_MI_SIZE);
+      assert(cfl->sub8x8_val[row_off + col + val_c] == 0);
+      cfl->sub8x8_val[row_off + col + val_c]++;
+    }
+  }
+}
+#endif  // CONFIG_DEBUG
+#endif  // CONFIG_CHROMA_SUB8X8
+
+void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
+                  BLOCK_SIZE bsize) {
+  CFL_CTX *const cfl = xd->cfl;
+  struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
+  uint8_t *dst =
+      &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
+  (void)bsize;
+#if CONFIG_CHROMA_SUB8X8
+
+  if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
+    // Only dimensions of size 4 can have an odd offset.
+    assert(!((col & 1) && tx_size_wide[tx_size] != 4));
+    assert(!((row & 1) && tx_size_high[tx_size] != 4));
+    sub8x8_adjust_offset(cfl, &row, &col);
+#if CONFIG_DEBUG
+    sub8x8_set_val(cfl, row, col, tx_size_high_unit[tx_size],
+                   tx_size_wide_unit[tx_size]);
+#endif  // CONFIG_DEBUG
+  }
+#endif
+  cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
+            tx_size_high[tx_size]);
+}
+
+void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
+  CFL_CTX *const cfl = xd->cfl;
+  struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_Y];
+  int row = 0;
+  int col = 0;
+#if CONFIG_CHROMA_SUB8X8
+  bsize = AOMMAX(BLOCK_4X4, bsize);
+  if (block_size_high[bsize] == 4 || block_size_wide[bsize] == 4) {
+    sub8x8_adjust_offset(cfl, &row, &col);
+#if CONFIG_DEBUG
+    sub8x8_set_val(cfl, row, col, mi_size_high[bsize], mi_size_wide[bsize]);
+#endif  // CONFIG_DEBUG
+  }
+#endif  // CONFIG_CHROMA_SUB8X8
+  const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
+  const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
+  cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height);
+}
 
 void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
   CFL_CTX *const cfl = xd->cfl;
@@ -393,7 +421,7 @@ void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
     assert(cfl->y_width <= cfl->uv_width << cfl->subsampling_x);
     assert(cfl->y_height <= cfl->uv_height << cfl->subsampling_y);
   }
-#endif
+#endif  // CONFIG_DEBUG
 
   // Compute block-level DC_PRED for both chromatic planes.
   // DC_PRED replaces beta in the linear model.
diff --git a/av1/common/cfl.h b/av1/common/cfl.h
index 0cefd06149e772457b2a27075da64f46a32f1c5c..7a56a494668eb2e2115003ea65371dba62c6a92d 100644
--- a/av1/common/cfl.h
+++ b/av1/common/cfl.h
@@ -22,8 +22,10 @@ static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) {
 void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
                        int row, int col, TX_SIZE tx_size, int plane);
 
-void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
-               int col, TX_SIZE tx_size, BLOCK_SIZE bsize);
+void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);
+
+void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
+                  BLOCK_SIZE bsize);
 
 void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
 
diff --git a/av1/decoder/decodeframe.c b/av1/decoder/decodeframe.c
index 0080c2d19b833994f783bdd63a0e11ac4fc57da3..949a69d7cd834a3b33b69b4c1a9ec412cd6057ec 100644
--- a/av1/decoder/decodeframe.c
+++ b/av1/decoder/decodeframe.c
@@ -519,20 +519,9 @@ static void predict_and_reconstruct_intra_block(
   }
 #if CONFIG_CFL
   if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-#if CONFIG_CHROMA_SUB8X8
-    const BLOCK_SIZE plane_bsize =
-        AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
-#else
-    const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
-#endif  // CONFIG_CHROMA_SUB8X8
-    uint8_t *dst =
-        &pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
-    // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
-    // intra predicted.
-    cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size, plane_bsize);
-  }
-#endif  // CONFIG_CFL
+    cfl_store_tx(xd, row, col, tx_size, mbmi->sb_type);
+  }
+#endif  // CONFIG_CFL
 }
 
 #if CONFIG_VAR_TX && !CONFIG_COEF_INTERLEAVE
@@ -1769,6 +1758,11 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
   set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis);
   MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8
+  CFL_CTX *const cfl = xd->cfl;
+  cfl->is_chroma_reference = is_chroma_reference(
+      mi_row, mi_col, bsize, cfl->subsampling_x, cfl->subsampling_y);
+#endif  // CONFIG_CFL && CONFIG_CHROMA_SUB8X8
 
 #if CONFIG_DELTA_Q
   if (cm->delta_q_present_flag) {
@@ -1966,11 +1960,6 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
         }
       }
     }
-#if CONFIG_CFL && CONFIG_CB4X4 && CONFIG_DEBUG
-    if (xd->cfl->is_chroma_reference) {
-      cfl_clear_sub8x8_val(xd->cfl);
-    }
-#endif  // CONFIG_CFL && CONFIG_CB4X4 && CONFIG_DEBUG
   } else {
     int ref;
 
@@ -2103,6 +2092,18 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi,
       }
     }
   }
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8
+  if (mbmi->uv_mode != UV_CFL_PRED) {
+#if CONFIG_DEBUG
+    if (cfl->is_chroma_reference) {
+      cfl_clear_sub8x8_val(cfl);
+    }
+#endif
+    if (!cfl->is_chroma_reference && is_inter_block(mbmi)) {
+      cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
+    }
+  }
+#endif  // CONFIG_CFL && CONFIG_CHROMA_SUB8X8
 #endif  // CONFIG_COEF_INTERLEAVE
 
   int reader_corrupted_flag = aom_reader_has_error(r);
diff --git a/av1/decoder/decodemv.c b/av1/decoder/decodemv.c
index be62400c9d692f8263f31a77fe1ce87f35dd8400..81e1e064a1f6ee8c3873b32a641a94172ef07ff8 100644
--- a/av1/decoder/decodemv.c
+++ b/av1/decoder/decodemv.c
@@ -1214,9 +1214,7 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
 #if CONFIG_CFL
     if (mbmi->uv_mode == UV_CFL_PRED) {
       mbmi->cfl_alpha_idx = read_cfl_alphas(ec_ctx, r, &mbmi->cfl_alpha_signs);
-      // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra
-      // frame).
-      xd->cfl->store_y = cm->frame_type == KEY_FRAME;
+      xd->cfl->store_y = 1;
     } else {
       xd->cfl->store_y = 0;
     }
@@ -1228,9 +1226,7 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
     mbmi->uv_mode = UV_DC_PRED;
 #if CONFIG_CFL
     xd->cfl->is_chroma_reference = 0;
-    // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra
-    // frame).
-    xd->cfl->store_y = cm->frame_type == KEY_FRAME;
+    xd->cfl->store_y = 1;
 #endif
   }
 #endif
@@ -1811,15 +1807,20 @@ static void read_intra_block_mode_info(AV1_COMMON *const cm, const int mi_row,
     if (mbmi->uv_mode == UV_CFL_PRED) {
       mbmi->cfl_alpha_idx =
           read_cfl_alphas(xd->tile_ctx, r, &mbmi->cfl_alpha_signs);
-      // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra
-      // frame).
-      xd->cfl->store_y = cm->frame_type == KEY_FRAME;
+      xd->cfl->store_y = 1;
     } else {
       xd->cfl->store_y = 0;
     }
 #endif  // CONFIG_CFL
 
 #if CONFIG_CB4X4
+  } else {
+    // Avoid decoding angle_info if there is no chroma prediction
+    mbmi->uv_mode = UV_DC_PRED;
+#if CONFIG_CFL
+    xd->cfl->is_chroma_reference = 0;
+    xd->cfl->store_y = 1;
+#endif
   }
 #endif
 
@@ -2275,6 +2276,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
 
   assert(NELEMENTS(mode_2_counter) == MB_MODE_COUNT);
 
+  mbmi->uv_mode = UV_DC_PRED;
   mbmi->palette_mode_info.palette_size[0] = 0;
   mbmi->palette_mode_info.palette_size[1] = 0;
 
diff --git a/av1/encoder/encodeframe.c b/av1/encoder/encodeframe.c
index b4d5a7ebf9d56cd14503c16e139cc9a3ce1d110a..04d2f79e173a49ef10b280b0a3fd685a05078db9 100644
--- a/av1/encoder/encodeframe.c
+++ b/av1/encoder/encodeframe.c
@@ -4109,9 +4109,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
 #endif  // CONFIG_SUPERTX
 
 #if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-    if (sum_rdc.rdcost >= best_rdc.rdcost) {
-      cfl_clear_sub8x8_val(xd->cfl);
-    }
+    cfl_clear_sub8x8_val(xd->cfl);
 #endif  // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
     if (sum_rdc.rdcost < best_rdc.rdcost) {
       sum_rdc.rate += partition_cost[PARTITION_HORZ];
@@ -4272,9 +4270,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
 #endif  // CONFIG_SUPERTX
 
 #if CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
-    if (sum_rdc.rdcost >= best_rdc.rdcost) {
-      cfl_clear_sub8x8_val(xd->cfl);
-    }
+    cfl_clear_sub8x8_val(xd->cfl);
 #endif  // CONFIG_CFL && CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
 
   if (sum_rdc.rdcost < best_rdc.rdcost) {
@@ -6105,9 +6101,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
 
   if (!is_inter) {
 #if CONFIG_CFL
-    // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra
-    // frame).
-    xd->cfl->store_y = cm->frame_type == KEY_FRAME;
+    xd->cfl->store_y = 1;
 #endif  // CONFIG_CFL
     int plane;
     mbmi->skip = 1;
@@ -6117,13 +6111,13 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
     }
 #if CONFIG_CFL
     xd->cfl->store_y = 0;
-#if CONFIG_CB4X4 && CONFIG_DEBUG
+#if CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
     if (is_chroma_reference(mi_row, mi_col, bsize, xd->cfl->subsampling_x,
                             xd->cfl->subsampling_y) &&
         !xd->cfl->are_parameters_computed) {
       cfl_clear_sub8x8_val(xd->cfl);
     }
-#endif  // CONFIG_CB4X4 && CONFIG_DEBUG
+#endif  // CONFIG_CHROMA_SUB8X8 && CONFIG_DEBUG
 #endif  // CONFIG_CFL
     if (!dry_run) {
       sum_intra_stats(td->counts, xd, mi, xd->above_mi, xd->left_mi,
@@ -6334,6 +6328,21 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
     set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h, (mbmi->skip || seg_skip), xd);
   }
 #endif  // CONFIG_VAR_TX
+#if CONFIG_CFL && CONFIG_CHROMA_SUB8X8
+  CFL_CTX *const cfl = xd->cfl;
+#if CONFIG_DEBUG
+  if (is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
+                          cfl->subsampling_y) &&
+      !cfl->are_parameters_computed) {
+    cfl_clear_sub8x8_val(cfl);
+  }
+#endif  // CONFIG_DEBUG
+  if (is_inter_block(mbmi) &&
+      !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
+                           cfl->subsampling_y)) {
+    cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
+  }
+#endif  // CONFIG_CFL && CONFIG_CHROMA_SUB8X8
 }
 
 #if CONFIG_SUPERTX
diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c
index 565f420cd703e6777fc2057939826896181bedce..ada6b8b9fc8861bd28d9cc63f5a9dd4788c421ef 100644
--- a/av1/encoder/encodemb.c
+++ b/av1/encoder/encodemb.c
@@ -741,27 +741,29 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
   if (p->eobs[block]) *(args->skip) = 0;
 
-  if (p->eobs[block] == 0) return;
+  if (p->eobs[block] != 0)
 #else
   (void)ctx;
   if (!x->pvq_skip[plane]) *(args->skip) = 0;
 
-  if (x->pvq_skip[plane]) return;
+  if (!x->pvq_skip[plane])
 #endif
+  {
 #if CONFIG_LGT
-  PREDICTION_MODE mode = xd->mi[0]->mbmi.mode;
+    PREDICTION_MODE mode = xd->mi[0]->mbmi.mode;
 #endif  // CONFIG_LGT
-  TX_TYPE tx_type =
-      av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size);
-  av1_inverse_transform_block(xd, dqcoeff,
+    TX_TYPE tx_type =
+        av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, block, tx_size);
+    av1_inverse_transform_block(xd, dqcoeff,
 #if CONFIG_LGT
-                              mode,
+                                mode,
 #endif  // CONFIG_LGT
 #if CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                              mrc_mask,
+                                mrc_mask,
 #endif  // CONFIG_MRC_TX && SIGNAL_ANY_MRC_MASK
-                              tx_type, tx_size, dst, pd->dst.stride,
-                              p->eobs[block]);
+                                tx_type, tx_size, dst, pd->dst.stride,
+                                p->eobs[block]);
+  }
 }
 
 #if CONFIG_VAR_TX
@@ -1113,9 +1115,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
 #endif
 #if CONFIG_CFL
   if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
-    // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
-    // intra predicted.
-    cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
+    cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
   }
 #endif  // CONFIG_CFL
 }
diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c
index c6dd0766d649d449dc4067b6cdb00f972a0dbfc3..acee6de1146de42c1bc2c57d5740f7766d1a52c6 100644
--- a/av1/encoder/rdopt.c
+++ b/av1/encoder/rdopt.c
@@ -2081,15 +2081,14 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
   }
 #if CONFIG_CFL
   if (plane == AOM_PLANE_Y && xd->cfl->store_y) {
-    struct macroblockd_plane *const pd = &xd->plane[plane];
-    const int dst_stride = pd->dst.stride;
-    uint8_t *dst =
-        &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
-    // TODO (ltrudeau) Store sub-8x8 inter blocks when bottom right block is
-    // intra predicted.
-    cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size, plane_bsize);
+#if CONFIG_CHROMA_SUB8X8
+    assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8);
+#else
+    assert(!is_inter_block(mbmi));
+#endif  // CONFIG_CHROMA_SUB8X8
+    cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize);
   }
-#endif
+#endif  // CONFIG_CFL
   rd = RDCOST(x->rdmult, 0, this_rd_stats.dist);
   if (args->this_rd + rd > args->best_rd) {
     args->exit_early = 1;
@@ -6027,18 +6026,11 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
 
     mbmi->uv_mode = mode;
 #if CONFIG_CFL
-    const AV1_COMMON *const cm = &cpi->common;
     int cfl_alpha_rate = 0;
     if (mode == UV_CFL_PRED) {
       assert(!is_directional_mode);
-      // TODO(ltrudeau) Remove key_frame check (used to test CfL only in Intra
-      // frame).
-      if (cm->frame_type == KEY_FRAME) {
-        const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
-        cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
-      } else {
-        continue;
-      }
+      const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
+      cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
     }
 #endif
 #if CONFIG_EXT_INTRA
@@ -6124,9 +6116,11 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
                                  int *rate_uv, int *rate_uv_tokenonly,
                                  int64_t *dist_uv, int *skip_uv,
                                  UV_PREDICTION_MODE *mode_uv) {
+  MACROBLOCKD *xd = &x->e_mbd;
+  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
   // Use an estimated rd for uv_intra based on DC_PRED if the
   // appropriate speed flag is set.
-  init_sbuv_mode(&x->e_mbd.mi[0]->mbmi);
+  init_sbuv_mode(mbmi);
 #if CONFIG_CB4X4
 #if !CONFIG_CHROMA_2X2
   if (x->skip_chroma_rd) {
@@ -6137,15 +6131,34 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x,
     *mode_uv = UV_DC_PRED;
     return;
   }
-  bsize = scale_chroma_bsize(bsize, x->e_mbd.plane[AOM_PLANE_U].subsampling_x,
-                             x->e_mbd.plane[AOM_PLANE_U].subsampling_y);
+  bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
+                             xd->plane[AOM_PLANE_U].subsampling_y);
 #endif  // !CONFIG_CHROMA_2X2
+#if CONFIG_CFL
+  // Only store reconstructed luma when there's chroma RDO. When there's no
+  // chroma RDO, the reconstructed luma will be stored in encode_superblock().
+  xd->cfl->store_y = !x->skip_chroma_rd;
+#endif  // CONFIG_CFL
 #else
   bsize = bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize;
+#if CONFIG_CFL
+  xd->cfl->store_y = 1;
+#endif  // CONFIG_CFL
 #endif  // CONFIG_CB4X4
+#if CONFIG_CFL
+  if (xd->cfl->store_y) {
+    // Perform one extra call to txfm_rd_in_plane(), with the values chosen
+    // during luma RDO, so we can store reconstructed luma values
+    RD_STATS this_rd_stats;
+    txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
+                     mbmi->sb_type, mbmi->tx_size,
+                     cpi->sf.use_fast_coef_costing);
+    xd->cfl->store_y = 0;
+  }
+#endif  // CONFIG_CFL
   rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                           bsize, max_tx_size);
-  *mode_uv = x->e_mbd.mi[0]->mbmi.uv_mode;
+  *mode_uv = mbmi->uv_mode;
 }
 
 static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode,
@@ -9933,23 +9946,17 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
 
   if (intra_yrd < best_rd) {
 #if CONFIG_CFL
-    // Perform one extra txfm_rd_in_plane() call, this time with the best value
-    // so we can store reconstructed luma values
-    RD_STATS this_rd_stats;
-
 #if CONFIG_CB4X4
-    // Don't store the luma value if no chroma is associated.
-    // Don't worry, we will store this reconstructed luma in the following
-    // encode dry-run the chroma plane will never know.
-    // TODO(ltrudeau) Delete frame type check (only used to test key-frame only
-    // CfL)
-    xd->cfl->store_y = !x->skip_chroma_rd && cm->frame_type == KEY_FRAME;
-#else
-    // TODO(ltrudeau) Delete frame type check (only used to test key-frame only
-    // CfL)
-    xd->cfl->store_y = cm->frame_type == KEY_FRAME;
+    // Only store reconstructed luma when there's chroma RDO. When there's no
+    // chroma RDO, the reconstructed luma will be stored in encode_superblock().
+    xd->cfl->store_y = !x->skip_chroma_rd;
+#else
+    xd->cfl->store_y = 1;
 #endif  // CONFIG_CB4X4
     if (xd->cfl->store_y) {
+      // Perform one extra call to txfm_rd_in_plane(), with the values chosen
+      // during luma RDO, so we can store reconstructed luma values
+      RD_STATS this_rd_stats;
       txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, AOM_PLANE_Y,
                        mbmi->sb_type, mbmi->tx_size,
                        cpi->sf.use_fast_coef_costing);