diff --git a/av1/common/cfl.c b/av1/common/cfl.c index aa9a55710234d6d3b4d834608afde62cc1f21aac..0feabe4b4b5636959231c11ac04cab57306a567f 100644 --- a/av1/common/cfl.c +++ b/av1/common/cfl.c @@ -24,11 +24,97 @@ void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) { memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE); cfl->subsampling_x = cm->subsampling_x; cfl->subsampling_y = cm->subsampling_y; + cfl->are_parameters_computed = 0; +} + +// Load from the CfL pixel buffer into output +static void cfl_load(CFL_CTX *cfl, int row, int col, int width, int height) { + const int sub_x = cfl->subsampling_x; + const int sub_y = cfl->subsampling_y; + const int off_log2 = tx_size_wide_log2[0]; + + // TODO(ltrudeau) convert to uint16 to add HBD support + const uint8_t *y_pix; + // TODO(ltrudeau) convert to uint16 to add HBD support + uint8_t *output = cfl->y_down_pix; + + int pred_row_offset = 0; + int output_row_offset = 0; + + // TODO(ltrudeau) should be faster to downsample when we store the values + // TODO(ltrudeau) add support for 4:2:2 + if (sub_y == 0 && sub_x == 0) { + y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2]; + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + // In 4:4:4, pixels match 1 to 1 + output[output_row_offset + i] = y_pix[pred_row_offset + i]; + } + pred_row_offset += MAX_SB_SIZE; + output_row_offset += MAX_SB_SIZE; + } + } else if (sub_y == 1 && sub_x == 1) { + y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)]; + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; i++) { + int top_left = (pred_row_offset + i) << sub_y; + int bot_left = top_left + MAX_SB_SIZE; + // In 4:2:0, average pixels in 2x2 grid + output[output_row_offset + i] = OD_SHR_ROUND( + y_pix[top_left] + y_pix[top_left + 1] // Top row + + y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row + , + 2); + } + pred_row_offset += MAX_SB_SIZE; + output_row_offset += MAX_SB_SIZE; + } + } else { + assert(0); // Unsupported chroma subsampling + } + // Due to frame boundary issues, it is possible that the total area of + // covered by Chroma exceeds that of Luma. When this happens, we write over + // the broken data by repeating the last columns and/or rows. + // + // Note that in order to manage the case where both rows and columns + // overrun, + // we apply rows first. This way, when the rows overrun the bottom of the + // frame, the columns will be copied over them. + const int uv_width = (col << off_log2) + width; + const int uv_height = (row << off_log2) + height; + + const int diff_width = uv_width - (cfl->y_width >> sub_x); + const int diff_height = uv_height - (cfl->y_height >> sub_y); + + if (diff_width > 0) { + int last_pixel; + output_row_offset = width - diff_width; + + for (int j = 0; j < height; j++) { + last_pixel = output_row_offset - 1; + for (int i = 0; i < diff_width; i++) { + output[output_row_offset + i] = output[last_pixel]; + } + output_row_offset += MAX_SB_SIZE; + } + } + + if (diff_height > 0) { + output_row_offset = (height - diff_height) * MAX_SB_SIZE; + const int last_row_offset = output_row_offset - MAX_SB_SIZE; + + for (int j = 0; j < diff_height; j++) { + for (int i = 0; i < width; i++) { + output[output_row_offset + i] = output[last_row_offset + i]; + } + output_row_offset += MAX_SB_SIZE; + } + } } // CfL computes its own block-level DC_PRED. This is required to compute both // alpha_cb and alpha_cr before the prediction are computed. -void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) { +static void cfl_dc_pred(MACROBLOCKD *xd) { const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U]; const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V]; @@ -38,6 +124,9 @@ void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) { const int dst_u_stride = pd_u->dst.stride; const int dst_v_stride = pd_v->dst.stride; + CFL_CTX *const cfl = xd->cfl; + const int width = cfl->uv_width; + const int height = cfl->uv_height; // Number of pixel on the top and left borders. const double num_pel = width + height; @@ -83,37 +172,70 @@ void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) { sum_v += height * 129; } - xd->cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel; - xd->cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel; + cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel; + cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel; } -double cfl_compute_average(uint8_t *y_pix, int y_stride, int width, - int height) { +static void cfl_compute_average(CFL_CTX *cfl) { + const int width = cfl->uv_width; + const int height = cfl->uv_height; + const double num_pel = width * height; + // TODO(ltrudeau) Convert to uint16 for HBD support + const uint8_t *y_pix = cfl->y_down_pix; + // TODO(ltrudeau) Convert to uint16 for HBD support + + cfl_load(cfl, 0, 0, width, height); + int sum = 0; for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { sum += y_pix[i]; } - y_pix += y_stride; + y_pix += MAX_SB_SIZE; + } + cfl->y_average = sum / num_pel; +} + +static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign, + CFL_PRED_TYPE pred_type) { + const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type]; + const double abs_alpha = cfl_alpha_mags[mag_idx]; + if (alpha_sign == CFL_SIGN_POS) { + return abs_alpha; + } else { + assert(abs_alpha != 0.0); + assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha); + return -abs_alpha; } - return sum / (double)(width * height); } // Predict the current transform block using CfL. -void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride, - int row, int col, TX_SIZE tx_size, double dc_pred, - double alpha) { +void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, + int row, int col, TX_SIZE tx_size, int plane) { + CFL_CTX *const cfl = xd->cfl; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + + // CfL parameters must be computed before prediction can be done. + assert(cfl->are_parameters_computed == 1); + const int width = tx_size_wide[tx_size]; const int height = tx_size_high[tx_size]; - const double y_avg = cfl->y_avg; + // TODO(ltrudeau) Convert to uint16 to support HBD + const uint8_t *y_pix = cfl->y_down_pix; + + const double dc_pred = cfl->dc_pred[plane - 1]; + const double alpha = cfl_idx_to_alpha( + mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1); - cfl_load(cfl, dst, dst_stride, row, col, width, height); + const double avg = cfl->y_average; + cfl_load(cfl, row, col, width, height); for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { - dst[i] = (uint8_t)(alpha * (dst[i] - y_avg) + dc_pred + 0.5); + dst[i] = (uint8_t)(alpha * (y_pix[i] - avg) + dc_pred + 0.5); } dst += dst_stride; + y_pix += MAX_SB_SIZE; } } @@ -130,6 +252,7 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, assert(MAX_SB_SIZE * (row + tx_height - 1) + col + tx_width - 1 < MAX_SB_SQUARE); + // TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store for (int j = 0; j < tx_height; j++) { for (int i = 0; i < tx_width; i++) { y_pix[i] = input[i]; @@ -148,85 +271,34 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width); cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height); } -} - -// Load from the CfL pixel buffer into output -void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row, - int col, int width, int height) { - const int sub_x = cfl->subsampling_x; - const int sub_y = cfl->subsampling_y; - const int off_log2 = tx_size_wide_log2[0]; - - const uint8_t *y_pix; - - int pred_row_offset = 0; - int output_row_offset = 0; - // TODO(ltrudeau) add support for 4:2:2 - if (sub_y == 0 && sub_x == 0) { - y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2]; - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - // In 4:4:4, pixels match 1 to 1 - output[output_row_offset + i] = y_pix[pred_row_offset + i]; - } - pred_row_offset += MAX_SB_SIZE; - output_row_offset += output_stride; - } - } else if (sub_y == 1 && sub_x == 1) { - y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)]; - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; i++) { - int top_left = (pred_row_offset + i) << sub_y; - int bot_left = top_left + MAX_SB_SIZE; - // In 4:2:0, average pixels in 2x2 grid - output[output_row_offset + i] = OD_SHR_ROUND( - y_pix[top_left] + y_pix[top_left + 1] // Top row - + y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row - , - 2); - } - pred_row_offset += MAX_SB_SIZE; - output_row_offset += output_stride; - } - } else { - assert(0); // Unsupported chroma subsampling - } - // Due to frame boundary issues, it is possible that the total area of - // covered by Chroma exceeds that of Luma. When this happens, we write over - // the broken data by repeating the last columns and/or rows. - // - // Note that in order to manage the case where both rows and columns - // overrun, - // we apply rows first. This way, when the rows overrun the bottom of the - // frame, the columns will be copied over them. - const int uv_width = (col << off_log2) + width; - const int uv_height = (row << off_log2) + height; + // Invalidate current parameters + cfl->are_parameters_computed = 0; +} - const int diff_width = uv_width - (cfl->y_width >> sub_x); - const int diff_height = uv_height - (cfl->y_height >> sub_y); +void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) { + CFL_CTX *const cfl = xd->cfl; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - if (diff_width > 0) { - int last_pixel; - output_row_offset = width - diff_width; + // Do not call cfl_compute_parameters multiple time on the same values. + assert(cfl->are_parameters_computed == 0); - for (int j = 0; j < height; j++) { - last_pixel = output_row_offset - 1; - for (int i = 0; i < diff_width; i++) { - output[output_row_offset + i] = output[last_pixel]; - } - output_row_offset += output_stride; - } - } +#if CONFIG_CHROMA_SUB8X8 + const BLOCK_SIZE plane_bsize = AOMMAX( + BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U])); +#else + const BLOCK_SIZE plane_bsize = + get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]); +#endif + // AOM_PLANE_U is used, but both planes will have the same sizes. + cfl->uv_width = max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size); + cfl->uv_height = + max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size); - if (diff_height > 0) { - output_row_offset = (height - diff_height) * output_stride; - const int last_row_offset = output_row_offset - output_stride; - for (int j = 0; j < diff_height; j++) { - for (int i = 0; i < width; i++) { - output[output_row_offset + i] = output[last_row_offset + i]; - } - output_row_offset += output_stride; - } - } + // Compute block-level DC_PRED for both chromatic planes. + // DC_PRED replaces beta in the linear model. + cfl_dc_pred(xd); + // Compute block-level average on reconstructed luma input. + cfl_compute_average(cfl); + cfl->are_parameters_computed = 1; } diff --git a/av1/common/cfl.h b/av1/common/cfl.h index dcc896d2610f5c99d1861fb38bac88a9c0c9a4bd..7c11c4bacf454624c5e7200224c6a6a6952ec12c 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h @@ -26,13 +26,25 @@ typedef struct macroblockd MACROBLOCKD; typedef struct { // Pixel buffer containing the luma pixels used as prediction for chroma + // TODO(ltrudeau) Convert to uint16 for HBD support uint8_t y_pix[MAX_SB_SQUARE]; + // Pixel buffer containing the downsampled luma pixels used as prediction for + // chroma + // TODO(ltrudeau) Convert to uint16 for HBD support + uint8_t y_down_pix[MAX_SB_SQUARE]; + // Height and width of the luma prediction block currently in the pixel buffer int y_height, y_width; + // Height and width of the chroma prediction block currently associated with + // this context + int uv_height, uv_width; + // Average of the luma reconstructed values over the entire prediction unit - double y_avg; + double y_average; + + int are_parameters_computed; // Chroma subsampling int subsampling_x, subsampling_y; @@ -57,30 +69,12 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = { void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm); -void cfl_dc_pred(MACROBLOCKD *xd, int width, int height); - -double cfl_compute_average(uint8_t *y_pix, int y_stride, int height, int width); - -static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign, - CFL_PRED_TYPE pred_type) { - const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type]; - const double abs_alpha = cfl_alpha_mags[mag_idx]; - if (alpha_sign == CFL_SIGN_POS) { - return abs_alpha; - } else { - assert(abs_alpha != 0.0); - assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha); - return -abs_alpha; - } -} - -void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride, - int row, int col, TX_SIZE tx_size, double dc_pred, - double alpha); +void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, + int row, int col, TX_SIZE tx_size, int plane); void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, int col, TX_SIZE tx_size); -void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row, - int col, int width, int height); +void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size); + #endif // AV1_COMMON_CFL_H_ diff --git a/av1/common/reconintra.c b/av1/common/reconintra.c index f5719beaa34b463f940c7018c43996ad86b3d8e2..723d3d9e54ba36661b81d917bf8fcd416738d899 100644 --- a/av1/common/reconintra.c +++ b/av1/common/reconintra.c @@ -2719,37 +2719,16 @@ void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx, mode, dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane); #if CONFIG_CFL - CFL_CTX *const cfl = xd->cfl; if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) { if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) { -#if CONFIG_CHROMA_SUB8X8 - const BLOCK_SIZE plane_bsize = - AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd)); -#else - const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd); -#endif - const int width = - max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size); - const int height = - max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size); - - // Temporary pixel buffer used to store the CfL prediction when we compute - // the average over the reconstructed and downsampled luma pixels - // TODO(ltrudeau) Convert to uint16 when adding HBD support - uint8_t tmp_pix[MAX_SB_SQUARE]; - - // Compute the block-level DC_PRED for both chromatic planes. DC_PRED - // replaces beta in the linear model. - cfl_dc_pred(xd, width, height); - cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height); - cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height); + // Avoid computing the CfL parameters twice, if they have already been + // computed in the encoder_facade + if (!xd->cfl->are_parameters_computed) + cfl_compute_parameters(xd, tx_size); } - cfl_predict_block( - cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size, - cfl->dc_pred[plane - 1], - cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], - plane - 1)); + cfl_predict_block(xd, dst, pd->dst.stride, blk_row, blk_col, tx_size, + plane); } #endif } diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 4f8e0cd620fc048c398c71a986984d2a0cb8fc96..1a6098580dc702e55f5c11bf0873d0d7c90196ba 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c @@ -1364,8 +1364,22 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, FRAME_CONTEXT *const ec_ctx = cm->fc; #endif // CONFIG_EC_ADAPT +#if CONFIG_DEBUG + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; +// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate +// that we will get the same value of plane_bsize on the other side. +#if CONFIG_CHROMA_SUB8X8 + const BLOCK_SIZE plane_bsize_val = + AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane])); +#else + const BLOCK_SIZE plane_bsize_val = + get_plane_block_size(mbmi->sb_type, &xd->plane[plane]); +#endif // CONFIG_CHROMA_SUB8X8 + assert(plane_bsize == plane_bsize_val); +#endif // CONFIG_DEBUG + av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, - blk_row, tx_size, plane_bsize); + blk_row, tx_size); #else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); #endif @@ -1418,10 +1432,11 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, } #if CONFIG_CFL -static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, - const uint8_t *src, int src_stride, int width, - int height, TX_SIZE tx_size, double dc_pred, - double alpha, int *dist_neg_out) { +static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, + const double y_average, const uint8_t *src, + int src_stride, int width, int height, + TX_SIZE tx_size, double dc_pred, double alpha, + int *dist_neg_out) { const double dc_pred_bias = dc_pred + 0.5; int dist = 0; int diff; @@ -1444,6 +1459,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, int dist_neg = 0; const int tx_height = tx_size_high[tx_size]; const int tx_width = tx_size_wide[tx_size]; + const int y_block_row_off = y_stride * tx_height; + const int src_block_row_off = src_stride * tx_height; const uint8_t *t_y_pix; const uint8_t *t_src; for (int b_j = 0; b_j < height; b_j += tx_height) { @@ -1454,7 +1471,7 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, t_src = src; for (int t_j = b_j; t_j < h; t_j++) { for (int t_i = b_i; t_i < w; t_i++) { - const double scaled_luma = alpha * (t_y_pix[t_i] - y_avg); + const double scaled_luma = alpha * (t_y_pix[t_i] - y_average); const int uv = t_src[t_i]; diff = uv - (int)(scaled_luma + dc_pred_bias); dist += diff * diff; @@ -1465,8 +1482,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, t_src += src_stride; } } - y_pix += y_stride * tx_height; - src += src_stride * tx_height; + y_pix += y_block_row_off; + src += src_block_row_off; } if (dist_neg_out) *dist_neg_out = dist_neg; @@ -1474,35 +1491,64 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, return dist; } -static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl, - int width, int height, TX_SIZE tx_size, - uint8_t y_pix[MAX_SB_SQUARE], - CFL_SIGN_TYPE signs_out[CFL_SIGNS]) { +static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) { + assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == + AOM_ICDF(CDF_PROB_TOP)); + const int prob_den = CDF_PROB_TOP; + + int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]); + cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den)); + + for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { + int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + + (cfl_alpha_codes[c][CFL_PRED_V] != 0); + prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - + AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]); + cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) + + av1_cost_literal(sign_bit_cost); + } +} + +static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx, + TX_SIZE tx_size) { const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U]; const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V]; const uint8_t *const src_u = p_u->src.buf; const uint8_t *const src_v = p_v->src.buf; const int src_stride_u = p_u->src.stride; const int src_stride_v = p_v->src.stride; + + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + + CFL_CTX *const cfl = xd->cfl; + cfl_compute_parameters(xd, tx_size); + const int width = cfl->uv_width; + const int height = cfl->uv_height; const double dc_pred_u = cfl->dc_pred[CFL_PRED_U]; const double dc_pred_v = cfl->dc_pred[CFL_PRED_V]; - const double y_avg = cfl->y_avg; + const double y_average = cfl->y_average; + const uint8_t *y_pix = cfl->y_down_pix; + + CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs; + + cfl_update_costs(cfl, ec_ctx); int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; sse[CFL_PRED_U][0] = - cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width, + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width, height, tx_size, dc_pred_u, 0, NULL); sse[CFL_PRED_V][0] = - cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width, + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width, height, tx_size, dc_pred_v, 0, NULL); for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]); sse[CFL_PRED_U][m] = cfl_alpha_dist( - y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width, height, tx_size, - dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); + y_pix, MAX_SB_SIZE, y_average, src_u, src_stride_u, width, height, + tx_size, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); sse[CFL_PRED_V][m] = cfl_alpha_dist( - y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width, height, tx_size, - dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); + y_pix, MAX_SB_SIZE, y_average, src_v, src_stride_v, width, height, + tx_size, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); } int dist; @@ -1512,8 +1558,8 @@ static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl, // Compute least squares parameter of the entire block // IMPORTANT: We assume that the first code is 0,0 int ind = 0; - signs_out[CFL_PRED_U] = CFL_SIGN_POS; - signs_out[CFL_PRED_V] = CFL_SIGN_POS; + signs[CFL_PRED_U] = CFL_SIGN_POS; + signs[CFL_PRED_V] = CFL_SIGN_POS; dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]; dist *= 16; @@ -1531,72 +1577,27 @@ static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl, if (cost < best_cost) { best_cost = cost; ind = c; - signs_out[CFL_PRED_U] = sign_u; - signs_out[CFL_PRED_V] = sign_v; + signs[CFL_PRED_U] = sign_u; + signs[CFL_PRED_V] = sign_v; } } } } - return ind; -} - -static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) { - assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == - AOM_ICDF(CDF_PROB_TOP)); - const int prob_den = CDF_PROB_TOP; - - int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]); - cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den)); - - for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { - int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + - (cfl_alpha_codes[c][CFL_PRED_V] != 0); - prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - - AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]); - cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) + - av1_cost_literal(sign_bit_cost); - } + mbmi->cfl_alpha_idx = ind; } void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, FRAME_CONTEXT *ec_ctx, int plane, int block_idx, int blk_col, - int blk_row, TX_SIZE tx_size, - BLOCK_SIZE plane_bsize) { + int blk_row, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) { if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) { - const int width = - max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size); - const int height = - max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size); - - uint8_t tmp_pix[MAX_SB_SQUARE]; - CFL_CTX *const cfl = xd->cfl; - - cfl_update_costs(cfl, ec_ctx); - cfl_dc_pred(xd, width, height); - // Load CfL Prediction over the entire block - cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height); - cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height); - mbmi->cfl_alpha_idx = cfl_compute_alpha_ind( - x, cfl, width, height, tx_size, tmp_pix, mbmi->cfl_alpha_signs); + cfl_compute_alpha_ind(x, ec_ctx, tx_size); } } -#if CONFIG_DEBUG -// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate -// that we will get the same value of plane_bsize on the other side. -#if CONFIG_CHROMA_SUB8X8 - const BLOCK_SIZE plane_bsize_val = - AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane])); -#else - const BLOCK_SIZE plane_bsize_val = - get_plane_block_size(mbmi->sb_type, &xd->plane[plane]); -#endif // CONFIG_CHROMA_SUB8X8 - assert(plane_bsize == plane_bsize_val); -#endif // CONFIG_DEBUG av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row, tx_size); } diff --git a/av1/encoder/encodemb.h b/av1/encoder/encodemb.h index 35a2c157020d51e8672e6918260d63851be3037e..7292ce070bcd5fa7465e4c54b9fa5c3ff3f411f8 100644 --- a/av1/encoder/encodemb.h +++ b/av1/encoder/encodemb.h @@ -90,8 +90,7 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k, void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, FRAME_CONTEXT *ec_ctx, int plane, int block_idx, int blk_col, - int blk_row, TX_SIZE tx_size, - BLOCK_SIZE plane_bsize); + int blk_row, TX_SIZE tx_size); #endif #if CONFIG_DPCM_INTRA diff --git a/av1/encoder/rdopt.c b/av1/encoder/rdopt.c index 79fe13eb3487b767d4cce97ccfbc13f43f286dbe..0729dbe114ce0c9f313ad2cd245f1ff0f9593e19 100644 --- a/av1/encoder/rdopt.c +++ b/av1/encoder/rdopt.c @@ -1755,8 +1755,20 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, FRAME_CONTEXT *const ec_ctx = cm->fc; #endif // CONFIG_EC_ADAPT +#if CONFIG_DEBUG +// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate +// that we will get the same value of plane_bsize on the other side. +#if CONFIG_CHROMA_SUB8X8 + const BLOCK_SIZE plane_bsize_val = AOMMAX( + BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane])); +#else + const BLOCK_SIZE plane_bsize_val = + get_plane_block_size(mbmi->sb_type, &xd->plane[plane]); +#endif // CONFIG_CHROMA_SUB8X8 + assert(plane_bsize == plane_bsize_val); +#endif // CONFIG_DEBUG av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, - blk_row, tx_size, plane_bsize); + blk_row, tx_size); #else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); #endif @@ -2631,9 +2643,6 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, for (row = 0; row < max_blocks_high; row += stepr) { for (col = 0; col < max_blocks_wide; col += stepc) { #if CONFIG_CFL - const struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); - #if CONFIG_EC_ADAPT FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; #else @@ -2641,7 +2650,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, #endif // CONFIG_EC_ADAPT av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row, - tx_size, plane_bsize); + tx_size); #else av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size); #endif