Commit 2510f64e authored by David Michael Barr

[CFL] Move alpha picking code to rdopt.c

This simplifies the call path from rd_pick_intra_sbuv_mode() to the CfL alpha selection.

Results on Subset1 (compared to dff41923 with CfL enabled)

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I0bade9d347c626a78ba7077b960afdb318ecca69
Signed-off-by: David Michael Barr <b@rr-dav.id.au>
parent eeb08a9b
......@@ -1351,29 +1351,8 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
const int dst_stride = pd->dst.stride;
uint8_t *dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]];
#if CONFIG_CFL
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
#if CONFIG_DEBUG
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate
// that we will get the same value of plane_bsize on the other side.
#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize_val =
AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane]));
#else
const BLOCK_SIZE plane_bsize_val =
get_plane_block_size(mbmi->sb_type, &xd->plane[plane]);
#endif // CONFIG_CHROMA_SUB8X8
assert(plane_bsize == plane_bsize_val);
#endif // CONFIG_DEBUG
av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
blk_row, tx_size);
#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
#if CONFIG_DPCM_INTRA || CONFIG_LGT
const PREDICTION_MODE mode =
......@@ -1429,184 +1408,6 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
#endif
}
#if CONFIG_CFL
// Measures how well a CfL prediction with a given alpha matches the chroma
// source, as a sum of squared errors over a width x height area.
//
// When alpha_q3 is zero the prediction is the flat value dc_pred and the same
// sum is reported for both signs. Otherwise the area is walked one transform
// block (tx_size) at a time in raster order, consuming one entry of
// y_averages_q3 per transform block, and two sums are accumulated in a single
// pass: one using +scaled_luma and one using -scaled_luma.
//
//   y_pix, y_stride   luma pixels handed to get_scaled_luma_q0() and their
//                     row stride.
//   src, src_stride   chroma source pixels and their row stride.
//   dist_neg_out      optional; receives the sum for the negated alpha.
//
// Returns the sum of squared errors for the positive alpha.
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
                          const int y_averages_q3[MAX_NUM_TXB],
                          const uint8_t *src, int src_stride, int width,
                          int height, TX_SIZE tx_size, int dc_pred,
                          int alpha_q3, int *dist_neg_out) {
  if (alpha_q3 == 0) {
    // Flat prediction: the sign of alpha is irrelevant.
    int sse_dc = 0;
    const uint8_t *src_row = src;
    for (int row = 0; row < height; ++row, src_row += src_stride) {
      for (int col = 0; col < width; ++col) {
        const int err = src_row[col] - dc_pred;
        sse_dc += err * err;
      }
    }
    if (dist_neg_out) *dist_neg_out = sse_dc;
    return sse_dc;
  }

  const int tx_w = tx_size_wide[tx_size];
  const int tx_h = tx_size_high[tx_size];
  int sse_pos = 0;
  int sse_neg = 0;
  int avg_idx = 0;

  for (int blk_y = 0; blk_y < height; blk_y += tx_h) {
    for (int blk_x = 0; blk_x < width; blk_x += tx_w) {
      const int avg_q3 = y_averages_q3[avg_idx++];
      // Both pointers address the first row of the current transform block,
      // already offset to its left-most column.
      const uint8_t *luma_row = y_pix + blk_x;
      const uint8_t *chroma_row = src + blk_x;
      for (int r = 0; r < tx_h; ++r) {
        for (int c = 0; c < tx_w; ++c) {
          const int uv = chroma_row[c];
          const int scaled_luma =
              get_scaled_luma_q0(alpha_q3, luma_row[c], avg_q3);
          // TODO(ltrudeau) add support for HBD.
          const int err_pos = uv - clamp(scaled_luma + dc_pred, 0, 255);
          sse_pos += err_pos * err_pos;
          // TODO(ltrudeau) add support for HBD.
          const int err_neg = uv - clamp(-scaled_luma + dc_pred, 0, 255);
          sse_neg += err_neg * err_neg;
        }
        luma_row += y_stride;
        chroma_row += src_stride;
      }
    }
    // Step down to the next row of transform blocks.
    y_pix += y_stride * tx_h;
    src += src_stride * tx_h;
  }

  if (dist_neg_out) *dist_neg_out = sse_neg;
  return sse_pos;
}
// Refreshes cfl->costs[] from the alpha CDF held in ec_ctx.
//
// Each symbol's probability is the difference between consecutive cumulative
// values of cfl_alpha_cdf. Its cost is the symbol cost for that probability
// plus a literal cost of one unit per non-zero entry (U and V) of the
// corresponding cfl_alpha_codes row, which accounts for the sign bits.
static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
  assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
         AOM_ICDF(CDF_PROB_TOP));

  aom_cdf_prob cdf_below = 0;
  for (int code = 0; code < CFL_ALPHABET_SIZE; ++code) {
    const aom_cdf_prob cdf_here = AOM_ICDF(ec_ctx->cfl_alpha_cdf[code]);
    const aom_cdf_prob symbol_prob = cdf_here - cdf_below;
    cdf_below = cdf_here;

    const int num_sign_bits = (cfl_alpha_codes[code][CFL_PRED_U] != 0) +
                              (cfl_alpha_codes[code][CFL_PRED_V] != 0);
    cfl->costs[code] =
        av1_cost_symbol(symbol_prob) + av1_cost_literal(num_sign_bits);
  }
}
// Chooses the CfL alpha code and the U/V signs for the current block by
// exhaustive rate-distortion search, and stores the result in the block's
// mode info (mbmi->cfl_alpha_idx and mbmi->cfl_alpha_signs).
//
//   x        encoder macroblock; supplies the U/V source planes, rdmult and
//            the CfL context (xd->cfl).
//   ec_ctx   entropy context whose cfl_alpha_cdf is used to price each code.
//   tx_size  transform size forwarded to cfl_compute_parameters() and
//            cfl_alpha_dist().
//
// Steps:
//   1. cfl_compute_parameters() refreshes the CfL context for this block.
//   2. cfl_update_costs() refreshes the per-code costs.
//   3. The SSE of every alpha magnitude is computed once per plane; each call
//      to cfl_alpha_dist() yields the positive and the negated magnitude.
//   4. Every (code, sign_u, sign_v) combination is priced with RDCOST and the
//      cheapest one wins; ties keep the earlier candidate.
static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
                                  TX_SIZE tx_size) {
  const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
  const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
  const uint8_t *const src_u = p_u->src.buf;
  const uint8_t *const src_v = p_v->src.buf;
  const int src_stride_u = p_u->src.stride;
  const int src_stride_v = p_v->src.stride;

  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  CFL_CTX *const cfl = xd->cfl;

  // Must run before the fields of cfl are read below.
  cfl_compute_parameters(xd, tx_size);
  const int width = cfl->uv_width;
  const int height = cfl->uv_height;
  const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
  const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
  const int *y_averages_q3 = cfl->y_averages_q3;
  const uint8_t *y_pix = cfl->y_down_pix;

  CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;

  cfl_update_costs(cfl, ec_ctx);

  // sse[plane][m] is the distortion of plane when using cfl_alpha_mags_q3[m].
  int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
  sse[CFL_PRED_U][0] =
      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
                     width, height, tx_size, dc_pred_u, 0, NULL);
  sse[CFL_PRED_V][0] =
      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
                     width, height, tx_size, dc_pred_v, 0, NULL);

  // Magnitudes come in (m, m + 1) pairs of opposite sign, so one call fills
  // both slots.
  for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
    assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]);
    sse[CFL_PRED_U][m] = cfl_alpha_dist(
        y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
        tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]);
    sse[CFL_PRED_V][m] = cfl_alpha_dist(
        y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
        tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]);
  }

  // The distortion is kept in 64 bits: the summed SSE of both planes times 16
  // can exceed INT_MAX for large, badly predicted blocks, and signed overflow
  // is undefined behaviour.
  // NOTE(review): the factor 16 presumably matches the distortion scale used
  // by the rest of the RD code - confirm.
  int64_t dist;
  int64_t cost;
  int64_t best_cost;

  // Exhaustive search over every alpha code and sign combination.
  // IMPORTANT: We assume that the first code is 0,0
  int ind = 0;
  signs[CFL_PRED_U] = CFL_SIGN_POS;
  signs[CFL_PRED_V] = CFL_SIGN_POS;

  dist = ((int64_t)sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]) * 16;
  best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);

  for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
    const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
    const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
    // A zero magnitude has no meaningful sign, so its loop starts one sign
    // later and that plane is tried with fewer sign values.
    for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
      for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
        // The negated magnitude lives in the slot right after the positive.
        dist = ((int64_t)sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
                sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]) *
               16;
        cost = RDCOST(x->rdmult, cfl->costs[c], dist);
        if (cost < best_cost) {
          best_cost = cost;
          ind = c;
          signs[CFL_PRED_U] = sign_u;
          signs[CFL_PRED_V] = sign_v;
        }
      }
    }
  }

  mbmi->cfl_alpha_idx = ind;
}
// Encoder-side wrapper around av1_predict_intra_block_facade().
//
// Before predicting, it runs cfl_compute_alpha_ind() exactly when all of the
// following hold: the plane is not luma, the block's uv_mode is DC_PRED, the
// plane is U, and the transform block sits at column 0, row 0. The regular
// intra prediction is then performed unconditionally.
void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
                                            FRAME_CONTEXT *ec_ctx, int plane,
                                            int block_idx, int blk_col,
                                            int blk_row, TX_SIZE tx_size) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;

  const int is_chroma_dc = plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED;
  const int is_first_u_txb =
      blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U;

  if (is_chroma_dc && is_first_u_txb) {
    cfl_compute_alpha_ind(x, ec_ctx, tx_size);
  }

  av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row,
                                 tx_size);
}
#endif
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
BLOCK_SIZE bsize, int plane,
int enable_optimize_b, int mi_row,
......
......@@ -87,13 +87,6 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k,
int *size, int skip_rest, int skip_dir, int bs);
#endif
#if CONFIG_CFL
// Encoder-side intra prediction entry point, declared only when CONFIG_CFL is
// enabled. Takes the encoder macroblock and an entropy context in addition to
// the plane, block index, transform block position (blk_col, blk_row) and
// transform size.
void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
FRAME_CONTEXT *ec_ctx, int plane,
int block_idx, int blk_col,
int blk_row, TX_SIZE tx_size);
#endif
#if CONFIG_DPCM_INTRA
void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x,
PREDICTION_MODE mode, int plane, int block,
......
......@@ -1815,27 +1815,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
if (args->exit_early) return;
if (!is_inter_block(mbmi)) {
#if CONFIG_CFL
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
#if CONFIG_DEBUG
// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate
// that we will get the same value of plane_bsize on the other side.
#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize_val = AOMMAX(
BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane]));
#else
const BLOCK_SIZE plane_bsize_val =
get_plane_block_size(mbmi->sb_type, &xd->plane[plane]);
#endif // CONFIG_CHROMA_SUB8X8
assert(plane_bsize == plane_bsize_val);
#endif // CONFIG_DEBUG
av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
blk_row, tx_size);
#else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif
#if CONFIG_DPCM_INTRA
const int block_raster_idx =
av1_block_index_to_raster_order(tx_size, block);
......@@ -2719,14 +2699,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x,
int block = 0;
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
#if CONFIG_CFL
FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row,
tx_size);
#else
av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size);
#endif
block += step;
}
}
......@@ -5344,6 +5317,170 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
}
#endif // CONFIG_EXT_INTRA
#if CONFIG_CFL
// Sum of squared errors between the chroma source and a CfL prediction.
//
// With alpha_q3 == 0 the prediction is simply dc_pred everywhere, and the
// result is reported identically for both signs. For any other alpha the
// width x height area is processed transform block by transform block
// (dimensions taken from tx_size, raster order, one y_averages_q3 entry each)
// and the errors for +alpha and -alpha are accumulated together.
//
// The return value is the error for +alpha. If dist_neg_out is non-NULL it
// receives the error for -alpha.
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
                          const int y_averages_q3[MAX_NUM_TXB],
                          const uint8_t *src, int src_stride, int width,
                          int height, TX_SIZE tx_size, int dc_pred,
                          int alpha_q3, int *dist_neg_out) {
  int pos_total = 0;

  if (alpha_q3 == 0) {
    const uint8_t *line = src;
    for (int y = 0; y < height; ++y) {
      for (int x = 0; x < width; ++x) {
        const int delta = line[x] - dc_pred;
        pos_total += delta * delta;
      }
      line += src_stride;
    }
    if (dist_neg_out) *dist_neg_out = pos_total;
    return pos_total;
  }

  int neg_total = 0;
  int txb = 0;
  const int txb_h = tx_size_high[tx_size];
  const int txb_w = tx_size_wide[tx_size];
  const int luma_step = y_stride * txb_h;
  const int chroma_step = src_stride * txb_h;

  for (int top = 0; top < height; top += txb_h) {
    for (int left = 0; left < width; left += txb_w) {
      const int right = left + txb_w;
      const int avg_q3 = y_averages_q3[txb++];
      // Row cursors restart at the top of the current row of transform blocks.
      const uint8_t *luma = y_pix;
      const uint8_t *chroma = src;
      for (int n = 0; n < txb_h; ++n) {
        for (int col = left; col < right; ++col) {
          const int uv = chroma[col];
          const int scaled_luma =
              get_scaled_luma_q0(alpha_q3, luma[col], avg_q3);
          // TODO(ltrudeau) add support for HBD.
          const int delta_pos = uv - clamp(scaled_luma + dc_pred, 0, 255);
          pos_total += delta_pos * delta_pos;
          // TODO(ltrudeau) add support for HBD.
          const int delta_neg = uv - clamp(-scaled_luma + dc_pred, 0, 255);
          neg_total += delta_neg * delta_neg;
        }
        luma += y_stride;
        chroma += src_stride;
      }
    }
    y_pix += luma_step;
    src += chroma_step;
  }

  if (dist_neg_out) *dist_neg_out = neg_total;
  return pos_total;
}
// Recomputes the cost of every CfL alpha code from ec_ctx->cfl_alpha_cdf and
// stores the results in cfl->costs[].
//
// The cost of code c is av1_cost_symbol() of its probability (the step between
// consecutive cumulative values) plus av1_cost_literal() of the number of
// non-zero entries in cfl_alpha_codes[c], i.e. one unit for each of U and V
// that needs a sign.
static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
  assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
         AOM_ICDF(CDF_PROB_TOP));

  aom_cdf_prob last = 0;
  int c = 0;
  while (c < CFL_ALPHABET_SIZE) {
    const aom_cdf_prob cumulative = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]);
    const aom_cdf_prob prob = cumulative - last;
    const int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
                              (cfl_alpha_codes[c][CFL_PRED_V] != 0);
    cfl->costs[c] = av1_cost_symbol(prob) + av1_cost_literal(sign_bit_cost);
    last = cumulative;
    ++c;
  }
}
// Picks the CfL alpha code and U/V signs for the current block by exhaustive
// rate-distortion search.
//
//   x        encoder macroblock; supplies the U/V source planes, rdmult, the
//            tile entropy context (xd->tile_ctx) and the CfL context (xd->cfl).
//   tx_size  transform size forwarded to cfl_compute_parameters() and
//            cfl_alpha_dist().
//
// Side effects: writes mbmi->cfl_alpha_idx and mbmi->cfl_alpha_signs, and
// refreshes cfl->costs[] through cfl_update_costs().
//
// Returns the cost (cfl->costs[]) of the selected alpha code.
static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
  const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
  const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
  const uint8_t *const src_u = p_u->src.buf;
  const uint8_t *const src_v = p_v->src.buf;
  const int src_stride_u = p_u->src.stride;
  const int src_stride_v = p_v->src.stride;

  MACROBLOCKD *const xd = &x->e_mbd;
  FRAME_CONTEXT *const ec_ctx = xd->tile_ctx;
  MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
  CFL_CTX *const cfl = xd->cfl;

  // Must run before the fields of cfl are read below.
  cfl_compute_parameters(xd, tx_size);
  const int width = cfl->uv_width;
  const int height = cfl->uv_height;
  const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
  const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
  const int *y_averages_q3 = cfl->y_averages_q3;
  const uint8_t *y_pix = cfl->y_down_pix;

  CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;

  cfl_update_costs(cfl, ec_ctx);

  // sse[plane][m] is the distortion of plane when using cfl_alpha_mags_q3[m].
  int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
  sse[CFL_PRED_U][0] =
      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
                     width, height, tx_size, dc_pred_u, 0, NULL);
  sse[CFL_PRED_V][0] =
      cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
                     width, height, tx_size, dc_pred_v, 0, NULL);

  // Magnitudes come in (m, m + 1) pairs of opposite sign, so one call fills
  // both slots.
  for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
    assert(cfl_alpha_mags_q3[m + 1] == -cfl_alpha_mags_q3[m]);
    sse[CFL_PRED_U][m] = cfl_alpha_dist(
        y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
        tx_size, dc_pred_u, cfl_alpha_mags_q3[m], &sse[CFL_PRED_U][m + 1]);
    sse[CFL_PRED_V][m] = cfl_alpha_dist(
        y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
        tx_size, dc_pred_v, cfl_alpha_mags_q3[m], &sse[CFL_PRED_V][m + 1]);
  }

  // The distortion is kept in 64 bits: the summed SSE of both planes times 16
  // can exceed INT_MAX for large, badly predicted blocks, and signed overflow
  // is undefined behaviour.
  // NOTE(review): the factor 16 presumably matches the distortion scale used
  // by the rest of the RD code - confirm.
  int64_t dist;
  int64_t cost;
  int64_t best_cost;

  // Exhaustive search over every alpha code and sign combination.
  // IMPORTANT: We assume that the first code is 0,0
  int ind = 0;
  signs[CFL_PRED_U] = CFL_SIGN_POS;
  signs[CFL_PRED_V] = CFL_SIGN_POS;

  dist = ((int64_t)sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]) * 16;
  best_cost = RDCOST(x->rdmult, cfl->costs[0], dist);

  for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
    const int idx_u = cfl_alpha_codes[c][CFL_PRED_U];
    const int idx_v = cfl_alpha_codes[c][CFL_PRED_V];
    // A zero magnitude has no meaningful sign, so its loop starts one sign
    // later and that plane is tried with fewer sign values.
    for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) {
      for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) {
        // The negated magnitude lives in the slot right after the positive.
        dist = ((int64_t)sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
                sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]) *
               16;
        cost = RDCOST(x->rdmult, cfl->costs[c], dist);
        // Strict comparison: ties keep the earlier candidate.
        if (cost < best_cost) {
          best_cost = cost;
          ind = c;
          signs[CFL_PRED_U] = sign_u;
          signs[CFL_PRED_V] = sign_v;
        }
      }
    }
  }

  mbmi->cfl_alpha_idx = ind;
  return cfl->costs[ind];
}
#endif // CONFIG_CFL
static void init_sbuv_mode(MB_MODE_INFO *const mbmi) {
mbmi->uv_mode = DC_PRED;
#if CONFIG_PALETTE
......@@ -5384,6 +5521,13 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
continue;
mbmi->uv_mode = mode;
#if CONFIG_CFL
int cfl_alpha_rate = 0;
if (mode == DC_PRED) {
const TX_SIZE uv_tx_size = get_uv_tx_size(mbmi, &xd->plane[1]);
cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
}
#endif
#if CONFIG_EXT_INTRA
mbmi->angle_delta[1] = 0;
if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) {
......@@ -5408,7 +5552,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_CFL
if (mode == DC_PRED) {
this_rate += xd->cfl->costs[mbmi->cfl_alpha_idx];
this_rate += cfl_alpha_rate;
}
#endif
#if CONFIG_EXT_INTRA
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment