Commit 1f8d0950 authored by David Michael Barr's avatar David Michael Barr Committed by Luc Trudeau

[CFL] Switch to txfm_rd_in_plane in alpha search

This is more precise than the dist functions it replaces.

Results on Subset1 (compared with previous commit with CfL enabled)
  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0634 | -0.9188 | -0.9429 |   0.0609 | 0.0722 |  0.0593 |    -0.3226

Change-Id: I955a7d7eceea50482edb40b0d1041b300e3c9042
parent 29597c49
......@@ -633,6 +633,10 @@ typedef struct cfl_ctx {
// Whether the reconstructed luma pixels need to be stored
int store_y;
#if CONFIG_DEBUG
int rate;
#endif // CONFIG_DEBUG
int is_chroma_reference;
#if CONFIG_DEBUG
// The prediction used for sub8x8 blocks originates from multiple luma blocks,
......
......@@ -5401,160 +5401,57 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_EXT_INTRA
#if CONFIG_CFL
static int64_t cfl_alpha_dist_lbd(const int16_t *pred_buf_q3,
const uint8_t *src, int src_stride, int width,
int height, int dc_pred, int alpha_q3,
int64_t *dist_neg_out) {
int64_t dist = 0;
int diff;
if (alpha_q3 == 0) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
diff = src[i] - dc_pred;
dist += diff * diff;
}
src += src_stride;
}
if (dist_neg_out) *dist_neg_out = dist;
return dist;
}
int64_t dist_neg = 0;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
const int uv = src[i];
diff = uv -
clip_pixel(get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dc_pred);
dist += diff * diff;
diff = uv - clip_pixel(get_scaled_luma_q0(-alpha_q3, pred_buf_q3[i]) +
dc_pred);
dist_neg += diff * diff;
}
pred_buf_q3 += MAX_SB_SIZE;
src += src_stride;
}
if (dist_neg_out) *dist_neg_out = dist_neg;
return dist;
}
#if CONFIG_HIGHBITDEPTH
static int64_t cfl_alpha_dist_hbd(const int16_t *pred_buf_q3,
const uint16_t *src, int src_stride,
int width, int height, int dc_pred,
int alpha_q3, int bit_depth,
int64_t *dist_neg_out) {
const int shift = 2 * (bit_depth - 8);
const int rounding = shift > 0 ? (1 << shift) >> 1 : 0;
int64_t dist = 0;
int diff;
if (alpha_q3 == 0) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
diff = src[i] - dc_pred;
dist += diff * diff;
}
src += src_stride;
}
dist = (dist + rounding) >> shift;
if (dist_neg_out) *dist_neg_out = dist;
return dist;
}
int64_t dist_neg = 0;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
const int uv = src[i];
diff = uv - clip_pixel_highbd(
get_scaled_luma_q0(alpha_q3, pred_buf_q3[i]) + dc_pred,
bit_depth);
dist += diff * diff;
diff = uv - clip_pixel_highbd(
get_scaled_luma_q0(-alpha_q3, pred_buf_q3[i]) + dc_pred,
bit_depth);
dist_neg += diff * diff;
}
pred_buf_q3 += MAX_SB_SIZE;
src += src_stride;
}
if (dist_neg_out) *dist_neg_out = (dist_neg + rounding) >> shift;
return (dist + rounding) >> shift;
}
#endif // CONFIG_HIGHBITDEPTH
static int64_t cfl_alpha_dist(const int16_t *pred_buf_q3, const uint8_t *src,
int src_stride, int width, int height,
int dc_pred, int alpha_q3, int use_hbd,
int bit_depth, int64_t *dist_neg_out) {
#if CONFIG_HIGHBITDEPTH
if (use_hbd) {
const uint16_t *src_16 = CONVERT_TO_SHORTPTR(src);
return cfl_alpha_dist_hbd(pred_buf_q3, src_16, src_stride, width, height,
dc_pred, alpha_q3, bit_depth, dist_neg_out);
}
#endif // CONFIG_HIGHBITDEPTH
(void)use_hbd;
(void)bit_depth;
return cfl_alpha_dist_lbd(pred_buf_q3, src, src_stride, width, height,
dc_pred, alpha_q3, dist_neg_out);
static void txfm_rd_in_plane_once(MACROBLOCK *const x,
const AV1_COMP *const cpi, BLOCK_SIZE bsize,
TX_SIZE tx_size, int plane, int64_t *dist,
int *rate) {
RD_STATS rd_stats;
av1_init_rd_stats(&rd_stats);
txfm_rd_in_plane(x, cpi, &rd_stats, INT64_MAX, plane, bsize, tx_size,
cpi->sf.use_fast_coef_costing);
*dist = rd_stats.dist;
*rate = rd_stats.rate;
}
static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
const uint8_t *const src_u = p_u->src.buf;
const uint8_t *const src_v = p_v->src.buf;
const int src_stride_u = p_u->src.stride;
const int src_stride_v = p_v->src.stride;
static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
BLOCK_SIZE bsize, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x,
xd->plane[AOM_PLANE_U].subsampling_y);
CFL_CTX *const cfl = xd->cfl;
cfl_compute_parameters(xd, tx_size);
const int width = cfl->uv_width;
const int height = cfl->uv_height;
const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
const int16_t *pred_buf_q3 = cfl->pred_buf_q3;
const int use_hbd = get_bitdepth_data_path_index(xd);
int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] =
cfl_alpha_dist(pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
0, use_hbd, xd->bd, NULL);
sse[CFL_PRED_V][0] =
cfl_alpha_dist(pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
0, use_hbd, xd->bd, NULL);
int rates[CFL_PRED_PLANES][CFL_MAGS_SIZE];
int64_t dists[CFL_PRED_PLANES][CFL_MAGS_SIZE];
mbmi->cfl_alpha_idx = 0;
mbmi->cfl_alpha_signs = CFL_SIGN_ZERO * CFL_SIGNS + CFL_SIGN_POS - 1;
txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U,
&dists[CFL_PRED_U][0], &rates[CFL_PRED_U][0]);
mbmi->cfl_alpha_signs = CFL_SIGN_POS * CFL_SIGNS + CFL_SIGN_ZERO - 1;
txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V,
&dists[CFL_PRED_V][0], &rates[CFL_PRED_V][0]);
for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
const int m = c * 2 + 1;
const int abs_alpha_q3 = c + 1;
sse[CFL_PRED_U][m] = cfl_alpha_dist(
pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_V][m + 1]);
mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
for (int sign = CFL_SIGN_NEG; sign < CFL_SIGNS; sign++) {
const int m = c * 2 + 1 + (sign == CFL_SIGN_NEG);
mbmi->cfl_alpha_signs = sign * CFL_SIGNS + sign - 1;
txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_U,
&dists[CFL_PRED_U][m], &rates[CFL_PRED_U][m]);
txfm_rd_in_plane_once(x, cpi, bsize, tx_size, AOM_PLANE_V,
&dists[CFL_PRED_V][m], &rates[CFL_PRED_V][m]);
}
}
int64_t dist;
int64_t cost;
int64_t best_cost = INT64_MAX;
int best_rate_overhead = 0;
#if CONFIG_DEBUG
int best_rate = 0;
#endif // CONFIG_DEBUG
// Compute least squares parameter of the entire block
int ind = 0;
int signs = 0;
......@@ -5567,17 +5464,23 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
const int idx_u = (sign_u == CFL_SIGN_ZERO) ? 0 : u * 2 + 1;
for (int v = 0; v < size_v; v++) {
const int idx_v = (sign_v == CFL_SIGN_ZERO) ? 0 : v * 2 + 1;
dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
dist *= 16;
const int rate = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
x->cfl_cost[joint_sign][CFL_PRED_V][v];
dist = dists[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
dists[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
int rate_overhead = x->cfl_cost[joint_sign][CFL_PRED_U][u] +
x->cfl_cost[joint_sign][CFL_PRED_V][v];
int rate = x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED] +
rate_overhead +
rates[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] +
rates[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)];
cost = RDCOST(x->rdmult, rate, dist);
if (cost < best_cost) {
best_cost = cost;
best_rate = rate;
best_rate_overhead = rate_overhead;
ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
signs = joint_sign;
#if CONFIG_DEBUG
best_rate = rate;
#endif // CONFIG_DEBUG
}
}
}
......@@ -5585,7 +5488,10 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
mbmi->cfl_alpha_idx = ind;
mbmi->cfl_alpha_signs = signs;
return best_rate;
#if CONFIG_DEBUG
xd->cfl->rate = best_rate;
#endif // CONFIG_DEBUG
return best_rate_overhead;
}
#endif // CONFIG_CFL
......@@ -5628,7 +5534,7 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
if (mode == UV_CFL_PRED) {
assert(!is_directional_mode);
const TX_SIZE uv_tx_size = av1_get_uv_tx_size(mbmi, &xd->plane[1]);
cfl_alpha_rate = cfl_rd_pick_alpha(x, uv_tx_size);
cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, bsize, uv_tx_size);
}
#endif
#if CONFIG_EXT_INTRA
......@@ -5653,6 +5559,9 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
#if CONFIG_CFL
if (mode == UV_CFL_PRED) {
this_rate += cfl_alpha_rate;
#if CONFIG_DEBUG
assert(xd->cfl->rate == this_rate);
#endif // CONFIG_DEBUG
}
#endif
#if CONFIG_EXT_INTRA
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment