Commit 7651b739 authored by Luc Trudeau's avatar Luc Trudeau

[CFL] Q0 DC_Pred

The block level DC_PRED computed by CfL goes down from Q6 to Q0. This
will allow to reuse existing assembly for DC_PRED and also reduce the
requirements on the multilpy required to scale the reconstructed luma
values

Results on Subset1 (compared to f9684d222 with CfL enabled)

   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0347 |  0.0229 | -0.1326 |  -0.0420 | -0.0057 | -0.0072 |    -0.0644

Change-Id: I6ba82cc9e04fa4ab7c8ec40a7856deb273881748
parent e0f3a756
......@@ -179,14 +179,8 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
// TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
// not be a power of two. So these divisions will have to use a lookup table.
cfl->dc_pred_q6[CFL_PRED_U] = ((sum_u << 6) + (num_pel >> 1)) / num_pel;
cfl->dc_pred_q6[CFL_PRED_V] = ((sum_v << 6) + (num_pel >> 1)) / num_pel;
// Loss is never more than 1/2 (in Q6)
assert(fabs(cfl->dc_pred_q6[CFL_PRED_U] - (sum_u / ((double)num_pel) * 64)) <=
0.5);
assert(fabs(cfl->dc_pred_q6[CFL_PRED_V] - (sum_v / ((double)num_pel) * 64)) <=
0.5);
cfl->dc_pred[CFL_PRED_U] = (sum_u + (num_pel >> 1)) / num_pel;
cfl->dc_pred[CFL_PRED_V] = (sum_v + (num_pel >> 1)) / num_pel;
}
static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
......@@ -260,7 +254,7 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
// TODO(ltrudeau) Convert to uint16 to support HBD
const uint8_t *y_pix = cfl->y_down_pix;
const int dc_pred_bias_q6 = cfl->dc_pred_q6[plane - 1] + 32;
const int dc_pred = cfl->dc_pred[plane - 1];
const double alpha = cfl_idx_to_alpha(
mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
// TODO(ltrudeau) Convert alpha to fixed point.
......@@ -276,18 +270,9 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
const int pred_q6 =
get_scaled_luma_q6(alpha_q3, y_pix[i], avg_q3) + dc_pred_bias_q6;
// TODO(ltrudeau) Manage HBD.
if (pred_q6 <= 0) {
dst[i] = 0;
} else if (pred_q6 > (255 << 6)) {
dst[i] = 255;
} else {
dst[i] = (uint8_t)(pred_q6 >> 6);
assert(dst[i] == (int)(alpha * (y_pix[i] - (avg_q3 / 8.0)) +
(cfl->dc_pred_q6[plane - 1] / 64.0) + 0.5));
}
// TODO(ltrudeau) add support for HBD.
dst[i] =
clip_pixel(get_scaled_luma_q0(alpha_q3, y_pix[i], avg_q3) + dc_pred);
}
dst += dst_stride;
y_pix += MAX_SB_SIZE;
......
......@@ -57,11 +57,7 @@ typedef struct {
int subsampling_x, subsampling_y;
// Block level DC_PRED for each chromatic plane
// Fixed point dc_pred is Q12.6
// * Worst case division is 1/128
// * Max error is 1/128th
// Note: 6 is chosen because alpha_q3 * y_average_q3 implies Q6
int dc_pred_q6[CFL_PRED_PLANES];
int dc_pred[CFL_PRED_PLANES];
// The rate associated with each alpha codeword
int costs[CFL_ALPHABET_SIZE];
......@@ -80,8 +76,8 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
{ 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 }
};
static INLINE int get_scaled_luma_q6(int alpha_q3, int y_pix, int avg_q3) {
return alpha_q3 * ((y_pix << 3) - avg_q3);
static INLINE int get_scaled_luma_q0(int alpha_q3, int y_pix, int avg_q3) {
return (alpha_q3 * ((y_pix << 3) - avg_q3) + 32) >> 6;
}
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
......
......@@ -1415,16 +1415,15 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
const int y_averages_q3[MAX_NUM_TXB],
const uint8_t *src, int src_stride, int width,
int height, TX_SIZE tx_size, int dc_pred_q6,
int height, TX_SIZE tx_size, int dc_pred,
double alpha, int *dist_neg_out) {
int dist = 0;
int diff;
if (alpha == 0.0) {
const int dc_pred_bias = (dc_pred_q6 + 32) >> 6;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
diff = src[i] - dc_pred_bias;
diff = src[i] - dc_pred;
dist += diff * diff;
}
src += src_stride;
......@@ -1435,7 +1434,6 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
return dist;
}
const int dc_pred_bias_q6 = dc_pred_q6 + 32;
// TODO(ltrudeau) Convert alpha to fixed point
const int alpha_q3 = (int)(alpha * 8);
int dist_neg = 0;
......@@ -1457,17 +1455,15 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
for (int t_i = b_i; t_i < w; t_i++) {
const int uv = t_src[t_i];
const int scaled_luma_q6 =
get_scaled_luma_q6(alpha_q3, t_y_pix[t_i], tx_avg_q3);
const int scaled_luma =
get_scaled_luma_q0(alpha_q3, t_y_pix[t_i], tx_avg_q3);
// TODO(ltrudeau) add support for HBD.
diff = uv -
(clamp(scaled_luma_q6 + dc_pred_bias_q6, 0, (255 << 6)) >> 6);
diff = uv - clamp(scaled_luma + dc_pred, 0, 255);
dist += diff * diff;
// TODO(ltrudeau) add support for HBD.
diff = uv -
(clamp(-scaled_luma_q6 + dc_pred_bias_q6, 0, (255 << 6)) >> 6);
diff = uv - clamp(-scaled_luma + dc_pred, 0, 255);
dist_neg += diff * diff;
}
t_y_pix += y_stride;
......@@ -1517,8 +1513,8 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
cfl_compute_parameters(xd, tx_size);
const int width = cfl->uv_width;
const int height = cfl->uv_height;
const int dc_pred_u_q6 = cfl->dc_pred_q6[CFL_PRED_U];
const int dc_pred_v_q6 = cfl->dc_pred_q6[CFL_PRED_V];
const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
const int *y_averages_q3 = cfl->y_averages_q3;
const uint8_t *y_pix = cfl->y_down_pix;
......@@ -1529,19 +1525,19 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] =
cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
width, height, tx_size, dc_pred_u_q6, 0, NULL);
width, height, tx_size, dc_pred_u, 0, NULL);
sse[CFL_PRED_V][0] =
cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
width, height, tx_size, dc_pred_v_q6, 0, NULL);
width, height, tx_size, dc_pred_v, 0, NULL);
for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
sse[CFL_PRED_U][m] = cfl_alpha_dist(
y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
tx_size, dc_pred_u_q6, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
tx_size, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
tx_size, dc_pred_v_q6, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
tx_size, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
}
int dist;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment