Commit 475fc9df authored by Luc Trudeau, committed by David Michael Barr

[CFL] Fewer bits for fixed point

Since alpha is Q3, we reduce y_average from Q10 to Q3. As such, the
prediction (alpha times the luma difference, Q3 * Q3) is reduced from Q13
to Q6. Chroma dc_pred is reduced from Q7 to Q6 in order to match the
prediction.

Results on Subset1 (compared to 209de2e5b with CfL enabled)

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0010 |  0.0176 | -0.0538 |  -0.0043 | 0.0027 | -0.0097 |    -0.0018

Change-Id: Ib7dd3968a764e0380ddc0ad2333ebacf1e9699cd
parent af7846ec
...@@ -179,8 +179,14 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) { ...@@ -179,8 +179,14 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
// TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will // TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will
// not be a power of two. So these divisions will have to use a lookup table. // not be a power of two. So these divisions will have to use a lookup table.
cfl->dc_pred_q7[CFL_PRED_U] = (sum_u << 7) / num_pel; cfl->dc_pred_q6[CFL_PRED_U] = ((sum_u << 6) + (num_pel >> 1)) / num_pel;
cfl->dc_pred_q7[CFL_PRED_V] = (sum_v << 7) / num_pel; cfl->dc_pred_q6[CFL_PRED_V] = ((sum_v << 6) + (num_pel >> 1)) / num_pel;
// Loss is never more than 1/2 (in Q6)
assert(fabs(cfl->dc_pred_q6[CFL_PRED_U] - (sum_u / ((double)num_pel) * 64)) <=
0.5);
assert(fabs(cfl->dc_pred_q6[CFL_PRED_V] - (sum_v / ((double)num_pel) * 64)) <=
0.5);
} }
static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) { static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
...@@ -197,7 +203,7 @@ static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) { ...@@ -197,7 +203,7 @@ static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
const uint8_t *y_pix = cfl->y_down_pix; const uint8_t *y_pix = cfl->y_down_pix;
// TODO(ltrudeau) Convert to uint16 for HBD support // TODO(ltrudeau) Convert to uint16 for HBD support
const uint8_t *t_y_pix; const uint8_t *t_y_pix;
int *averages_q10 = cfl->y_averages_q10; int *averages_q3 = cfl->y_averages_q3;
cfl_load(cfl, 0, 0, width, height); cfl_load(cfl, 0, 0, width, height);
...@@ -212,11 +218,12 @@ static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) { ...@@ -212,11 +218,12 @@ static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) {
} }
t_y_pix += MAX_SB_SIZE; t_y_pix += MAX_SB_SIZE;
} }
averages_q10[a++] = (sum << 10) >> num_pel_log2; averages_q3[a++] =
((sum << 3) + (1 << (num_pel_log2 - 1))) >> num_pel_log2;
// Assert no loss from fixed point // Loss is never more than 1/2 (in Q3)
assert((double)averages_q10[a - 1] == assert(fabs((double)averages_q3[a - 1] -
(sum / ((double)(1 << num_pel_log2))) * (1 << 10)); (sum / ((double)(1 << num_pel_log2))) * (1 << 3)) <= 0.5);
} }
assert(a % stride == 0); assert(a % stride == 0);
y_pix += block_row_stride; y_pix += block_row_stride;
...@@ -253,7 +260,7 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, ...@@ -253,7 +260,7 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
// TODO(ltrudeau) Convert to uint16 to support HBD // TODO(ltrudeau) Convert to uint16 to support HBD
const uint8_t *y_pix = cfl->y_down_pix; const uint8_t *y_pix = cfl->y_down_pix;
const int dc_pred_bias_q13 = (cfl->dc_pred_q7[plane - 1] << 6) + (1 << 12); const int dc_pred_bias_q6 = cfl->dc_pred_q6[plane - 1] + 32;
const double alpha = cfl_idx_to_alpha( const double alpha = cfl_idx_to_alpha(
mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1); mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
// TODO(ltrudeau) Convert alpha to fixed point. // TODO(ltrudeau) Convert alpha to fixed point.
...@@ -263,23 +270,23 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, ...@@ -263,23 +270,23 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
(row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size]; (row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size];
const int avg_col = const int avg_col =
(col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size]; (col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size];
const int avg_q10 = const int avg_q3 =
cfl->y_averages_q10[cfl->y_averages_stride * avg_row + avg_col]; cfl->y_averages_q3[cfl->y_averages_stride * avg_row + avg_col];
cfl_load(cfl, row, col, width, height); cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) { for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) { for (int i = 0; i < width; i++) {
const int pred_q13 = const int pred_q6 =
get_scaled_luma_q13(alpha_q3, y_pix[i], avg_q10) + dc_pred_bias_q13; get_scaled_luma_q6(alpha_q3, y_pix[i], avg_q3) + dc_pred_bias_q6;
// TODO(ltrudeau) Manage HBD. // TODO(ltrudeau) Manage HBD.
if (pred_q13 <= 0) { if (pred_q6 <= 0) {
dst[i] = 0; dst[i] = 0;
} else if (pred_q13 > (255 << 13)) { } else if (pred_q6 > (255 << 6)) {
dst[i] = 255; dst[i] = 255;
} else { } else {
dst[i] = (uint8_t)(pred_q13 >> 13); dst[i] = (uint8_t)(pred_q6 >> 6);
assert(dst[i] == (int)(alpha * (y_pix[i] - (avg_q10 / 1024.0)) + assert(dst[i] == (int)(alpha * (y_pix[i] - (avg_q3 / 8.0)) +
(cfl->dc_pred_q7[plane - 1] / 128.0) + 0.5)); (cfl->dc_pred_q6[plane - 1] / 64.0) + 0.5));
} }
} }
dst += dst_stride; dst += dst_stride;
......
...@@ -43,9 +43,12 @@ typedef struct { ...@@ -43,9 +43,12 @@ typedef struct {
// Transform level averages of the luma reconstructed values over the entire // Transform level averages of the luma reconstructed values over the entire
// prediction unit // prediction unit
// Fixed point y_averages is Q12.10: // Fixed point y_averages is Q12.3:
// * Worst case division is 1/1024 // * Worst case division is 1/1024
int y_averages_q10[MAX_NUM_TXB]; // * Max error will be 1/16th.
// Note: 3 is chosen so that y_averages fits in 15 bits when 12 bit input is
// used
int y_averages_q3[MAX_NUM_TXB];
int y_averages_stride; int y_averages_stride;
int are_parameters_computed; int are_parameters_computed;
...@@ -54,9 +57,11 @@ typedef struct { ...@@ -54,9 +57,11 @@ typedef struct {
int subsampling_x, subsampling_y; int subsampling_x, subsampling_y;
// Block level DC_PRED for each chromatic plane // Block level DC_PRED for each chromatic plane
// Fixed point dc_pred is Q12.7: // Fixed point dc_pred is Q12.6
// * Worst case division is 1/128 // * Worst case division is 1/128
int dc_pred_q7[CFL_PRED_PLANES]; // * Max error is 1/128th
// Note: 6 is chosen because alpha_q3 * y_average_q3 implies Q6
int dc_pred_q6[CFL_PRED_PLANES];
// The rate associated with each alpha codeword // The rate associated with each alpha codeword
int costs[CFL_ALPHABET_SIZE]; int costs[CFL_ALPHABET_SIZE];
...@@ -75,8 +80,8 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = { ...@@ -75,8 +80,8 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
{ 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 } { 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 }
}; };
static INLINE int get_scaled_luma_q13(int alpha_q3, int y_pix, int avg_q10) { static INLINE int get_scaled_luma_q6(int alpha_q3, int y_pix, int avg_q3) {
return alpha_q3 * ((y_pix << 10) - avg_q10); return alpha_q3 * ((y_pix << 3) - avg_q3);
} }
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm); void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
......
...@@ -1429,15 +1429,15 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, ...@@ -1429,15 +1429,15 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
#if CONFIG_CFL #if CONFIG_CFL
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
const int y_averages_q10[MAX_NUM_TXB], const int y_averages_q3[MAX_NUM_TXB],
const uint8_t *src, int src_stride, int width, const uint8_t *src, int src_stride, int width,
int height, TX_SIZE tx_size, int dc_pred_q7, int height, TX_SIZE tx_size, int dc_pred_q6,
double alpha, int *dist_neg_out) { double alpha, int *dist_neg_out) {
int dist = 0; int dist = 0;
int diff; int diff;
if (alpha == 0.0) { if (alpha == 0.0) {
const int dc_pred_bias = (dc_pred_q7 + 64) >> 7; const int dc_pred_bias = (dc_pred_q6 + 32) >> 6;
for (int j = 0; j < height; j++) { for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) { for (int i = 0; i < width; i++) {
diff = src[i] - dc_pred_bias; diff = src[i] - dc_pred_bias;
...@@ -1451,7 +1451,7 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, ...@@ -1451,7 +1451,7 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
return dist; return dist;
} }
const int dc_pred_bias_q13 = (dc_pred_q7 << 6) + (1 << 12); const int dc_pred_bias_q6 = dc_pred_q6 + 32;
// TODO(ltrudeau) Convert alpha to fixed point // TODO(ltrudeau) Convert alpha to fixed point
const int alpha_q3 = (int)(alpha * 8); const int alpha_q3 = (int)(alpha * 8);
int dist_neg = 0; int dist_neg = 0;
...@@ -1466,26 +1466,24 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, ...@@ -1466,26 +1466,24 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
const int h = b_j + tx_height; const int h = b_j + tx_height;
for (int b_i = 0; b_i < width; b_i += tx_width) { for (int b_i = 0; b_i < width; b_i += tx_width) {
const int w = b_i + tx_width; const int w = b_i + tx_width;
const int tx_avg_q10 = y_averages_q10[a++]; const int tx_avg_q3 = y_averages_q3[a++];
t_y_pix = y_pix; t_y_pix = y_pix;
t_src = src; t_src = src;
for (int t_j = b_j; t_j < h; t_j++) { for (int t_j = b_j; t_j < h; t_j++) {
for (int t_i = b_i; t_i < w; t_i++) { for (int t_i = b_i; t_i < w; t_i++) {
const int scaled_luma_q13 =
get_scaled_luma_q13(alpha_q3, t_y_pix[t_i], tx_avg_q10);
const int uv = t_src[t_i]; const int uv = t_src[t_i];
const int scaled_luma_q6 =
get_scaled_luma_q6(alpha_q3, t_y_pix[t_i], tx_avg_q3);
// TODO(ltrudeau) add support for HBD. // TODO(ltrudeau) add support for HBD.
diff = diff = uv -
uv - (clamp(scaled_luma_q6 + dc_pred_bias_q6, 0, (255 << 6)) >> 6);
(clamp(scaled_luma_q13 + dc_pred_bias_q13, 0, (255 << 13)) >> 13);
dist += diff * diff; dist += diff * diff;
// TODO(ltrudeau) add support for HBD. // TODO(ltrudeau) add support for HBD.
diff = uv - diff = uv -
(clamp(-scaled_luma_q13 + dc_pred_bias_q13, 0, (255 << 13)) >> (clamp(-scaled_luma_q6 + dc_pred_bias_q6, 0, (255 << 6)) >> 6);
13);
dist_neg += diff * diff; dist_neg += diff * diff;
} }
t_y_pix += y_stride; t_y_pix += y_stride;
...@@ -1535,9 +1533,9 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx, ...@@ -1535,9 +1533,9 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
cfl_compute_parameters(xd, tx_size); cfl_compute_parameters(xd, tx_size);
const int width = cfl->uv_width; const int width = cfl->uv_width;
const int height = cfl->uv_height; const int height = cfl->uv_height;
const int dc_pred_u_q7 = cfl->dc_pred_q7[CFL_PRED_U]; const int dc_pred_u_q6 = cfl->dc_pred_q6[CFL_PRED_U];
const int dc_pred_v_q7 = cfl->dc_pred_q7[CFL_PRED_V]; const int dc_pred_v_q6 = cfl->dc_pred_q6[CFL_PRED_V];
const int *y_averages_q10 = cfl->y_averages_q10; const int *y_averages_q3 = cfl->y_averages_q3;
const uint8_t *y_pix = cfl->y_down_pix; const uint8_t *y_pix = cfl->y_down_pix;
CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs; CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs;
...@@ -1546,20 +1544,20 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx, ...@@ -1546,20 +1544,20 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] = sse[CFL_PRED_U][0] =
cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q10, src_u, src_stride_u, cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u,
width, height, tx_size, dc_pred_u_q7, 0, NULL); width, height, tx_size, dc_pred_u_q6, 0, NULL);
sse[CFL_PRED_V][0] = sse[CFL_PRED_V][0] =
cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q10, src_v, src_stride_v, cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v,
width, height, tx_size, dc_pred_v_q7, 0, NULL); width, height, tx_size, dc_pred_v_q6, 0, NULL);
for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]); assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
sse[CFL_PRED_U][m] = cfl_alpha_dist( sse[CFL_PRED_U][m] = cfl_alpha_dist(
y_pix, MAX_SB_SIZE, y_averages_q10, src_u, src_stride_u, width, height, y_pix, MAX_SB_SIZE, y_averages_q3, src_u, src_stride_u, width, height,
tx_size, dc_pred_u_q7, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); tx_size, dc_pred_u_q6, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist( sse[CFL_PRED_V][m] = cfl_alpha_dist(
y_pix, MAX_SB_SIZE, y_averages_q10, src_v, src_stride_v, width, height, y_pix, MAX_SB_SIZE, y_averages_q3, src_v, src_stride_v, width, height,
tx_size, dc_pred_v_q7, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); tx_size, dc_pred_v_q6, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
} }
int dist; int dist;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment