diff --git a/av1/common/cfl.c b/av1/common/cfl.c index 5f5e397179d242b3e7fdb91760c3365601b22187..b9e356be6ee626ad461192e4c3b5e8dc2d411f7a 100644 --- a/av1/common/cfl.c +++ b/av1/common/cfl.c @@ -133,7 +133,7 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) { const int height = max_block_high(xd, plane_bsize, AOM_PLANE_U) << tx_size_high_log2[0]; // Number of pixel on the top and left borders. - const double num_pel = width + height; + const int num_pel = width + height; int sum_u = 0; int sum_v = 0; @@ -179,8 +179,8 @@ static void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) { // TODO(ltrudeau) Because of max_block_wide and max_block_high, num_pel will // not be a power of two. So these divisions will have to use a lookup table. - cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel; - cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel; + cfl->dc_pred_q7[CFL_PRED_U] = (sum_u << 7) / num_pel; + cfl->dc_pred_q7[CFL_PRED_V] = (sum_v << 7) / num_pel; } static void cfl_compute_averages(CFL_CTX *cfl, TX_SIZE tx_size) { @@ -253,22 +253,34 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, // TODO(ltrudeau) Convert to uint16 to support HBD const uint8_t *y_pix = cfl->y_down_pix; - const double dc_pred = cfl->dc_pred[plane - 1]; + const int dc_pred_bias_q13 = (cfl->dc_pred_q7[plane - 1] << 6) + (1 << 12); const double alpha = cfl_idx_to_alpha( mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1); + // TODO(ltrudeau) Convert alpha to fixed point. + const int alpha_q3 = (int)(alpha * 8); const int avg_row = (row << tx_size_wide_log2[0]) >> tx_size_wide_log2[tx_size]; const int avg_col = (col << tx_size_high_log2[0]) >> tx_size_high_log2[tx_size]; - const double avg = - cfl->y_averages_q10[cfl->y_averages_stride * avg_row + avg_col] / 1024.0; + const int avg_q10 = + cfl->y_averages_q10[cfl->y_averages_stride * avg_row + avg_col]; cfl_load(cfl, row, col, width, height); for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { - // TODO(ltrudeau) call clip_pixel_highbd when HBD is enabled. - dst[i] = clip_pixel((int)(alpha * (y_pix[i] - avg) + dc_pred + 0.5)); + const int pred_q13 = + get_scaled_luma_q13(alpha_q3, y_pix[i], avg_q10) + dc_pred_bias_q13; + // TODO(ltrudeau) Manage HBD. + if (pred_q13 <= 0) { + dst[i] = 0; + } else if (pred_q13 > (255 << 13)) { + dst[i] = 255; + } else { + dst[i] = (uint8_t)(pred_q13 >> 13); + assert(dst[i] == (int)(alpha * (y_pix[i] - (avg_q10 / 1024.0)) + + (cfl->dc_pred_q7[plane - 1] / 128.0) + 0.5)); + } } dst += dst_stride; y_pix += MAX_SB_SIZE; diff --git a/av1/common/cfl.h b/av1/common/cfl.h index 66e5359126fac2fe46ecc519ae5e4d7ce298dd8d..7470eb8ad1c6dcc4bb0ff21f62dbea28519f66af 100644 --- a/av1/common/cfl.h +++ b/av1/common/cfl.h @@ -53,8 +53,10 @@ typedef struct { // Chroma subsampling int subsampling_x, subsampling_y; - // CfL Performs its own block level DC_PRED for each chromatic plane - double dc_pred[CFL_PRED_PLANES]; + // Block level DC_PRED for each chromatic plane + // Fixed point dc_pred is Q12.7: + // * Worst case division is 1/128 + int dc_pred_q7[CFL_PRED_PLANES]; // The rate associated with each alpha codeword int costs[CFL_ALPHABET_SIZE]; @@ -73,6 +75,10 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = { { 0, 3 }, { 5, 1 }, { 1, 5 }, { 0, 5 } }; +static INLINE int get_scaled_luma_q13(int alpha_q3, int y_pix, int avg_q10) { + return alpha_q3 * ((y_pix << 10) - avg_q10); +} + void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm); void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride, diff --git a/av1/encoder/encodemb.c b/av1/encoder/encodemb.c index 27ab853cc08a1aab636101ad14bf1cb2ddc8edea..28461416f5ccb2e9d4fb0734e5502cdea717b98f 100644 --- a/av1/encoder/encodemb.c +++ b/av1/encoder/encodemb.c @@ -1431,17 +1431,16 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, const int y_averages_q10[MAX_NUM_TXB], const uint8_t *src, int src_stride, int width, - int height, TX_SIZE tx_size, double dc_pred, + int height, TX_SIZE tx_size, int dc_pred_q7, double alpha, int *dist_neg_out) { - const double dc_pred_bias = dc_pred + 0.5; int dist = 0; int diff; if (alpha == 0.0) { - const int dc_pred_i = (int)dc_pred_bias; + const int dc_pred_bias = (dc_pred_q7 + 64) >> 7; for (int j = 0; j < height; j++) { for (int i = 0; i < width; i++) { - diff = src[i] - dc_pred_i; + diff = src[i] - dc_pred_bias; dist += diff * diff; } src += src_stride; @@ -1452,6 +1451,9 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, return dist; } + const int dc_pred_bias_q13 = (dc_pred_q7 << 6) + (1 << 12); + // TODO(ltrudeau) Convert alpha to fixed point + const int alpha_q3 = (int)(alpha * 8); int dist_neg = 0; const int tx_height = tx_size_high[tx_size]; const int tx_width = tx_size_wide[tx_size]; @@ -1464,21 +1466,26 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, const int h = b_j + tx_height; for (int b_i = 0; b_i < width; b_i += tx_width) { const int w = b_i + tx_width; - // TODO(ltrudeau) Remove div when DC_PRED is also fixed point - const double tx_avg_q10 = y_averages_q10[a++] / 1024.0; + const int tx_avg_q10 = y_averages_q10[a++]; t_y_pix = y_pix; t_src = src; for (int t_j = b_j; t_j < h; t_j++) { for (int t_i = b_i; t_i < w; t_i++) { - const double scaled_luma = alpha * (t_y_pix[t_i] - tx_avg_q10); + const int scaled_luma_q13 = + get_scaled_luma_q13(alpha_q3, t_y_pix[t_i], tx_avg_q10); + const int uv = t_src[t_i]; // TODO(ltrudeau) add support for HBD. - diff = uv - clip_pixel((int)(scaled_luma + dc_pred_bias)); + diff = + uv - + (clamp(scaled_luma_q13 + dc_pred_bias_q13, 0, (255 << 13)) >> 13); dist += diff * diff; // TODO(ltrudeau) add support for HBD. - diff = uv - clip_pixel((int)(-scaled_luma + dc_pred_bias)); + diff = uv - + (clamp(-scaled_luma_q13 + dc_pred_bias_q13, 0, (255 << 13)) >> + 13); dist_neg += diff * diff; } t_y_pix += y_stride; @@ -1528,9 +1535,9 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx, cfl_compute_parameters(xd, tx_size); const int width = cfl->uv_width; const int height = cfl->uv_height; - const double dc_pred_u = cfl->dc_pred[CFL_PRED_U]; - const double dc_pred_v = cfl->dc_pred[CFL_PRED_V]; - const int *y_averages = cfl->y_averages_q10; + const int dc_pred_u_q7 = cfl->dc_pred_q7[CFL_PRED_U]; + const int dc_pred_v_q7 = cfl->dc_pred_q7[CFL_PRED_V]; + const int *y_averages_q10 = cfl->y_averages_q10; const uint8_t *y_pix = cfl->y_down_pix; CFL_SIGN_TYPE *signs = mbmi->cfl_alpha_signs; @@ -1539,20 +1546,20 @@ static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx, int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; sse[CFL_PRED_U][0] = - cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages, src_u, src_stride_u, width, - height, tx_size, dc_pred_u, 0, NULL); + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q10, src_u, src_stride_u, + width, height, tx_size, dc_pred_u_q7, 0, NULL); sse[CFL_PRED_V][0] = - cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages, src_v, src_stride_v, width, - height, tx_size, dc_pred_v, 0, NULL); + cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_averages_q10, src_v, src_stride_v, + width, height, tx_size, dc_pred_v_q7, 0, NULL); for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]); sse[CFL_PRED_U][m] = cfl_alpha_dist( - y_pix, MAX_SB_SIZE, y_averages, src_u, src_stride_u, width, height, - tx_size, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); + y_pix, MAX_SB_SIZE, y_averages_q10, src_u, src_stride_u, width, height, + tx_size, dc_pred_u_q7, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); sse[CFL_PRED_V][m] = cfl_alpha_dist( - y_pix, MAX_SB_SIZE, y_averages, src_v, src_stride_v, width, height, - tx_size, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); + y_pix, MAX_SB_SIZE, y_averages_q10, src_v, src_stride_v, width, height, + tx_size, dc_pred_v_q7, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); } int dist;