Commit 3dc55e0f authored by Luc Trudeau

[CFL] Better encapsulation

The function cfl_compute_parameters is added and contains the logic
related to building the CfL context parameters. As such, many cfl
functions can now be encapsulated inside of cfl.c and not exposed to the
rest of AV1.

This also allows for supplemental asserts that validate that the CfL
context is properly built.

Results on Subset1 (compared to 9c6f8547 with CfL)

  PSNR | PSNR Cb | PSNR Cr | PSNR HVS |   SSIM | MS SSIM | CIEDE 2000
0.0000 |  0.0000 |  0.0000 |   0.0000 | 0.0000 |  0.0000 |     0.0000

Change-Id: I6d14a426416b3af5491bdc145db7281b5e988cae
parent 24d565b4
...@@ -24,11 +24,97 @@ void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) { ...@@ -24,11 +24,97 @@ void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE); memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE);
cfl->subsampling_x = cm->subsampling_x; cfl->subsampling_x = cm->subsampling_x;
cfl->subsampling_y = cm->subsampling_y; cfl->subsampling_y = cm->subsampling_y;
cfl->are_parameters_computed = 0;
}
// Load from the CfL pixel buffer (cfl->y_pix) into cfl->y_down_pix.
//
// Writes a width x height block starting at the beginning of
// cfl->y_down_pix; both buffers are walked with a row stride of MAX_SB_SIZE.
//
//   row, col      - position of the block; both are shifted left by
//                   tx_size_wide_log2[0] to obtain pixel offsets (presumably
//                   the log2 width of the smallest transform - confirm).
//   width, height - size, in output pixels, of the block to produce.
//
// Only 4:4:4 (sub_x == 0, sub_y == 0) and 4:2:0 (sub_x == 1, sub_y == 1) are
// handled; any other combination hits assert(0).
static void cfl_load(CFL_CTX *cfl, int row, int col, int width, int height) {
const int sub_x = cfl->subsampling_x;
const int sub_y = cfl->subsampling_y;
const int off_log2 = tx_size_wide_log2[0];
// TODO(ltrudeau) convert to uint16 to add HBD support
const uint8_t *y_pix;
// TODO(ltrudeau) convert to uint16 to add HBD support
uint8_t *output = cfl->y_down_pix;
// Offsets of the current row inside y_pix and output respectively.
int pred_row_offset = 0;
int output_row_offset = 0;
// TODO(ltrudeau) should be faster to downsample when we store the values
// TODO(ltrudeau) add support for 4:2:2
if (sub_y == 0 && sub_x == 0) {
y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2];
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
// In 4:4:4, pixels match 1 to 1
output[output_row_offset + i] = y_pix[pred_row_offset + i];
}
pred_row_offset += MAX_SB_SIZE;
output_row_offset += MAX_SB_SIZE;
}
} else if (sub_y == 1 && sub_x == 1) {
// The extra shift by sub_y doubles the starting offset to account for the
// luma plane being twice as large as the output in each direction.
y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)];
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
// Shifting (row offset + column) by sub_y doubles both, so output pixel
// (j, i) reads the 2x2 luma cell whose top-left corner is (2j, 2i).
int top_left = (pred_row_offset + i) << sub_y;
int bot_left = top_left + MAX_SB_SIZE;
// In 4:2:0, average pixels in 2x2 grid
// (OD_SHR_ROUND(x, 2) is presumably a rounding right shift - confirm.)
output[output_row_offset + i] = OD_SHR_ROUND(
y_pix[top_left] + y_pix[top_left + 1] // Top row
+ y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row
,
2);
}
pred_row_offset += MAX_SB_SIZE;
output_row_offset += MAX_SB_SIZE;
}
} else {
assert(0); // Unsupported chroma subsampling
}
// Due to frame boundary issues, it is possible that the total area covered
// by Chroma exceeds that of Luma. When this happens, we write over the
// broken data by repeating the last valid columns and/or rows.
//
// Both overruns can happen at once. The columns are padded first, over all
// `height` rows; the rows are padded afterwards by copying the last valid
// row, which by then already contains its padded columns, so the
// bottom-right corner ends up filled as well.
//
// uv_width / uv_height: extent requested, measured in output pixels.
// cfl->y_width >> sub_x, cfl->y_height >> sub_y: luma extent stored so far,
// scaled to the output resolution.
const int uv_width = (col << off_log2) + width;
const int uv_height = (row << off_log2) + height;
const int diff_width = uv_width - (cfl->y_width >> sub_x);
const int diff_height = uv_height - (cfl->y_height >> sub_y);
if (diff_width > 0) {
int last_pixel;
// First column that lies past the available luma.
// NOTE(review): assumes diff_width < width; otherwise last_pixel below
// indexes before the start of the row - confirm callers guarantee this.
output_row_offset = width - diff_width;
for (int j = 0; j < height; j++) {
// Last valid pixel of this row, replicated into the overrun columns.
last_pixel = output_row_offset - 1;
for (int i = 0; i < diff_width; i++) {
output[output_row_offset + i] = output[last_pixel];
}
output_row_offset += MAX_SB_SIZE;
}
}
if (diff_height > 0) {
// First row that lies past the available luma.
// NOTE(review): assumes diff_height < height; otherwise last_row_offset
// is negative - confirm callers guarantee this.
output_row_offset = (height - diff_height) * MAX_SB_SIZE;
// Last valid row, replicated into every overrun row.
const int last_row_offset = output_row_offset - MAX_SB_SIZE;
for (int j = 0; j < diff_height; j++) {
for (int i = 0; i < width; i++) {
output[output_row_offset + i] = output[last_row_offset + i];
}
output_row_offset += MAX_SB_SIZE;
}
}
} }
// CfL computes its own block-level DC_PRED. This is required to compute both // CfL computes its own block-level DC_PRED. This is required to compute both
// alpha_cb and alpha_cr before the predictions are computed. // alpha_cb and alpha_cr before the predictions are computed.
void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) { static void cfl_dc_pred(MACROBLOCKD *xd) {
const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U]; const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V]; const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
...@@ -38,6 +124,9 @@ void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) { ...@@ -38,6 +124,9 @@ void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) {
const int dst_u_stride = pd_u->dst.stride; const int dst_u_stride = pd_u->dst.stride;
const int dst_v_stride = pd_v->dst.stride; const int dst_v_stride = pd_v->dst.stride;
CFL_CTX *const cfl = xd->cfl;
const int width = cfl->uv_width;
const int height = cfl->uv_height;
// Number of pixels on the top and left borders. // Number of pixels on the top and left borders.
const double num_pel = width + height; const double num_pel = width + height;
...@@ -83,37 +172,70 @@ void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) { ...@@ -83,37 +172,70 @@ void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) {
sum_v += height * 129; sum_v += height * 129;
} }
xd->cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel; cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
xd->cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel; cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
} }
double cfl_compute_average(uint8_t *y_pix, int y_stride, int width, static void cfl_compute_average(CFL_CTX *cfl) {
int height) { const int width = cfl->uv_width;
const int height = cfl->uv_height;
const double num_pel = width * height;
// TODO(ltrudeau) Convert to uint16 for HBD support
const uint8_t *y_pix = cfl->y_down_pix;
// TODO(ltrudeau) Convert to uint16 for HBD support
cfl_load(cfl, 0, 0, width, height);
int sum = 0; int sum = 0;
for (int j = 0; j < height; j++) { for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) { for (int i = 0; i < width; i++) {
sum += y_pix[i]; sum += y_pix[i];
} }
y_pix += y_stride; y_pix += MAX_SB_SIZE;
}
cfl->y_average = sum / num_pel;
}
static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
CFL_PRED_TYPE pred_type) {
const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
const double abs_alpha = cfl_alpha_mags[mag_idx];
if (alpha_sign == CFL_SIGN_POS) {
return abs_alpha;
} else {
assert(abs_alpha != 0.0);
assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha);
return -abs_alpha;
} }
return sum / (double)(width * height);
} }
// Predict the current transform block using CfL. // Predict the current transform block using CfL.
void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride, void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, double dc_pred, int row, int col, TX_SIZE tx_size, int plane) {
double alpha) { CFL_CTX *const cfl = xd->cfl;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
// CfL parameters must be computed before prediction can be done.
assert(cfl->are_parameters_computed == 1);
const int width = tx_size_wide[tx_size]; const int width = tx_size_wide[tx_size];
const int height = tx_size_high[tx_size]; const int height = tx_size_high[tx_size];
const double y_avg = cfl->y_avg; // TODO(ltrudeau) Convert to uint16 to support HBD
const uint8_t *y_pix = cfl->y_down_pix;
const double dc_pred = cfl->dc_pred[plane - 1];
const double alpha = cfl_idx_to_alpha(
mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1], plane - 1);
cfl_load(cfl, dst, dst_stride, row, col, width, height); const double avg = cfl->y_average;
cfl_load(cfl, row, col, width, height);
for (int j = 0; j < height; j++) { for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) { for (int i = 0; i < width; i++) {
dst[i] = (uint8_t)(alpha * (dst[i] - y_avg) + dc_pred + 0.5); dst[i] = (uint8_t)(alpha * (y_pix[i] - avg) + dc_pred + 0.5);
} }
dst += dst_stride; dst += dst_stride;
y_pix += MAX_SB_SIZE;
} }
} }
...@@ -130,6 +252,7 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, ...@@ -130,6 +252,7 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
assert(MAX_SB_SIZE * (row + tx_height - 1) + col + tx_width - 1 < assert(MAX_SB_SIZE * (row + tx_height - 1) + col + tx_width - 1 <
MAX_SB_SQUARE); MAX_SB_SQUARE);
// TODO(ltrudeau) Speedup possible by moving the downsampling to cfl_store
for (int j = 0; j < tx_height; j++) { for (int j = 0; j < tx_height; j++) {
for (int i = 0; i < tx_width; i++) { for (int i = 0; i < tx_width; i++) {
y_pix[i] = input[i]; y_pix[i] = input[i];
...@@ -148,85 +271,34 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, ...@@ -148,85 +271,34 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width); cfl->y_width = OD_MAXI((col << tx_off_log2) + tx_width, cfl->y_width);
cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height); cfl->y_height = OD_MAXI((row << tx_off_log2) + tx_height, cfl->y_height);
} }
}
// Load from the CfL pixel buffer into output
void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
int col, int width, int height) {
const int sub_x = cfl->subsampling_x;
const int sub_y = cfl->subsampling_y;
const int off_log2 = tx_size_wide_log2[0];
const uint8_t *y_pix;
int pred_row_offset = 0;
int output_row_offset = 0;
// TODO(ltrudeau) add support for 4:2:2 // Invalidate current parameters
if (sub_y == 0 && sub_x == 0) { cfl->are_parameters_computed = 0;
y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << off_log2]; }
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
// In 4:4:4, pixels match 1 to 1
output[output_row_offset + i] = y_pix[pred_row_offset + i];
}
pred_row_offset += MAX_SB_SIZE;
output_row_offset += output_stride;
}
} else if (sub_y == 1 && sub_x == 1) {
y_pix = &cfl->y_pix[(row * MAX_SB_SIZE + col) << (off_log2 + sub_y)];
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
int top_left = (pred_row_offset + i) << sub_y;
int bot_left = top_left + MAX_SB_SIZE;
// In 4:2:0, average pixels in 2x2 grid
output[output_row_offset + i] = OD_SHR_ROUND(
y_pix[top_left] + y_pix[top_left + 1] // Top row
+ y_pix[bot_left] + y_pix[bot_left + 1] // Bottom row
,
2);
}
pred_row_offset += MAX_SB_SIZE;
output_row_offset += output_stride;
}
} else {
assert(0); // Unsupported chroma subsampling
}
// Due to frame boundary issues, it is possible that the total area of
// covered by Chroma exceeds that of Luma. When this happens, we write over
// the broken data by repeating the last columns and/or rows.
//
// Note that in order to manage the case where both rows and columns
// overrun,
// we apply rows first. This way, when the rows overrun the bottom of the
// frame, the columns will be copied over them.
const int uv_width = (col << off_log2) + width;
const int uv_height = (row << off_log2) + height;
const int diff_width = uv_width - (cfl->y_width >> sub_x); void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
const int diff_height = uv_height - (cfl->y_height >> sub_y); CFL_CTX *const cfl = xd->cfl;
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
if (diff_width > 0) { // Do not call cfl_compute_parameters multiple time on the same values.
int last_pixel; assert(cfl->are_parameters_computed == 0);
output_row_offset = width - diff_width;
for (int j = 0; j < height; j++) { #if CONFIG_CHROMA_SUB8X8
last_pixel = output_row_offset - 1; const BLOCK_SIZE plane_bsize = AOMMAX(
for (int i = 0; i < diff_width; i++) { BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]));
output[output_row_offset + i] = output[last_pixel]; #else
} const BLOCK_SIZE plane_bsize =
output_row_offset += output_stride; get_plane_block_size(mbmi->sb_type, &xd->plane[AOM_PLANE_U]);
} #endif
} // AOM_PLANE_U is used, but both planes will have the same sizes.
cfl->uv_width = max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
cfl->uv_height =
max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
if (diff_height > 0) { // Compute block-level DC_PRED for both chromatic planes.
output_row_offset = (height - diff_height) * output_stride; // DC_PRED replaces beta in the linear model.
const int last_row_offset = output_row_offset - output_stride; cfl_dc_pred(xd);
for (int j = 0; j < diff_height; j++) { // Compute block-level average on reconstructed luma input.
for (int i = 0; i < width; i++) { cfl_compute_average(cfl);
output[output_row_offset + i] = output[last_row_offset + i]; cfl->are_parameters_computed = 1;
}
output_row_offset += output_stride;
}
}
} }
...@@ -26,13 +26,25 @@ typedef struct macroblockd MACROBLOCKD; ...@@ -26,13 +26,25 @@ typedef struct macroblockd MACROBLOCKD;
typedef struct { typedef struct {
// Pixel buffer containing the luma pixels used as prediction for chroma // Pixel buffer containing the luma pixels used as prediction for chroma
// TODO(ltrudeau) Convert to uint16 for HBD support
uint8_t y_pix[MAX_SB_SQUARE]; uint8_t y_pix[MAX_SB_SQUARE];
// Pixel buffer containing the downsampled luma pixels used as prediction for
// chroma
// TODO(ltrudeau) Convert to uint16 for HBD support
uint8_t y_down_pix[MAX_SB_SQUARE];
// Height and width of the luma prediction block currently in the pixel buffer // Height and width of the luma prediction block currently in the pixel buffer
int y_height, y_width; int y_height, y_width;
// Height and width of the chroma prediction block currently associated with
// this context
int uv_height, uv_width;
// Average of the luma reconstructed values over the entire prediction unit // Average of the luma reconstructed values over the entire prediction unit
double y_avg; double y_average;
int are_parameters_computed;
// Chroma subsampling // Chroma subsampling
int subsampling_x, subsampling_y; int subsampling_x, subsampling_y;
...@@ -57,30 +69,12 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = { ...@@ -57,30 +69,12 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm); void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
void cfl_dc_pred(MACROBLOCKD *xd, int width, int height); void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, int plane);
double cfl_compute_average(uint8_t *y_pix, int y_stride, int height, int width);
static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
CFL_PRED_TYPE pred_type) {
const int mag_idx = cfl_alpha_codes[alpha_idx][pred_type];
const double abs_alpha = cfl_alpha_mags[mag_idx];
if (alpha_sign == CFL_SIGN_POS) {
return abs_alpha;
} else {
assert(abs_alpha != 0.0);
assert(cfl_alpha_mags[mag_idx + 1] == -abs_alpha);
return -abs_alpha;
}
}
void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, double dc_pred,
double alpha);
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row, void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
int col, TX_SIZE tx_size); int col, TX_SIZE tx_size);
void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row, void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size);
int col, int width, int height);
#endif // AV1_COMMON_CFL_H_ #endif // AV1_COMMON_CFL_H_
...@@ -2719,37 +2719,16 @@ void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx, ...@@ -2719,37 +2719,16 @@ void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx,
mode, dst, dst_stride, dst, dst_stride, blk_col, mode, dst, dst_stride, dst, dst_stride, blk_col,
blk_row, plane); blk_row, plane);
#if CONFIG_CFL #if CONFIG_CFL
CFL_CTX *const cfl = xd->cfl;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) { if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) { if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) {
#if CONFIG_CHROMA_SUB8X8 // Avoid computing the CfL parameters twice, if they have already been
const BLOCK_SIZE plane_bsize = // computed in the encoder_facade
AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd)); if (!xd->cfl->are_parameters_computed)
#else cfl_compute_parameters(xd, tx_size);
const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
#endif
const int width =
max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
const int height =
max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
// Temporary pixel buffer used to store the CfL prediction when we compute
// the average over the reconstructed and downsampled luma pixels
// TODO(ltrudeau) Convert to uint16 when adding HBD support
uint8_t tmp_pix[MAX_SB_SQUARE];
// Compute the block-level DC_PRED for both chromatic planes. DC_PRED
// replaces beta in the linear model.
cfl_dc_pred(xd, width, height);
cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
} }
cfl_predict_block( cfl_predict_block(xd, dst, pd->dst.stride, blk_row, blk_col, tx_size,
cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size, plane);
cfl->dc_pred[plane - 1],
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1],
plane - 1));
} }
#endif #endif
} }
......
...@@ -1364,8 +1364,22 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, ...@@ -1364,8 +1364,22 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
FRAME_CONTEXT *const ec_ctx = cm->fc; FRAME_CONTEXT *const ec_ctx = cm->fc;
#endif // CONFIG_EC_ADAPT #endif // CONFIG_EC_ADAPT
#if CONFIG_DEBUG
const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
// av1_predict_intra_block_facade does not pass plane_bsize, we need to validate
// that we will get the same value of plane_bsize on the other side.
#if CONFIG_CHROMA_SUB8X8
const BLOCK_SIZE plane_bsize_val =
AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, &xd->plane[plane]));
#else
const BLOCK_SIZE plane_bsize_val =
get_plane_block_size(mbmi->sb_type, &xd->plane[plane]);
#endif // CONFIG_CHROMA_SUB8X8
assert(plane_bsize == plane_bsize_val);
#endif // CONFIG_DEBUG
av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col,
blk_row, tx_size, plane_bsize); blk_row, tx_size);
#else #else
av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size);
#endif #endif
...@@ -1418,10 +1432,11 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, ...@@ -1418,10 +1432,11 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
} }
#if CONFIG_CFL #if CONFIG_CFL
static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride,
const uint8_t *src, int src_stride, int width, const double y_average, const uint8_t *src,
int height, TX_SIZE tx_size, double dc_pred, int src_stride, int width, int height,
double alpha, int *dist_neg_out) { TX_SIZE tx_size, double dc_pred, double alpha,
int *dist_neg_out) {
const double dc_pred_bias = dc_pred + 0.5; const double dc_pred_bias = dc_pred + 0.5;
int dist = 0; int dist = 0;
int diff; int diff;
...@@ -1444,6 +1459,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, ...@@ -1444,6 +1459,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
int dist_neg = 0; int dist_neg = 0;
const int tx_height = tx_size_high[tx_size]; const int tx_height = tx_size_high[tx_size];
const int tx_width = tx_size_wide[tx_size]; const int tx_width = tx_size_wide[tx_size];
const int y_block_row_off = y_stride * tx_height;
const int src_block_row_off = src_stride * tx_height;
const uint8_t *t_y_pix; const uint8_t *t_y_pix;
const uint8_t *t_src; const uint8_t *t_src;
for (int b_j = 0; b_j < height; b_j += tx_height) { for (int b_j = 0; b_j < height; b_j += tx_height) {
...@@ -1454,7 +1471,7 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, ...@@ -1454,7 +1471,7 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
t_src = src; t_src = src;
for (int t_j = b_j; t_j < h; t_j++) { for (int t_j = b_j; t_j < h; t_j++) {
for (int t_i = b_i; t_i < w; t_i++) { for (int t_i = b_i; t_i < w; t_i++) {
const double scaled_luma = alpha * (t_y_pix[t_i] - y_avg); const double scaled_luma = alpha * (t_y_pix[t_i] - y_average);
const int uv = t_src[t_i]; const int uv = t_src[t_i];
diff = uv - (int)(scaled_luma + dc_pred_bias); diff = uv - (int)(scaled_luma + dc_pred_bias);
dist += diff * diff; dist += diff * diff;
...@@ -1465,8 +1482,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, ...@@ -1465,8 +1482,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
t_src += src_stride; t_src += src_stride;
} }
} }
y_pix += y_stride * tx_height; y_pix += y_block_row_off;
src += src_stride * tx_height; src += src_block_row_off;
} }
if (dist_neg_out) *dist_neg_out = dist_neg; if (dist_neg_out) *dist_neg_out = dist_neg;
...@@ -1474,35 +1491,64 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, ...@@ -1474,35 +1491,64 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
return dist; return dist;
} }
static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl, static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) {
int width, int height, TX_SIZE tx_size, assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] ==
uint8_t y_pix[MAX_SB_SQUARE], AOM_ICDF(CDF_PROB_TOP));
CFL_SIGN_TYPE signs_out[CFL_SIGNS]) { const int prob_den = CDF_PROB_TOP;
int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]);
cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den));
for (int c = 1; c < CFL_ALPHABET_SIZE; c++) {
int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) +
(cfl_alpha_codes[c][CFL_PRED_V] != 0);
prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) -
AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]);
cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) +
av1_cost_literal(sign_bit_cost);
}
}
static void cfl_compute_alpha_ind(MACROBLOCK *const x, FRAME_CONTEXT *ec_ctx,
TX_SIZE tx_size) {
const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U]; const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V]; const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
const uint8_t *const src_u = p_u->src.buf; const uint8_t *const src_u = p_u->src.buf;
const uint8_t *const src_v = p_v->src.buf; const uint8_t *const src_v = p_v->src.buf;
const int src_stride_u