Commit 3e18e4ae authored by Luc Trudeau

[CFL] Compute Luma Average Over Partition Unit

Extract the computation of the luma reconstructed average out of cfl_load
and into cfl_compute_average. The reconstructed luma average is stored
in the CFL_CONTEXT to avoid computing it for each transform block and
for each plane.

Results on subset1 (compared to 803bea26 with CfL)
   PSNR | PSNR Cb | PSNR Cr | PSNR HVS |    SSIM | MS SSIM | CIEDE 2000
-0.0474 | -0.1486 | -0.2931 |  -0.0358 | -0.0397 | -0.0127 |    -0.1162

Change-Id: I9e34af0fe5961ce8dbe70cb80aea2a16221d0d92
parent 401680b4
......@@ -28,7 +28,7 @@ void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm) {
// CfL computes its own block-level DC_PRED. This is required to compute both
// alpha_cb and alpha_cr before the predictions are computed.
void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
void cfl_dc_pred(MACROBLOCKD *xd, int width, int height) {
const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
......@@ -38,12 +38,8 @@ void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
const int dst_u_stride = pd_u->dst.stride;
const int dst_v_stride = pd_v->dst.stride;
assert(plane_bsize != BLOCK_INVALID);
const int block_width = block_size_wide[plane_bsize];
const int block_height = block_size_high[plane_bsize];
// Number of pixels on the top and left borders.
const double num_pel = block_width + block_height;
const double num_pel = width + height;
int sum_u = 0;
int sum_v = 0;
......@@ -64,13 +60,13 @@ void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
if (xd->up_available && xd->mb_to_right_edge >= 0) {
#endif
// TODO(ltrudeau) replace this with DC_PRED assembly
for (int i = 0; i < block_width; i++) {
for (int i = 0; i < width; i++) {
sum_u += dst_u[-dst_u_stride + i];
sum_v += dst_v[-dst_v_stride + i];
}
} else {
sum_u = block_width * 127;
sum_v = block_width * 127;
sum_u = width * 127;
sum_v = width * 127;
}
#if CONFIG_CHROMA_SUB8X8
......@@ -78,27 +74,40 @@ void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize) {
#else
if (xd->left_available && xd->mb_to_bottom_edge >= 0) {
#endif
for (int i = 0; i < block_height; i++) {
for (int i = 0; i < height; i++) {
sum_u += dst_u[i * dst_u_stride - 1];
sum_v += dst_v[i * dst_v_stride - 1];
}
} else {
sum_u += block_height * 129;
sum_v += block_height * 129;
sum_u += height * 129;
sum_v += height * 129;
}
xd->cfl->dc_pred[CFL_PRED_U] = sum_u / num_pel;
xd->cfl->dc_pred[CFL_PRED_V] = sum_v / num_pel;
}
// Returns the arithmetic mean of a width x height block of 8-bit samples.
//
// y_pix points at the first sample of the block and consecutive rows are
// y_stride samples apart. The samples are accumulated in an int and the
// total is divided by the sample count in double precision.
double cfl_compute_average(uint8_t *y_pix, int y_stride, int width,
                           int height) {
  const uint8_t *row = y_pix;
  int total = 0;
  for (int r = 0; r < height; r++, row += y_stride) {
    for (int c = 0; c < width; c++) total += row[c];
  }
  const int num_pel = width * height;
  return total / (double)num_pel;
}
// Predict the current transform block using CfL.
void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, double dc_pred,
double alpha) {
const int width = tx_size_wide[tx_size];
const int height = tx_size_high[tx_size];
const double y_avg = cfl->y_avg;
const double y_avg = cfl_load(cfl, dst, dst_stride, row, col, width, height);
cfl_load(cfl, dst, dst_stride, row, col, width, height);
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
......@@ -142,7 +151,7 @@ void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
}
// Load from the CfL pixel buffer into output
double cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
int col, int width, int height) {
const int sub_x = cfl->subsampling_x;
const int sub_y = cfl->subsampling_y;
......@@ -226,14 +235,4 @@ double cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
output_row_offset += output_stride;
}
}
int avg = 0;
output_row_offset = 0;
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
avg += output[output_row_offset + i];
}
output_row_offset += output_stride;
}
return avg / (double)(width * height);
}
......@@ -31,6 +31,9 @@ typedef struct {
// Height and width of the luma prediction block currently in the pixel buffer
int y_height, y_width;
// Average of the luma reconstructed values over the entire prediction unit
double y_avg;
// Chroma subsampling
int subsampling_x, subsampling_y;
......@@ -59,7 +62,9 @@ static const int cfl_alpha_codes[CFL_ALPHABET_SIZE][CFL_PRED_PLANES] = {
void cfl_init(CFL_CTX *cfl, AV1_COMMON *cm);
void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize);
void cfl_dc_pred(MACROBLOCKD *xd, int width, int height);
// Returns the average of the width x height block of samples starting at
// y_pix, whose rows are y_stride samples apart. Parameter order matches the
// definition and its callers: width first, then height.
double cfl_compute_average(uint8_t *y_pix, int y_stride, int width, int height);
static INLINE double cfl_idx_to_alpha(int alpha_idx, CFL_SIGN_TYPE alpha_sign,
CFL_PRED_TYPE pred_type) {
......@@ -81,6 +86,6 @@ void cfl_predict_block(const CFL_CTX *cfl, uint8_t *dst, int dst_stride,
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
int col, TX_SIZE tx_size);
double cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
void cfl_load(const CFL_CTX *cfl, uint8_t *output, int output_stride, int row,
int col, int width, int height);
#endif // AV1_COMMON_CFL_H_
......@@ -898,6 +898,24 @@ static INLINE int max_block_high(const MACROBLOCKD *xd, BLOCK_SIZE bsize,
return max_blocks_high >> tx_size_wide_log2[0];
}
#if CONFIG_CFL
// Computes a block width for intra prediction in the given plane.
//
// The value returned by max_block_wide() is shifted left by
// tx_size_wide_log2[0] and the result is then passed through
// ALIGN_POWER_OF_TWO with the log2 width of tx_size.
// NOTE(review): presumably this converts a count of smallest-transform units
// into pixels and rounds up to a whole transform block -- confirm against
// max_block_wide() and ALIGN_POWER_OF_TWO.
static INLINE int max_intra_block_width(const MACROBLOCKD *xd,
                                        BLOCK_SIZE plane_bsize, int plane,
                                        TX_SIZE tx_size) {
  const int blocks_wide = max_block_wide(xd, plane_bsize, plane);
  const int scaled_width = blocks_wide << tx_size_wide_log2[0];
  return ALIGN_POWER_OF_TWO(scaled_width, tx_size_wide_log2[tx_size]);
}
// Computes a block height for intra prediction in the given plane.
//
// The value returned by max_block_high() is shifted left by
// tx_size_high_log2[0] and the result is then passed through
// ALIGN_POWER_OF_TWO with the log2 height of tx_size.
// NOTE(review): presumably this converts a count of smallest-transform units
// into pixels and rounds up to a whole transform block -- confirm against
// max_block_high() and ALIGN_POWER_OF_TWO.
static INLINE int max_intra_block_height(const MACROBLOCKD *xd,
                                         BLOCK_SIZE plane_bsize, int plane,
                                         TX_SIZE tx_size) {
  const int blocks_high = max_block_high(xd, plane_bsize, plane);
  const int scaled_height = blocks_high << tx_size_high_log2[0];
  return ALIGN_POWER_OF_TWO(scaled_height, tx_size_high_log2[tx_size]);
}
#endif // CONFIG_CFL
static INLINE void av1_zero_above_context(AV1_COMMON *const cm,
int mi_col_start, int mi_col_end) {
const int width = mi_col_end - mi_col_start;
......
......@@ -2526,22 +2526,35 @@ void av1_predict_intra_block_facade(MACROBLOCKD *xd, int plane, int block_idx,
mode, dst, dst_stride, dst, dst_stride, blk_col,
blk_row, plane);
#if CONFIG_CFL
CFL_CTX *const cfl = xd->cfl;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (plane == AOM_PLANE_U && blk_col == 0 && blk_row == 0) {
// Compute the block-level DC_PRED for both chromatic planes. DC_PRED replaces
// beta in the linear model.
#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2
const BLOCK_SIZE plane_bsize =
AOMMAX(BLOCK_4X4, get_plane_block_size(mbmi->sb_type, pd));
#else
const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
#endif
cfl_dc_pred(xd, plane_bsize);
const int width =
max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
const int height =
max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
// Temporary pixel buffer used to store the CfL prediction when we compute
// the average over the reconstructed and downsampled luma pixels
// TODO(ltrudeau) Convert to uint16 when adding HBD support
uint8_t tmp_pix[MAX_SB_SQUARE];
// Compute the block-level DC_PRED for both chromatic planes. DC_PRED
// replaces beta in the linear model.
cfl_dc_pred(xd, width, height);
cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
}
cfl_predict_block(
xd->cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size,
xd->cfl->dc_pred[plane - 1],
cfl, dst, pd->dst.stride, blk_row, blk_col, tx_size,
cfl->dc_pred[plane - 1],
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs[plane - 1],
plane - 1));
}
......
......@@ -1537,7 +1537,8 @@ static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg,
}
static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
BLOCK_SIZE bsize,
int width, int height,
uint8_t y_pix[MAX_SB_SQUARE],
CFL_SIGN_TYPE signs_out[CFL_SIGNS]) {
const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V];
......@@ -1545,33 +1546,25 @@ static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl,
const uint8_t *const src_v = p_v->src.buf;
const int src_stride_u = p_u->src.stride;
const int src_stride_v = p_v->src.stride;
const int block_width = block_size_wide[bsize];
const int block_height = block_size_high[bsize];
const double dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const double dc_pred_v = cfl->dc_pred[CFL_PRED_V];
// Temporary pixel buffer used to store the CfL prediction when we compute the
// alpha index.
uint8_t tmp_pix[MAX_SB_SQUARE];
// Load CfL Prediction over the entire block
const double y_avg =
cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height);
const double y_avg = cfl->y_avg;
int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] =
cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u,
block_width, block_height, dc_pred_u, 0, NULL);
cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width,
height, dc_pred_u, 0, NULL);
sse[CFL_PRED_V][0] =
cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v,
block_width, block_height, dc_pred_v, 0, NULL);
cfl_alpha_dist(y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width,
height, dc_pred_v, 0, NULL);
for (int m = 1; m < CFL_MAGS_SIZE; m += 2) {
assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]);
sse[CFL_PRED_U][m] = cfl_alpha_dist(
tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width,
block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
y_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, width, height,
dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width,
block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
y_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, width, height,
dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]);
}
int dist;
......@@ -1637,11 +1630,21 @@ void av1_predict_intra_block_encoder_facade(MACROBLOCK *x,
MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) {
if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) {
const int width =
max_intra_block_width(xd, plane_bsize, AOM_PLANE_U, tx_size);
const int height =
max_intra_block_height(xd, plane_bsize, AOM_PLANE_U, tx_size);
uint8_t tmp_pix[MAX_SB_SQUARE];
CFL_CTX *const cfl = xd->cfl;
cfl_update_costs(cfl, ec_ctx);
cfl_dc_pred(xd, plane_bsize);
mbmi->cfl_alpha_idx =
cfl_compute_alpha_ind(x, cfl, plane_bsize, mbmi->cfl_alpha_signs);
cfl_dc_pred(xd, width, height);
// Load CfL Prediction over the entire block
cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, width, height);
cfl->y_avg = cfl_compute_average(tmp_pix, MAX_SB_SIZE, width, height);
mbmi->cfl_alpha_idx = cfl_compute_alpha_ind(
x, cfl, width, height, tmp_pix, mbmi->cfl_alpha_signs);
}
}
#if CONFIG_DEBUG
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment