Commit 467205ac authored by Luc Trudeau

[CFL] Cache DC_PRED during CfL-RDO

By default, the DC_PRED is not cached (this includes
decoding). During cfl_rd_pick_alpha(), DC_PRED caching
is enabled: the DC_PRED is cached the first time it
is computed (for each plane) and is then reused when
testing all the other scaling parameters.

Change-Id: Ie8ba0bb0427c4d9be8de5b44e6330e8a78b9c7d9
parent 9cea993b
......@@ -537,14 +537,22 @@ typedef struct {
(CFL_SUB8X8_VAL_MI_SIZE * CFL_SUB8X8_VAL_MI_SIZE)
#endif // CONFIG_DEBUG
#define CFL_MAX_BLOCK_SIZE (BLOCK_32X32)
#define CFL_PRED_BUF_LINE (32)
#define CFL_PRED_BUF_SQUARE (CFL_PRED_BUF_LINE * CFL_PRED_BUF_LINE)
#define CFL_BUF_LINE (32)
#define CFL_BUF_SQUARE (CFL_BUF_LINE * CFL_BUF_LINE)
typedef struct cfl_ctx {
// The CfL prediction buffer is used in two steps:
// 1. Stores Q3 reconstructed luma pixels
// (only Q2 is required, but Q3 is used to avoid shifts)
// 2. Stores Q3 AC contributions (step1 - tx block avg)
int16_t pred_buf_q3[CFL_PRED_BUF_SQUARE];
int16_t pred_buf_q3[CFL_BUF_SQUARE];
// Cache the DC_PRED when performing RDO, so it does not have to be recomputed
// for every scaling parameter
int dc_pred_is_cached[CFL_PRED_PLANES];
// The DC_PRED cache is disabled when decoding
int use_dc_pred_cache;
// Only cache the first row of the DC_PRED
int16_t dc_pred_cache[CFL_PRED_PLANES][CFL_BUF_LINE];
// Height and width currently used in the CfL prediction buffer.
int buf_height, buf_width;
......
This diff is collapsed.
......@@ -25,6 +25,11 @@ static INLINE int get_scaled_luma_q0(int alpha_q3, int16_t pred_buf_q3) {
return ROUND_POWER_OF_TWO_SIGNED(scaled_luma_q6, 6);
}
// Converts a plane index into the matching CfL prediction plane type by
// subtracting one from it. The plane must be strictly positive; this is
// enforced by an assert in debug builds only.
static INLINE CFL_PRED_TYPE get_cfl_pred_type(PLANE_TYPE plane) {
  assert(plane > 0);
  const CFL_PRED_TYPE pred_plane = (CFL_PRED_TYPE)(plane - 1);
  return pred_plane;
}
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
TX_SIZE tx_size, int plane);
......@@ -32,4 +37,10 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size);
void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
BLOCK_SIZE bsize);
// Stores the DC_PRED held in `input` for the given CfL prediction plane so
// that it can be reused instead of being recomputed. The intra prediction
// facade calls this right after computing the prediction (and only when
// cfl.use_dc_pred_cache is set), passing the destination buffer as `input`
// and the transform block width as `width`.
// NOTE(review): the definition is not visible here; presumably only the first
// row (`width` pixels) is copied into cfl.dc_pred_cache -- confirm.
void cfl_store_dc_pred(MACROBLOCKD *const xd, const uint8_t *input,
CFL_PRED_TYPE pred_plane, int width);
// Counterpart of cfl_store_dc_pred(): the intra prediction facade calls this
// in place of av1_predict_intra_block() when the DC_PRED of `pred_plane` has
// already been cached.
// NOTE(review): the definition is not visible here; presumably it fills the
// `tx_size` block at `dst` (row stride `dst_stride`) from the cached DC_PRED
// values -- confirm.
void cfl_load_dc_pred(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
TX_SIZE tx_size, CFL_PRED_TYPE pred_plane);
#endif // AV1_COMMON_CFL_H_
......@@ -2762,10 +2762,6 @@ void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
const PREDICTION_MODE mode =
(plane == AOM_PLANE_Y) ? mbmi->mode : get_uv_mode(mbmi->uv_mode);
av1_predict_intra_block(cm, xd, pd->width, pd->height,
txsize_to_bsize[tx_size], mode, dst, dst_stride, dst,
dst_stride, blk_col, blk_row, plane);
#if CONFIG_CFL
if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
#if CONFIG_DEBUG
......@@ -2777,9 +2773,26 @@ void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
#endif
CFL_CTX *const cfl = &xd->cfl;
CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
if (cfl->dc_pred_is_cached[pred_plane] == 0) {
av1_predict_intra_block(cm, xd, pd->width, pd->height,
txsize_to_bsize[tx_size], mode, dst, dst_stride,
dst, dst_stride, blk_col, blk_row, plane);
if (cfl->use_dc_pred_cache) {
cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
cfl->dc_pred_is_cached[pred_plane] = 1;
}
} else {
cfl_load_dc_pred(xd, dst, dst_stride, tx_size, pred_plane);
}
cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
return;
}
#endif
av1_predict_intra_block(cm, xd, pd->width, pd->height,
txsize_to_bsize[tx_size], mode, dst, dst_stride, dst,
dst_stride, blk_col, blk_row, plane);
}
// Copy the given row of dst into the equivalent row of ref, saving
......
......@@ -5418,6 +5418,7 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
#endif
xd->cfl.use_dc_pred_cache = 1;
const int64_t mode_rd =
RDCOST(x->rdmult, x->intra_uv_mode_cost[mbmi->mode][UV_CFL_PRED], 0);
int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
......@@ -5508,6 +5509,9 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
mbmi->cfl_alpha_idx = ind;
mbmi->cfl_alpha_signs = best_joint_sign;
xd->cfl.use_dc_pred_cache = 0;
xd->cfl.dc_pred_is_cached[0] = 0;
xd->cfl.dc_pred_is_cached[1] = 0;
return best_rate_overhead;
}
#endif // CONFIG_CFL
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment