Commit e3980281 authored by Luc Trudeau

[CFL] Store luma as prediction for chroma

Stores the reconstructed luma pixels for each transform block inside a
prediction block. Rectangular transform blocks are supported.

As for RDO, after all the modes have been tested for luma, an extra
encoding is performed in order to store the reconstructed pixel values of
the best mode. These values are then used for RDO on the chromatic
planes.

Change-Id: I354d9827e32fd41065f1b2ce02832d943a6fa156
parent 72d55e2c
......@@ -531,6 +531,12 @@ typedef int16_t EobThresholdMD[TX_SIZES_ALL][TX_TYPES];
#if CONFIG_CFL
// State kept for chroma-from-luma (CfL) prediction.
typedef struct {
// Pixel buffer containing the luma pixels used as prediction for chroma.
// cfl_store() fills it using a row stride of MAX_SB_SIZE.
uint8_t y_pix[MAX_SB_SQUARE];
// Height and width, in pixels, of the luma prediction block currently in the
// pixel buffer. cfl_store() resets them when it stores the block at
// row == 0, col == 0 and grows them as further blocks are stored.
int y_height, y_width;
// CfL performs its own block level DC_PRED for each chromatic plane; this
// array holds one value per CfL prediction plane.
int dc_pred[CFL_PRED_PLANES];
} CFL_CTX;
......
......@@ -84,3 +84,36 @@ void cfl_predict_block(uint8_t *dst, int dst_stride, TX_SIZE tx_size,
dst += dst_stride;
}
}
// Copies one reconstructed luma transform block into the CfL pixel buffer so
// that it can later be used as the prediction for chroma, and records how much
// of the buffer has been written.
//
//   cfl          - CfL context; y_pix (row stride MAX_SB_SIZE), y_width and
//                  y_height are updated.
//   input        - top-left pixel of the luma transform block to store.
//   input_stride - distance, in pixels, between two consecutive rows of input.
//   row, col     - position of the transform block, expressed in units of
//                  (1 << tx_size_wide_log2[0]) pixels, not in pixels.
//   tx_size      - transform size, giving the block width and height in pixels.
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
               int col, TX_SIZE tx_size) {
  const int tx_width = tx_size_wide[tx_size];
  const int tx_height = tx_size_high[tx_size];
  const int tx_off_log2 = tx_size_wide_log2[0];

  // Left-shifting a negative value is undefined, so validate before converting.
  assert(row >= 0 && col >= 0);

  // row and col count transform units; convert them once to pixel offsets so
  // that every computation below works in the same unit as tx_width/tx_height.
  const int pix_row = row << tx_off_log2;
  const int pix_col = col << tx_off_log2;

  // Check that we remain inside the pixel buffer. The index tested is the last
  // pixel written by the copy loop below, expressed in pixels.
  assert(MAX_SB_SIZE * (pix_row + tx_height - 1) + pix_col + tx_width - 1 <
         MAX_SB_SQUARE);

  // Store the input into the CfL pixel buffer
  uint8_t *y_pix = &cfl->y_pix[pix_row * MAX_SB_SIZE + pix_col];
  for (int j = 0; j < tx_height; j++) {
    for (int i = 0; i < tx_width; i++) {
      y_pix[i] = input[i];
    }
    y_pix += MAX_SB_SIZE;
    input += input_stride;
  }

  // Store the surface of the pixel buffer that was written to, this way we
  // can manage chroma overrun (e.g. when the chroma surface goes beyond the
  // frame boundary)
  if (col == 0 && row == 0) {
    // First transform block of a new luma block: restart the tracked surface.
    cfl->y_width = tx_width;
    cfl->y_height = tx_height;
  } else {
    cfl->y_width = OD_MAXI(pix_col + tx_width, cfl->y_width);
    cfl->y_height = OD_MAXI(pix_row + tx_height, cfl->y_height);
  }
}
......@@ -20,4 +20,7 @@ void cfl_dc_pred(MACROBLOCKD *xd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size);
void cfl_predict_block(uint8_t *dst, int dst_stride, TX_SIZE tx_size,
int dc_pred);
// Stores the tx_size luma block read from input (rows are input_stride pixels
// apart) into the pixel buffer of cfl at block position (row, col), and updates
// the y_width / y_height extents kept in cfl. row and col are expressed in
// units of (1 << tx_size_wide_log2[0]) pixels.
void cfl_store(CFL_CTX *cfl, const uint8_t *input, int input_stride, int row,
int col, TX_SIZE tx_size);
#endif // AV1_COMMON_CFL_H_
......@@ -535,6 +535,7 @@ static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd,
#endif
#if CONFIG_CFL
xd->cfl = cfl;
memset(&cfl->y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE);
#endif
xd->above_context[i] = cm->above_context[i];
if (xd->plane[i].plane_type == PLANE_TYPE_Y) {
......
......@@ -79,6 +79,10 @@
#include "av1/encoder/hybrid_fwd_txfm.h"
#endif
#if CONFIG_CFL
#include "av1/common/cfl.h"
#endif
static struct aom_read_bit_buffer *init_read_bit_buffer(
AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data,
const uint8_t *data_end, uint8_t clear_data[MAX_AV1_HEADER_SIZE]);
......@@ -564,6 +568,14 @@ static void predict_and_reconstruct_intra_block(
av1_pvq_decode_helper2(cm, xd, mbmi, plane, row, col, tx_size, tx_type);
#endif
}
#if CONFIG_CFL
if (plane == AOM_PLANE_Y) {
struct macroblockd_plane *const pd = &xd->plane[plane];
uint8_t *dst =
&pd->dst.buf[(row * pd->dst.stride + col) << tx_size_wide_log2[0]];
cfl_store(xd->cfl, dst, pd->dst.stride, row, col, tx_size);
}
#endif
}
#if CONFIG_VAR_TX && !CONFIG_COEF_INTERLEAVE
......
......@@ -228,6 +228,10 @@ struct macroblock {
// 4x4 blocks are coded.
int rate_4x4[256];
#endif
#if CONFIG_CFL
// Whether luma needs to be stored during RDO.
int cfl_store_y;
#endif
};
#ifdef __cplusplus
......
......@@ -1869,6 +1869,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
x->pvq_speed = 1;
x->pvq_coded = 0;
#endif
#if CONFIG_CFL
// Don't store luma during RDO (we will store the best mode later).
x->cfl_store_y = 0;
#endif
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
mbmi = &xd->mi[0]->mbmi;
......@@ -4574,6 +4578,10 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
*rate_nocoef = best_rate_nocoef;
#endif // CONFIG_SUPERTX
#if CONFIG_CFL
// Store the luma for the best mode
x->cfl_store_y = 1;
#endif
if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
pc_tree->index != 3) {
if (bsize == cm->sb_size) {
......@@ -4587,6 +4595,9 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
pc_tree, NULL);
}
}
#if CONFIG_CFL
x->cfl_store_y = 0;
#endif
if (bsize == cm->sb_size) {
#if !CONFIG_PVQ && !CONFIG_LV_MAP
......@@ -5036,6 +5047,7 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
#if CONFIG_CFL
td->mb.e_mbd.cfl = &this_tile->cfl;
memset(&this_tile->cfl.y_pix, 0, sizeof(uint8_t) * MAX_SB_SQUARE);
#endif
#if CONFIG_PVQ
......@@ -5921,6 +5933,9 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td,
x->pvq_speed = 0;
x->pvq_coded = (dry_run == OUTPUT_ENABLED) ? 1 : 0;
#endif
#if CONFIG_CFL
x->cfl_store_y = (dry_run == OUTPUT_ENABLED) ? 1 : 0;
#endif
if (!is_inter) {
int plane;
......
......@@ -38,6 +38,10 @@
#include "av1/encoder/pvq_encoder.h"
#endif
#if CONFIG_CFL
#include "av1/common/cfl.h"
#endif
// Tells whether the C version of subtraction has to be used for a w x h block.
// Returns 1 when either dimension is smaller than 4, and 0 otherwise.
static int check_subtract_block_size(int w, int h) {
  if (w >= 4 && h >= 4) {
    return 0;
  }
  return 1;
}
......@@ -1475,6 +1479,11 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col,
#else
// Note : *(args->skip) == mbmi->skip
#endif
#if CONFIG_CFL
if (plane == AOM_PLANE_Y && x->cfl_store_y) {
cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size);
}
#endif
}
void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x,
......
......@@ -537,6 +537,10 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) {
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
#if CONFIG_CFL
// Don't store luma on the first pass since chroma is not computed
x->cfl_store_y = 0;
#endif
av1_frame_init_quantizer(cpi);
#if CONFIG_PVQ
......
......@@ -3714,6 +3714,16 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
od_encode_rollback(&x->daala_enc, &post_buf);
#endif // CONFIG_PVQ
#if CONFIG_CFL
// Perform one extra txfm_rd_in_plane() call, this time with the best value so
// we can store reconstructed luma values
RD_STATS this_rd_stats;
x->cfl_store_y = 1;
txfm_rd_in_plane(x, cpi, &this_rd_stats, INT64_MAX, 0, bsize,
mic->mbmi.tx_size, cpi->sf.use_fast_coef_costing);
x->cfl_store_y = 0;
#endif
#if CONFIG_PALETTE
if (try_palette) {
rd_pick_palette_intra_sby(cpi, x, bsize, palette_y_mode_ctx,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment