Commit 056d1f40 authored by Luc Trudeau's avatar Luc Trudeau
Browse files

[CFL] Support for 4:2:0 High Bit Depth

Adds high bit depth (_hbd) and low bit depth (_lbd) versions
of the cfl functions: sum_above_row, sum_left_col,
cfl_build_prediction, cfl_luma_subsampling_420 (4:4:4 will
be added in a subsequent commit) and cfl_alpha_dist. For
cfl_alpha_dist, special care is taken to scale the SSE
according to the bit depth.

BUG=aomedia:835

Change-Id: I5b72845100d88fb8a438efe665bcae7fe1ba50b8
parent e58b564d
......@@ -62,47 +62,95 @@ static INLINE void cfl_pad(CFL_CTX *cfl, int width, int height) {
}
}
static void sum_above_row(const MACROBLOCKD *xd, int width, int *out_sum_u,
int *out_sum_v) {
const struct macroblockd_plane *const pd_u = &xd->plane[AOM_PLANE_U];
const struct macroblockd_plane *const pd_v = &xd->plane[AOM_PLANE_V];
const int dst_u_stride = pd_u->dst.stride;
const int dst_v_stride = pd_v->dst.stride;
const uint8_t *above_dst_u = pd_u->dst.buf - dst_u_stride;
const uint8_t *above_dst_v = pd_v->dst.buf - dst_v_stride;
// Accumulates the sums of the reconstructed chroma pixels in the row above
// the current block (8-bit path). The results are added onto *out_sum_u and
// *out_sum_v rather than overwriting them.
static void sum_above_row_lbd(const uint8_t *above_u, const uint8_t *above_v,
                              int width, int *out_sum_u, int *out_sum_v) {
  int total_u = 0;
  int total_v = 0;
  for (int col = 0; col < width; ++col) {
    total_u += above_u[col];
    total_v += above_v[col];
  }
  *out_sum_u += total_u;
  *out_sum_v += total_v;
}
#if CONFIG_HIGHBITDEPTH
static void sum_above_row_hbd(const uint16_t *above_u, const uint16_t *above_v,
int width, int *out_sum_u, int *out_sum_v) {
int sum_u = 0;
int sum_v = 0;
for (int i = 0; i < width; i++) {
sum_u += above_dst_u[i];
sum_v += above_dst_v[i];
sum_u += above_u[i];
sum_v += above_v[i];
}
*out_sum_u += sum_u;
*out_sum_v += sum_v;
}
#endif // CONFIG_HIGHBITDEPTH
static void sum_left_col(const MACROBLOCKD *xd, int height, int *out_sum_u,
int *out_sum_v) {
// Dispatches the above-row chroma summation to the high or low bit depth
// helper, depending on which pixel representation xd is using.
static void sum_above_row(const MACROBLOCKD *xd, int width, int *out_sum_u,
                          int *out_sum_v) {
  const struct macroblockd_plane *const plane_u = &xd->plane[AOM_PLANE_U];
  const struct macroblockd_plane *const plane_v = &xd->plane[AOM_PLANE_V];
  const int stride_u = plane_u->dst.stride;
  const int stride_v = plane_v->dst.stride;
#if CONFIG_HIGHBITDEPTH
  if (get_bitdepth_data_path_index(xd)) {
    sum_above_row_hbd(CONVERT_TO_SHORTPTR(plane_u->dst.buf) - stride_u,
                      CONVERT_TO_SHORTPTR(plane_v->dst.buf) - stride_v, width,
                      out_sum_u, out_sum_v);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  sum_above_row_lbd(plane_u->dst.buf - stride_u, plane_v->dst.buf - stride_v,
                    width, out_sum_u, out_sum_v);
}
const int dst_u_stride = pd_u->dst.stride;
const int dst_v_stride = pd_v->dst.stride;
const uint8_t *left_dst_u = pd_u->dst.buf - 1;
const uint8_t *left_dst_v = pd_v->dst.buf - 1;
// Accumulates the sums of the reconstructed chroma pixels in the column to
// the left of the current block (8-bit path). The results are added onto
// *out_sum_u and *out_sum_v rather than overwriting them.
static void sum_left_col_lbd(const uint8_t *left_u, int u_stride,
                             const uint8_t *left_v, int v_stride, int height,
                             int *out_sum_u, int *out_sum_v) {
  int total_u = 0;
  int total_v = 0;
  for (int row = 0; row < height; ++row) {
    total_u += *left_u;
    total_v += *left_v;
    left_u += u_stride;
    left_v += v_stride;
  }
  *out_sum_u += total_u;
  *out_sum_v += total_v;
}
#if CONFIG_HIGHBITDEPTH
static void sum_left_col_hbd(const uint16_t *left_u, int u_stride,
const uint16_t *left_v, int v_stride, int height,
int *out_sum_u, int *out_sum_v) {
int sum_u = 0;
int sum_v = 0;
for (int i = 0; i < height; i++) {
sum_u += left_dst_u[i * dst_u_stride];
sum_v += left_dst_v[i * dst_v_stride];
sum_u += left_u[i * u_stride];
sum_v += left_v[i * v_stride];
}
*out_sum_u += sum_u;
*out_sum_v += sum_v;
}
#endif // CONFIG_HIGHBITDEPTH
// Dispatches the left-column chroma summation to the high or low bit depth
// helper, depending on which pixel representation xd is using.
static void sum_left_col(const MACROBLOCKD *xd, int height, int *out_sum_u,
                         int *out_sum_v) {
  const struct macroblockd_plane *const plane_u = &xd->plane[AOM_PLANE_U];
  const struct macroblockd_plane *const plane_v = &xd->plane[AOM_PLANE_V];
#if CONFIG_HIGHBITDEPTH
  if (get_bitdepth_data_path_index(xd)) {
    sum_left_col_hbd(CONVERT_TO_SHORTPTR(plane_u->dst.buf) - 1,
                     plane_u->dst.stride,
                     CONVERT_TO_SHORTPTR(plane_v->dst.buf) - 1,
                     plane_v->dst.stride, height, out_sum_u, out_sum_v);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  sum_left_col_lbd(plane_u->dst.buf - 1, plane_u->dst.stride,
                   plane_v->dst.buf - 1, plane_v->dst.stride, height,
                   out_sum_u, out_sum_v);
}
// CfL computes its own block-level DC_PRED. This is required to compute both
// alpha_cb and alpha_cr before the prediction are computed.
......@@ -211,11 +259,9 @@ static INLINE int cfl_idx_to_alpha(int alpha_idx, int joint_sign,
return (alpha_sign == CFL_SIGN_POS) ? abs_alpha_q3 + 1 : -abs_alpha_q3 - 1;
}
// TODO(ltrudeau) add support for HBD.
static INLINE void cfl_build_prediction(const int16_t *pred_buf_q3,
uint8_t *dst, int dst_stride,
int alpha_q3, int dc_pred, int width,
int height) {
static void cfl_build_prediction_lbd(const int16_t *pred_buf_q3, uint8_t *dst,
int dst_stride, int width, int height,
int alpha_q3, int dc_pred) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
dst[i] =
......@@ -226,6 +272,39 @@ static INLINE void cfl_build_prediction(const int16_t *pred_buf_q3,
}
}
#if CONFIG_HIGHBITDEPTH
// Builds the CfL prediction for one chroma plane (high bit depth path): the
// subsampled luma values (Q3) are scaled by alpha, offset by the chroma DC
// prediction, and clamped to the valid pixel range for bit_depth.
// pred_buf_q3 rows are MAX_SB_SIZE apart; dst rows are dst_stride apart.
static void cfl_build_prediction_hbd(const int16_t *pred_buf_q3, uint16_t *dst,
                                     int dst_stride, int width, int height,
                                     int alpha_q3, int dc_pred, int bit_depth) {
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int scaled_luma = get_scaled_luma_q0(alpha_q3, pred_buf_q3[col]);
      dst[col] = clip_pixel_highbd(scaled_luma + dc_pred, bit_depth);
    }
    pred_buf_q3 += MAX_SB_SIZE;
    dst += dst_stride;
  }
}
#endif  // CONFIG_HIGHBITDEPTH
// Dispatches the CfL prediction build to the high or low bit depth
// implementation. bit_depth is only meaningful when use_hbd is set.
static void cfl_build_prediction(const int16_t *pred_buf_q3, uint8_t *dst,
                                 int dst_stride, int width, int height,
                                 int alpha_q3, int dc_pred, int use_hbd,
                                 int bit_depth) {
#if CONFIG_HIGHBITDEPTH
  if (use_hbd) {
    cfl_build_prediction_hbd(pred_buf_q3, CONVERT_TO_SHORTPTR(dst), dst_stride,
                             width, height, alpha_q3, dc_pred, bit_depth);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  (void)use_hbd;
  (void)bit_depth;
  cfl_build_prediction_lbd(pred_buf_q3, dst, dst_stride, width, height,
                           alpha_q3, dc_pred);
}
void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
int row, int col, TX_SIZE tx_size, int plane) {
CFL_CTX *const cfl = xd->cfl;
......@@ -239,15 +318,30 @@ void cfl_predict_block(MACROBLOCKD *const xd, uint8_t *dst, int dst_stride,
const int alpha_q3 =
cfl_idx_to_alpha(mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, plane - 1);
cfl_build_prediction(pred_buf_q3, dst, dst_stride, alpha_q3,
cfl->dc_pred[plane - 1], tx_size_wide[tx_size],
tx_size_high[tx_size]);
cfl_build_prediction(pred_buf_q3, dst, dst_stride, tx_size_wide[tx_size],
tx_size_high[tx_size], alpha_q3, cfl->dc_pred[plane - 1],
get_bitdepth_data_path_index(xd), xd->bd);
}
static INLINE void cfl_luma_subsampling_420(const uint8_t *input,
int input_stride,
int16_t *output_q3, int width,
int height) {
// 4:2:0 subsampling of the luma plane (8-bit path). Each output sample is
// the sum of a 2x2 block of luma pixels shifted left by 1: the 2x2 sum is
// 4x the average (Q2), so one extra shift yields the average in Q3.
// output_q3 rows are MAX_SB_SIZE apart.
static void cfl_luma_subsampling_420_lbd(const uint8_t *input, int input_stride,
                                         int16_t *output_q3, int width,
                                         int height) {
  for (int row = 0; row < height; ++row) {
    const uint8_t *top_row = input;
    const uint8_t *bot_row = input + input_stride;
    for (int col = 0; col < width; ++col) {
      const int luma_sum = top_row[2 * col] + top_row[2 * col + 1] +
                           bot_row[2 * col] + bot_row[2 * col + 1];
      output_q3[col] = (int16_t)(luma_sum << 1);
    }
    input += input_stride * 2;
    output_q3 += MAX_SB_SIZE;
  }
}
#if CONFIG_HIGHBITDEPTH
static void cfl_luma_subsampling_420_hbd(const uint16_t *input,
int input_stride, int16_t *output_q3,
int width, int height) {
for (int j = 0; j < height; j++) {
for (int i = 0; i < width; i++) {
int top = i << 1;
......@@ -259,6 +353,22 @@ static INLINE void cfl_luma_subsampling_420(const uint8_t *input,
output_q3 += MAX_SB_SIZE;
}
}
#endif // CONFIG_HIGHBITDEPTH
// Dispatches 4:2:0 luma subsampling to the high or low bit depth
// implementation, depending on use_hbd.
static void cfl_luma_subsampling_420(const uint8_t *input, int input_stride,
                                     int16_t *output_q3, int width, int height,
                                     int use_hbd) {
#if CONFIG_HIGHBITDEPTH
  if (use_hbd) {
    cfl_luma_subsampling_420_hbd(CONVERT_TO_SHORTPTR(input), input_stride,
                                 output_q3, width, height);
    return;
  }
#endif  // CONFIG_HIGHBITDEPTH
  (void)use_hbd;
  cfl_luma_subsampling_420_lbd(input, input_stride, output_q3, width, height);
}
static INLINE void cfl_luma_subsampling_444(const uint8_t *input,
int input_stride,
......@@ -275,7 +385,7 @@ static INLINE void cfl_luma_subsampling_444(const uint8_t *input,
static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input,
int input_stride, int row, int col, int width,
int height) {
int height, int use_hbd) {
const int tx_off_log2 = tx_size_wide_log2[0];
const int sub_x = cfl->subsampling_x;
const int sub_y = cfl->subsampling_y;
......@@ -307,11 +417,13 @@ static INLINE void cfl_store(CFL_CTX *cfl, const uint8_t *input,
cfl->pred_buf_q3 + (store_row * MAX_SB_SIZE + store_col);
if (sub_y == 0 && sub_x == 0) {
// TODO(ltrudeau) add support for HBD 4:4:4
assert(!use_hbd);
cfl_luma_subsampling_444(input, input_stride, pred_buf_q3, store_width,
store_height);
} else if (sub_y == 1 && sub_x == 1) {
cfl_luma_subsampling_420(input, input_stride, pred_buf_q3, store_width,
store_height);
store_height, use_hbd);
} else {
// TODO(ltrudeau) add support for 4:2:2
assert(0); // Unsupported chroma subsampling
......@@ -372,7 +484,7 @@ void cfl_store_tx(MACROBLOCKD *const xd, int row, int col, TX_SIZE tx_size,
}
#endif
cfl_store(cfl, dst, pd->dst.stride, row, col, tx_size_wide[tx_size],
tx_size_high[tx_size]);
tx_size_high[tx_size], get_bitdepth_data_path_index(xd));
}
void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
......@@ -391,7 +503,8 @@ void cfl_store_block(MACROBLOCKD *const xd, BLOCK_SIZE bsize, TX_SIZE tx_size) {
#endif // CONFIG_CHROMA_SUB8X8
const int width = max_intra_block_width(xd, bsize, AOM_PLANE_Y, tx_size);
const int height = max_intra_block_height(xd, bsize, AOM_PLANE_Y, tx_size);
cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height);
cfl_store(cfl, pd->dst.buf, pd->dst.stride, row, col, width, height,
get_bitdepth_data_path_index(xd));
}
void cfl_compute_parameters(MACROBLOCKD *const xd, TX_SIZE tx_size) {
......
......@@ -5856,11 +5856,10 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
#endif // CONFIG_EXT_INTRA
#if CONFIG_CFL
// TODO(ltrudeau) add support for HBD.
static int64_t cfl_alpha_dist(const int16_t *pred_buf_q3, const uint8_t *src,
int src_stride, int width, int height,
int dc_pred, int alpha_q3,
int64_t *dist_neg_out) {
static int64_t cfl_alpha_dist_lbd(const int16_t *pred_buf_q3,
const uint8_t *src, int src_stride, int width,
int height, int dc_pred, int alpha_q3,
int64_t *dist_neg_out) {
int64_t dist = 0;
int diff;
......@@ -5898,6 +5897,69 @@ static int64_t cfl_alpha_dist(const int16_t *pred_buf_q3, const uint8_t *src,
return dist;
}
#if CONFIG_HIGHBITDEPTH
// Sum of squared errors between the chroma source and the CfL prediction
// (high bit depth path). The accumulated SSE is scaled back toward the 8-bit
// range by right-shifting 2 * (bit_depth - 8) with rounding, so distortions
// stay comparable across bit depths. When dist_neg_out is non-NULL, the SSE
// for the negated alpha sign is also computed and stored there.
static int64_t cfl_alpha_dist_hbd(const int16_t *pred_buf_q3,
                                  const uint16_t *src, int src_stride,
                                  int width, int height, int dc_pred,
                                  int alpha_q3, int bit_depth,
                                  int64_t *dist_neg_out) {
  const int shift = 2 * (bit_depth - 8);
  const int rounding = shift > 0 ? (1 << shift) >> 1 : 0;
  int64_t dist = 0;

  if (alpha_q3 == 0) {
    // With alpha == 0 the prediction is just the DC value, and both alpha
    // signs yield the same distortion.
    for (int row = 0; row < height; ++row) {
      for (int col = 0; col < width; ++col) {
        const int diff = src[col] - dc_pred;
        dist += diff * diff;
      }
      src += src_stride;
    }
    dist = (dist + rounding) >> shift;
    if (dist_neg_out) *dist_neg_out = dist;
    return dist;
  }

  // Single pass over the block computing the SSE for both alpha signs.
  int64_t dist_neg = 0;
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      const int uv = src[col];
      const int scaled_luma = get_scaled_luma_q0(alpha_q3, pred_buf_q3[col]);
      const int diff_pos =
          uv - clip_pixel_highbd(scaled_luma + dc_pred, bit_depth);
      dist += diff_pos * diff_pos;
      const int diff_neg =
          uv - clip_pixel_highbd(dc_pred - scaled_luma, bit_depth);
      dist_neg += diff_neg * diff_neg;
    }
    pred_buf_q3 += MAX_SB_SIZE;
    src += src_stride;
  }
  if (dist_neg_out) *dist_neg_out = (dist_neg + rounding) >> shift;
  return (dist + rounding) >> shift;
}
#endif  // CONFIG_HIGHBITDEPTH
// Dispatches the CfL alpha distortion computation to the high or low bit
// depth implementation. bit_depth is only meaningful when use_hbd is set.
static int64_t cfl_alpha_dist(const int16_t *pred_buf_q3, const uint8_t *src,
                              int src_stride, int width, int height,
                              int dc_pred, int alpha_q3, int use_hbd,
                              int bit_depth, int64_t *dist_neg_out) {
#if CONFIG_HIGHBITDEPTH
  if (use_hbd) {
    return cfl_alpha_dist_hbd(pred_buf_q3, CONVERT_TO_SHORTPTR(src),
                              src_stride, width, height, dc_pred, alpha_q3,
                              bit_depth, dist_neg_out);
  }
#endif  // CONFIG_HIGHBITDEPTH
  (void)use_hbd;
  (void)bit_depth;
  return cfl_alpha_dist_lbd(pred_buf_q3, src, src_stride, width, height,
                            dc_pred, alpha_q3, dist_neg_out);
}
static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U];
......@@ -5917,22 +5979,25 @@ static int cfl_rd_pick_alpha(MACROBLOCK *const x, TX_SIZE tx_size) {
const int dc_pred_u = cfl->dc_pred[CFL_PRED_U];
const int dc_pred_v = cfl->dc_pred[CFL_PRED_V];
const int16_t *pred_buf_q3 = cfl->pred_buf_q3;
const int use_hbd = get_bitdepth_data_path_index(xd);
int64_t sse[CFL_PRED_PLANES][CFL_MAGS_SIZE];
sse[CFL_PRED_U][0] = cfl_alpha_dist(pred_buf_q3, src_u, src_stride_u, width,
height, dc_pred_u, 0, NULL);
sse[CFL_PRED_V][0] = cfl_alpha_dist(pred_buf_q3, src_v, src_stride_v, width,
height, dc_pred_v, 0, NULL);
sse[CFL_PRED_U][0] =
cfl_alpha_dist(pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
0, use_hbd, xd->bd, NULL);
sse[CFL_PRED_V][0] =
cfl_alpha_dist(pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
0, use_hbd, xd->bd, NULL);
for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
const int m = c * 2 + 1;
const int abs_alpha_q3 = c + 1;
sse[CFL_PRED_U][m] =
cfl_alpha_dist(pred_buf_q3, src_u, src_stride_u, width, height,
dc_pred_u, abs_alpha_q3, &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] =
cfl_alpha_dist(pred_buf_q3, src_v, src_stride_v, width, height,
dc_pred_v, abs_alpha_q3, &sse[CFL_PRED_V][m + 1]);
sse[CFL_PRED_U][m] = cfl_alpha_dist(
pred_buf_q3, src_u, src_stride_u, width, height, dc_pred_u,
abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_U][m + 1]);
sse[CFL_PRED_V][m] = cfl_alpha_dist(
pred_buf_q3, src_v, src_stride_v, width, height, dc_pred_v,
abs_alpha_q3, use_hbd, xd->bd, &sse[CFL_PRED_V][m + 1]);
}
int64_t dist;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment