Commit f5bdeac2 authored by Cheng Chen

Make CDEF work with EXT_PARTITION

Make CDEF select a filter strength for every 64x64 block, even when the
superblock size can be larger than 64x64.

Coding performance is neutral with and without this patch on AWCY and on
the Google lowres and midres test sets.

BUG=aomedia:662

Change-Id: Ief82cc51be91fc08a7c6d7e87f6d13bcc4336476
parent e94df5cf
......@@ -27,8 +27,8 @@ int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
maxc = cm->mi_cols - mi_col;
maxr = cm->mi_rows - mi_row;
maxr = AOMMIN(maxr, cm->mib_size);
maxc = AOMMIN(maxc, cm->mib_size);
maxr = AOMMIN(maxr, MI_SIZE_64X64);
maxc = AOMMIN(maxc, MI_SIZE_64X64);
for (r = 0; r < maxr; r++) {
for (c = 0; c < maxc; c++) {
......@@ -60,8 +60,8 @@ int sb_compute_dering_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
maxc = cm->mi_cols - mi_col;
maxr = cm->mi_rows - mi_row;
maxr = AOMMIN(maxr, cm->mib_size);
maxc = AOMMIN(maxc, cm->mib_size);
maxr = AOMMIN(maxr, MI_SIZE_64X64);
maxc = AOMMIN(maxc, MI_SIZE_64X64);
const int r_step = mi_size_high[BLOCK_8X8];
const int c_step = mi_size_wide[BLOCK_8X8];
......@@ -161,7 +161,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
uint16_t src[OD_DERING_INBUF_SIZE];
uint16_t *linebuf[3];
uint16_t *colbuf[3];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
int dering_count;
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
......@@ -178,8 +178,8 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int chroma_dering =
xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
xd->plane[2].subsampling_x == xd->plane[2].subsampling_y;
nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
av1_setup_dst_planes(xd->plane, cm->sb_size, frame, 0, 0);
row_dering = aom_malloc(sizeof(*row_dering) * (nhsb + 2) * 2);
memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2);
......@@ -202,7 +202,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
for (sbr = 0; sbr < nvsb; sbr++) {
for (pli = 0; pli < nplanes; pli++) {
const int block_height =
(MAX_MIB_SIZE << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER;
(MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * OD_FILT_VBORDER;
fill_rect(colbuf[pli], OD_FILT_HBORDER, block_height, OD_FILT_HBORDER,
OD_DERING_VERY_LARGE);
}
......@@ -213,41 +213,41 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int nhb, nvb;
int cstart = 0;
curr_row_dering[sbc] = 0;
if (cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc] == NULL ||
cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
if (cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
MI_SIZE_64X64 * sbc] == NULL ||
cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength == -1) {
dering_left = 0;
continue;
}
if (!dering_left) cstart = -OD_FILT_HBORDER;
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
int tile_top, tile_left, tile_bottom, tile_right;
int mi_idx = MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
int mi_idx = MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
MODE_INFO *const mi_tl = cm->mi + mi_idx;
BOUNDARY_TYPE boundary_tl = mi_tl->mbmi.boundary_info;
tile_top = boundary_tl & TILE_ABOVE_BOUNDARY;
tile_left = boundary_tl & TILE_LEFT_BOUNDARY;
if (sbr != nvsb - 1 &&
(&cm->mi[mi_idx + (MAX_MIB_SIZE - 1) * cm->mi_stride]))
tile_bottom = cm->mi[mi_idx + (MAX_MIB_SIZE - 1) * cm->mi_stride]
(&cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]))
tile_bottom = cm->mi[mi_idx + (MI_SIZE_64X64 - 1) * cm->mi_stride]
.mbmi.boundary_info &
TILE_BOTTOM_BOUNDARY;
else
tile_bottom = 1;
if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MAX_MIB_SIZE - 1]))
tile_right = cm->mi[mi_idx + MAX_MIB_SIZE - 1].mbmi.boundary_info &
if (sbc != nhsb - 1 && (&cm->mi[mi_idx + MI_SIZE_64X64 - 1]))
tile_right = cm->mi[mi_idx + MI_SIZE_64X64 - 1].mbmi.boundary_info &
TILE_RIGHT_BOUNDARY;
else
tile_right = 1;
const int mbmi_cdef_strength =
cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength;
level = cm->cdef_strengths[mbmi_cdef_strength] / CLPF_STRENGTHS;
clpf_strength = cm->cdef_strengths[mbmi_cdef_strength] % CLPF_STRENGTHS;
......@@ -259,7 +259,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
if ((level == 0 && clpf_strength == 0 && uv_level == 0 &&
uv_clpf_strength == 0) ||
(dering_count = sb_compute_dering_list(
cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist,
cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64, dlist,
get_filter_skip(level) || get_filter_skip(uv_level))) == 0) {
dering_left = 0;
continue;
......@@ -293,7 +293,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
else
rend = vsize + OD_FILT_VBORDER;
coffset = sbc * MAX_MIB_SIZE << mi_wide_l2[pli];
coffset = sbc * MI_SIZE_64X64 << mi_wide_l2[pli];
if (sbc == nhsb - 1) {
/* On the last superblock column, fill in the right border with
OD_DERING_VERY_LARGE to avoid filtering with the outside. */
......@@ -314,14 +314,14 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
cm,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << mi_high_l2[pli]) * sbr, coffset + cstart,
(MI_SIZE_64X64 << mi_high_l2[pli]) * sbr, coffset + cstart,
xd->plane[pli].dst.stride, rend, cend - cstart);
if (!prev_row_dering[sbc]) {
copy_sb8_16(cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
hsize);
copy_sb8_16(
cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
xd->plane[pli].dst.buf,
(MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
} else if (sbr > 0) {
copy_rect(&src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
&linebuf[pli][coffset], stride, OD_FILT_VBORDER, hsize);
......@@ -330,10 +330,11 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
hsize, OD_DERING_VERY_LARGE);
}
if (!prev_row_dering[sbc - 1]) {
copy_sb8_16(cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
OD_FILT_VBORDER, OD_FILT_HBORDER);
copy_sb8_16(
cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
OD_FILT_VBORDER, OD_FILT_HBORDER);
} else if (sbr > 0 && sbc > 0) {
copy_rect(src, OD_FILT_BSTRIDE,
&linebuf[pli][coffset - OD_FILT_HBORDER], stride,
......@@ -343,11 +344,12 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
OD_DERING_VERY_LARGE);
}
if (!prev_row_dering[sbc + 1]) {
copy_sb8_16(cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])],
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
coffset + hsize, xd->plane[pli].dst.stride,
OD_FILT_VBORDER, OD_FILT_HBORDER);
copy_sb8_16(
cm, &src[OD_FILT_HBORDER + (nhb << mi_wide_l2[pli])],
OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
(MI_SIZE_64X64 << mi_high_l2[pli]) * sbr - OD_FILT_VBORDER,
coffset + hsize, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
OD_FILT_HBORDER);
} else if (sbr > 0 && sbc < nhsb - 1) {
copy_rect(&src[hsize + OD_FILT_HBORDER], OD_FILT_BSTRIDE,
&linebuf[pli][coffset + hsize], stride, OD_FILT_VBORDER,
......@@ -368,7 +370,7 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
rend + OD_FILT_VBORDER, OD_FILT_HBORDER);
copy_sb8_16(
cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
(MAX_MIB_SIZE << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER,
(MI_SIZE_64X64 << mi_high_l2[pli]) * (sbr + 1) - OD_FILT_VBORDER,
coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER, hsize);
if (tile_top) {
......@@ -395,8 +397,8 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
(uint8_t *)&CONVERT_TO_SHORTPTR(
xd->plane[pli]
.dst.buf)[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << mi_high_l2[pli]) +
(sbc * MAX_MIB_SIZE << mi_wide_l2[pli])],
(MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
(sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride, dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, dering_count,
......@@ -406,8 +408,8 @@ void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#endif
od_dering(&xd->plane[pli]
.dst.buf[xd->plane[pli].dst.stride *
(MAX_MIB_SIZE * sbr << mi_high_l2[pli]) +
(sbc * MAX_MIB_SIZE << mi_wide_l2[pli])],
(MI_SIZE_64X64 * sbr << mi_high_l2[pli]) +
(sbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
xd->plane[pli].dst.stride, dst,
&src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
xdec[pli], ydec[pli], dir, NULL, var, pli, dlist,
......
......@@ -65,6 +65,8 @@ extern "C" {
#define MAX_VARTX_DEPTH 2
#endif
// Side length of a 64x64 block expressed in mode-info (MI) units, i.e. 64
// shifted right by MI_SIZE_LOG2. The CDEF code uses it as the fixed step
// between filter units in the MI grid, independent of the superblock size.
// NOTE(review): assumes MI_SIZE_LOG2 is log2 of the MI unit size in pixels --
// confirm against its definition.
#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
// 00: Profile 0. 8-bit 4:2:0 only.
// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0.
......
......@@ -2686,12 +2686,21 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
#if CONFIG_CDEF
if (bsize == cm->sb_size) {
if (!sb_all_skip(cm, mi_row, mi_col) && !cm->all_lossless) {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.cdef_strength =
aom_read_literal(r, cm->cdef_bits, ACCT_STR);
} else {
cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]->mbmi.cdef_strength =
-1;
int width_step = mi_size_wide[BLOCK_64X64];
int height_step = mi_size_wide[BLOCK_64X64];
int w, h;
for (h = 0; (h < mi_size_high[cm->sb_size]) && (mi_row + h < cm->mi_rows);
h += height_step) {
for (w = 0; (w < mi_size_wide[cm->sb_size]) && (mi_col + w < cm->mi_cols);
w += width_step) {
if (!cm->all_lossless && !sb_all_skip(cm, mi_row + h, mi_col + w))
cm->mi_grid_visible[(mi_row + h) * cm->mi_stride + (mi_col + w)]
->mbmi.cdef_strength =
aom_read_literal(r, cm->cdef_bits, ACCT_STR);
else
cm->mi_grid_visible[(mi_row + h) * cm->mi_stride + (mi_col + w)]
->mbmi.cdef_strength = -1;
}
}
}
#endif // CONFIG_CDEF
......
......@@ -3097,11 +3097,24 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile,
#endif // CONFIG_EXT_PARTITION_TYPES
#if CONFIG_CDEF
if (bsize == cm->sb_size && !sb_all_skip(cm, mi_row, mi_col) &&
cm->cdef_bits != 0 && !cm->all_lossless) {
aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]
->mbmi.cdef_strength,
cm->cdef_bits);
if (bsize == cm->sb_size && cm->cdef_bits != 0 && !cm->all_lossless) {
int width_step = mi_size_wide[BLOCK_64X64];
int height_step = mi_size_high[BLOCK_64X64];
int width, height;
for (height = 0; (height < mi_size_high[cm->sb_size]) &&
(mi_row + height < cm->mi_rows);
height += height_step) {
for (width = 0; (width < mi_size_wide[cm->sb_size]) &&
(mi_col + width < cm->mi_cols);
width += width_step) {
if (!sb_all_skip(cm, mi_row + height, mi_col + width))
aom_write_literal(
w, cm->mi_grid_visible[(mi_row + height) * cm->mi_stride +
(mi_col + width)]
->mbmi.cdef_strength,
cm->cdef_bits);
}
}
}
#endif
}
......
......@@ -285,7 +285,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int sbr, sbc;
uint16_t *src[3];
uint16_t *ref_coeff[3];
dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
dering_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int var[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
int stride[3];
......@@ -300,8 +300,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
uint64_t best_tot_mse = (uint64_t)1 << 63;
uint64_t tot_mse;
int sb_count;
int nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
int nvsb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int nhsb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int *sb_index = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
int *selected_strength = aom_malloc(nvsb * nhsb * sizeof(*sb_index));
uint64_t(*mse[2])[TOTAL_STRENGTHS];
......@@ -387,14 +387,14 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
int nvb, nhb;
int gi;
int dirinit = 0;
nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
MAX_MIB_SIZE * sbc]
nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * sbc);
nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * sbr);
cm->mi_grid_visible[MI_SIZE_64X64 * sbr * cm->mi_stride +
MI_SIZE_64X64 * sbc]
->mbmi.cdef_strength = -1;
if (sb_all_skip(cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE)) continue;
dering_count = sb_compute_dering_list(cm, sbr * MAX_MIB_SIZE,
sbc * MAX_MIB_SIZE, dlist, 1);
if (sb_all_skip(cm, sbr * MI_SIZE_64X64, sbc * MI_SIZE_64X64)) continue;
dering_count = sb_compute_dering_list(cm, sbr * MI_SIZE_64X64,
sbc * MI_SIZE_64X64, dlist, 1);
for (pli = 0; pli < nplanes; pli++) {
for (i = 0; i < OD_DERING_INBUF_SIZE; i++)
inbuf[i] = OD_DERING_VERY_LARGE;
......@@ -419,8 +419,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
if (clpf_strength == 0)
copy_sb16_16(&in[(-yoff * OD_FILT_BSTRIDE - xoff)], OD_FILT_BSTRIDE,
src[pli],
(sbr * MAX_MIB_SIZE << mi_high_l2[pli]) - yoff,
(sbc * MAX_MIB_SIZE << mi_wide_l2[pli]) - xoff,
(sbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff,
(sbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff,
stride[pli], ysize, xsize);
od_dering(clpf_strength ? NULL : (uint8_t *)in, OD_FILT_BSTRIDE,
tmp_dst, in, xdec[pli], ydec[pli], dir, &dirinit, var, pli,
......@@ -429,8 +429,8 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
dering_damping, coeff_shift, clpf_strength != 0, 1);
curr_mse = compute_dering_dist(
ref_coeff[pli] +
(sbr * MAX_MIB_SIZE << mi_high_l2[pli]) * stride[pli] +
(sbc * MAX_MIB_SIZE << mi_wide_l2[pli]),
(sbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] +
(sbc * MI_SIZE_64X64 << mi_wide_l2[pli]),
stride[pli], tmp_dst, dlist, dering_count, bsize[pli],
coeff_shift, pli);
if (pli < 2)
......@@ -438,7 +438,7 @@ void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref,
else
mse[1][sb_count][gi] += curr_mse;
sb_index[sb_count] =
MAX_MIB_SIZE * sbr * cm->mi_stride + MAX_MIB_SIZE * sbc;
MI_SIZE_64X64 * sbr * cm->mi_stride + MI_SIZE_64X64 * sbc;
}
}
sb_count++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment