Commit 5589d71c authored by Cheng Chen's avatar Cheng Chen

Reorganize loop filter range for superblock

For each superblock, its filtering range is shifted up and left by
8 pixels (half of the maximum loop filter length), such that
estimation of deblocking filtering will not cross two superblocks.

Change-Id: I82244b7c26ab8b2ac553037b3bb1fe1d82bf5704
parent c7855b1a
......@@ -679,6 +679,27 @@ void av1_loop_filter_init(AV1_COMMON *cm) {
memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
}
#if CONFIG_LPF_SB
// Stores the loop filter level `lvl` in every mode-info entry that lies in
// the filtering range of the superblock located at (mi_row, mi_col).
//
// The filtering range is the MAX_MIB_SIZE x MAX_MIB_SIZE superblock
// footprint shifted up and left by FILT_BOUNDARY_MI_OFFSET mode-info units,
// then clamped to the rectangle [0, cm->mi_rows) x [0, cm->mi_cols).
//
//   cm      common codec state; its mi array is written
//   mi_row  superblock top row, in mode-info units
//   mi_col  superblock left column, in mode-info units
//   lvl     filter level to store in mbmi.filt_lvl
void av1_loop_filter_sb_level_init(AV1_COMMON *cm, int mi_row, int mi_col,
                                   int lvl) {
  // Top-left corner of the shifted range, kept inside the frame.
  const int first_row = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
  const int first_col = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);

  // Exclusive bottom-right corner of the shifted range, before and after
  // clamping to the frame dimensions.
  const int shifted_row_end = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
  const int shifted_col_end = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
  const int row_limit = AOMMIN(shifted_row_end, cm->mi_rows);
  const int col_limit = AOMMIN(shifted_col_end, cm->mi_cols);

  for (int r = first_row; r < row_limit; ++r) {
    const int row_base = r * cm->mi_stride;
    for (int c = first_col; c < col_limit; ++c) {
      // Note: cm->mi_grid_visible can't be used here, because within each
      // partition all visible pointers point to the first entry of that
      // partition.
      cm->mi[row_base + c].mbmi.filt_lvl = lvl;
    }
  }
}
#endif // CONFIG_LPF_SB
void av1_loop_filter_frame_init(AV1_COMMON *cm, int default_filt_lvl,
int default_filt_lvl_r) {
int seg_id;
......@@ -2958,9 +2979,21 @@ static void av1_filter_block_plane_vert(
const uint32_t scale_vert = plane_ptr->subsampling_y;
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
for (int y = 0; y < (MAX_MIB_SIZE >> scale_vert); y += row_step) {
#if CONFIG_LPF_SB
int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
y_range = AOMMIN(y_range, cm->mi_rows);
y_range >>= scale_vert;
int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
x_range = AOMMIN(x_range, cm->mi_cols);
x_range >>= scale_horz;
#else
const int y_range = (MAX_MIB_SIZE >> scale_vert);
const int x_range = (MAX_MIB_SIZE >> scale_horz);
#endif // CONFIG_LPF_SB
for (int y = 0; y < y_range; y += row_step) {
uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
for (int x = 0; x < (MAX_MIB_SIZE >> scale_horz); x += col_step) {
for (int x = 0; x < x_range; x += col_step) {
// inner loop always filter vertical edges in a MI block. If MI size
// is 8x8, it will filter the vertical edge aligned with a 8x8 block.
// If 4x4 transform is used, it will then filter the internal edge
......@@ -3144,9 +3177,21 @@ static void av1_filter_block_plane_horz(
const uint32_t scale_vert = plane_ptr->subsampling_y;
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
for (int y = 0; y < (MAX_MIB_SIZE >> scale_vert); y += row_step) {
#if CONFIG_LPF_SB
int y_range = mi_row ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
y_range = AOMMIN(y_range, cm->mi_rows);
y_range >>= scale_vert;
int x_range = mi_col ? MAX_MIB_SIZE : MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET;
x_range = AOMMIN(x_range, cm->mi_cols);
x_range >>= scale_horz;
#else
const int y_range = (MAX_MIB_SIZE >> scale_vert);
const int x_range = (MAX_MIB_SIZE >> scale_horz);
#endif // CONFIG_LPF_SB
for (int y = 0; y < y_range; y += row_step) {
uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
for (int x = 0; x < (MAX_MIB_SIZE >> scale_horz); x += col_step) {
for (int x = 0; x < x_range; x += col_step) {
// inner loop always filter vertical edges in a MI block. If MI size
// is 8x8, it will first filter the vertical edge aligned with a 8x8
// block. If 4x4 transform is used, it will then filter the internal
......@@ -3503,19 +3548,14 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
// for superblock, no longer for the whole frame.
// When partial_frame is 0, it's in the actual filtering stage for the frame
if (partial_frame) {
start_mi_row = mi_row;
end_mi_row = mi_row + cm->mib_size;
start_mi_col = mi_col;
end_mi_col = mi_col + cm->mib_size;
int row, col;
for (row = mi_row; row < mi_row + MAX_MIB_SIZE && row < cm->mi_rows;
++row) {
for (col = mi_col; col < mi_col + MAX_MIB_SIZE && col < cm->mi_cols;
++col) {
cm->mi_grid_visible[row * cm->mi_stride + col]->mbmi.filt_lvl =
frame_filter_level;
}
}
start_mi_row = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
start_mi_col = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
end_mi_row = AOMMIN(mi_row_range, cm->mi_rows);
end_mi_col = AOMMIN(mi_col_range, cm->mi_cols);
av1_loop_filter_sb_level_init(cm, mi_row, mi_col, frame_filter_level);
} else {
start_mi_row = 0;
mi_rows_to_filter = cm->mi_rows;
......
......@@ -152,6 +152,9 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer,
struct macroblockd_plane planes[MAX_MB_PLANE],
int start, int stop, int col_start, int col_end,
int y_only);
// Sets mbmi.filt_lvl to lvl for every mode-info entry in the filtering range
// of the superblock at (mi_row, mi_col). The range is the superblock
// footprint shifted up and left by FILT_BOUNDARY_MI_OFFSET mode-info units
// and clamped to the frame (cm->mi_rows x cm->mi_cols).
void av1_loop_filter_sb_level_init(struct AV1Common *cm, int mi_row, int mi_col,
int lvl);
#else
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
struct macroblockd *mbd, int filter_level,
......
......@@ -85,6 +85,10 @@ extern "C" {
#if CONFIG_LPF_SB
// MAX_LPF_OFFSET is the largest value representable in LPF_DELTA_BITS bits
// ((1 << 3) - 1 == 7). The encoder's per-superblock filter level search uses
// it to bound how far below the previous level it starts searching.
#define LPF_DELTA_BITS 3
#define MAX_LPF_OFFSET ((1 << LPF_DELTA_BITS) - 1)
// Half of maximum loop filter length (15-tap), in pixels. Each superblock's
// filtering range is shifted up and left by this amount.
#define FILT_BOUNDARY_OFFSET 8
// The same offset expressed in mode-info units (pixels >> MI_SIZE_LOG2).
#define FILT_BOUNDARY_MI_OFFSET (FILT_BOUNDARY_OFFSET >> MI_SIZE_LOG2)
#endif // CONFIG_LPF_SB
// Bitstream profiles indicated by 2-3 bits in the uncompressed header.
......
......@@ -2570,16 +2570,8 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd,
}
}
}
int row, col;
// set filter level for each mbmi
for (row = mi_row; row < mi_row + MAX_MIB_SIZE && row < cm->mi_rows;
++row) {
for (col = mi_col; col < mi_col + MAX_MIB_SIZE && col < cm->mi_cols;
++col) {
cm->mi_grid_visible[row * cm->mi_stride + col]->mbmi.filt_lvl =
filt_lvl;
}
}
av1_loop_filter_sb_level_init(cm, mi_row, mi_col, filt_lvl);
}
#endif
......
......@@ -31,17 +31,28 @@
#if CONFIG_HIGHBITDEPTH
static int64_t compute_sb_y_sse_highbd(const YV12_BUFFER_CONFIG *src,
const YV12_BUFFER_CONFIG *frame,
int mi_row, int mi_col) {
AV1_COMMON *const cm, int mi_row,
int mi_col) {
int64_t sse = 0;
const int row = mi_row * MI_SIZE;
const int col = mi_col * MI_SIZE;
const int mi_row_start = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
const int mi_col_start = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
const int mi_row_end = AOMMIN(mi_row_range, cm->mi_rows);
const int mi_col_end = AOMMIN(mi_col_range, cm->mi_cols);
const int row = mi_row_start * MI_SIZE;
const int col = mi_col_start * MI_SIZE;
const uint16_t *src_y =
CONVERT_TO_SHORTPTR(src->y_buffer) + row * src->y_stride + col;
const uint16_t *frame_y =
CONVERT_TO_SHORTPTR(frame->y_buffer) + row * frame->y_stride + col;
const int row_end = (mi_row_end - mi_row_start) * MI_SIZE;
const int col_end = (mi_col_end - mi_col_start) * MI_SIZE;
int x, y;
for (y = 0; y < MAX_MIB_SIZE * MI_SIZE; ++y) {
for (x = 0; x < MAX_MIB_SIZE * MI_SIZE; ++x) {
for (y = 0; y < row_end; ++y) {
for (x = 0; x < col_end; ++x) {
const int diff = src_y[x] - frame_y[x];
sse += diff * diff;
}
......@@ -53,16 +64,26 @@ static int64_t compute_sb_y_sse_highbd(const YV12_BUFFER_CONFIG *src,
#endif
static int64_t compute_sb_y_sse(const YV12_BUFFER_CONFIG *src,
const YV12_BUFFER_CONFIG *frame, int mi_row,
int mi_col) {
const YV12_BUFFER_CONFIG *frame,
AV1_COMMON *const cm, int mi_row, int mi_col) {
int64_t sse = 0;
const int row = mi_row * MI_SIZE;
const int col = mi_col * MI_SIZE;
const int mi_row_start = AOMMAX(0, mi_row - FILT_BOUNDARY_MI_OFFSET);
const int mi_col_start = AOMMAX(0, mi_col - FILT_BOUNDARY_MI_OFFSET);
const int mi_row_range = mi_row - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
const int mi_col_range = mi_col - FILT_BOUNDARY_MI_OFFSET + MAX_MIB_SIZE;
const int mi_row_end = AOMMIN(mi_row_range, cm->mi_rows);
const int mi_col_end = AOMMIN(mi_col_range, cm->mi_cols);
const int row = mi_row_start * MI_SIZE;
const int col = mi_col_start * MI_SIZE;
const uint8_t *src_y = src->y_buffer + row * src->y_stride + col;
const uint8_t *frame_y = frame->y_buffer + row * frame->y_stride + col;
const int row_end = (mi_row_end - mi_row_start) * MI_SIZE;
const int col_end = (mi_col_end - mi_col_start) * MI_SIZE;
int x, y;
for (y = 0; y < MAX_MIB_SIZE * MI_SIZE; ++y) {
for (x = 0; x < MAX_MIB_SIZE * MI_SIZE; ++x) {
for (y = 0; y < row_end; ++y) {
for (x = 0; x < col_end; ++x) {
const int diff = src_y[x] - frame_y[x];
sse += diff * diff;
}
......@@ -96,9 +117,10 @@ int av1_get_max_filter_level(const AV1_COMP *cpi) {
#if CONFIG_LPF_SB
// TODO(chengchen): reduce memory usage by copy superblock instead of frame
static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
AV1_COMP *const cpi, int filt_level,
int partial_frame, int mi_row, int mi_col) {
static int64_t try_filter_superblock(const YV12_BUFFER_CONFIG *sd,
AV1_COMP *const cpi, int filt_level,
int partial_frame, int mi_row,
int mi_col) {
AV1_COMMON *const cm = &cpi->common;
int64_t filt_err;
......@@ -117,12 +139,13 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) {
filt_err = compute_sb_y_sse_highbd(sd, cm->frame_to_show, mi_row, mi_col);
filt_err =
compute_sb_y_sse_highbd(sd, cm->frame_to_show, cm, mi_row, mi_col);
} else {
filt_err = compute_sb_y_sse(sd, cm->frame_to_show, mi_row, mi_col);
filt_err = compute_sb_y_sse(sd, cm->frame_to_show, cm, mi_row, mi_col);
}
#else
filt_err = compute_sb_y_sse(sd, cm->frame_to_show, mi_row, mi_col);
filt_err = compute_sb_y_sse(sd, cm->frame_to_show, cm, mi_row, mi_col);
#endif // CONFIG_HIGHBITDEPTH
// TODO(chengchen): Copy the superblock only
......@@ -135,6 +158,9 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int partial_frame, double *best_cost_ret,
int mi_row, int mi_col, int last_lvl) {
assert(partial_frame == 1);
assert(last_lvl >= 0);
const AV1_COMMON *const cm = &cpi->common;
const int min_filter_level = AOMMAX(0, last_lvl - MAX_LPF_OFFSET);
const int max_filter_level =
......@@ -143,18 +169,20 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int filt_best = last_lvl;
MACROBLOCK *x = &cpi->td.mb;
// TODO(chengchen): Copy for superblock only
// Make a copy of the unfiltered / processed recon buffer
aom_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);
int64_t estimate_err =
try_filter_frame(sd, cpi, last_lvl, partial_frame, mi_row, mi_col);
try_filter_superblock(sd, cpi, last_lvl, partial_frame, mi_row, mi_col);
int i;
for (i = min_filter_level; i <= max_filter_level; ++i) {
if (i == last_lvl) continue;
int64_t filt_err =
try_filter_frame(sd, cpi, i, partial_frame, mi_row, mi_col);
try_filter_superblock(sd, cpi, i, partial_frame, mi_row, mi_col);
if (filt_err < best_err) {
best_err = filt_err;
filt_best = i;
......@@ -164,7 +192,27 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// If previous sb filter level has similar filtering performance as current
// best filter level, use previous level such that we can only send one bit
// to indicate current filter level is the same as the previous.
const int64_t threshold = 700;
int64_t threshold = 700;
// ratio = the filtering area / a superblock size
int64_t ratio = 1;
if (mi_row + MAX_MIB_SIZE > cm->mi_rows) {
ratio *= (cm->mi_rows - mi_row);
} else {
if (mi_row == 0) {
ratio *= (MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET);
}
}
if (mi_col + MAX_MIB_SIZE > cm->mi_cols) {
ratio *= (cm->mi_cols - mi_col);
} else {
if (mi_col == 0) {
ratio *= (MAX_MIB_SIZE - FILT_BOUNDARY_MI_OFFSET);
}
}
threshold = threshold * ratio / (MAX_MIB_SIZE * MAX_MIB_SIZE);
// TODO(chengchen): shall the first superblock always send full filter level?
if ((mi_row > 0 || mi_col > 0) && abs(estimate_err - best_err) < threshold) {
best_err = estimate_err;
filt_best = last_lvl;
......@@ -402,19 +450,18 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
} else {
#if CONFIG_LPF_SB
int mi_row, mi_col;
// TODO(chengchen): init last_lvl using previous frame's info?
int last_lvl = 0;
// TODO(chengchen): if the frame size makes the last superblock very small,
// consider merging it into the previous superblock to save bits.
// For example, if the frame size is 1080x720, the last row of superblocks
// contains only (FILT_BOUNDARY_OFFSET + 16) pixels.
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MAX_MIB_SIZE) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
int lvl =
search_filter_level(sd, cpi, 1, NULL, mi_row, mi_col, last_lvl);
int row, col;
for (row = mi_row; row < mi_row + MAX_MIB_SIZE && row < cm->mi_rows;
++row) {
for (col = mi_col; col < mi_col + MAX_MIB_SIZE && col < cm->mi_cols;
++col) {
cm->mi_grid_visible[row * cm->mi_stride + col]->mbmi.filt_lvl = lvl;
}
}
av1_loop_filter_sb_level_init(cm, mi_row, mi_col, lvl);
// For the superblock at row start, its previous filter level should be
// the one above it, not the one at the end of last row
......@@ -425,7 +472,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
}
}
}
#else
#else // CONFIG_LPF_SB
#if CONFIG_LOOPFILTER_LEVEL
lf->filter_level[0] = lf->filter_level[1] = search_filter_level(
sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, NULL, 0, 2);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment