Commit 879b4ff8 authored by Ryan Lei, committed by Yaowu Xu
Browse files

Fix two bugs in parallel_deblocking experiment

This commit fixes two major bugs in the parallel deblocking experiment. The
first is a missing initialization of the lfm->lfl_uv array for horizontal
filtering. The second is an inconsistent order of vertical/horizontal
filtering of superblocks within a frame between the encoder and the decoder.

BUG=https://bugs.chromium.org/p/aomedia/issues/detail?id=45#c2
BUG=https://bugs.chromium.org/p/aomedia/issues/detail?id=53#c1

Change-Id: I2df7eb313d49203fb70efe2bdf957b9d7e0bf678
parent 585fc42b
......@@ -1185,8 +1185,9 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 };
unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 };
unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 };
uint8_t lfl[MAX_MIB_SIZE * MAX_MIB_SIZE];
uint8_t lfl[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
int r, c;
MODE_INFO **tmp_mi = mi_8x8;
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
......@@ -1196,7 +1197,7 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
// Determine the vertical edges that need filtering
for (c = 0; c < MAX_MIB_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = mi_8x8[c];
const MODE_INFO *mi = tmp_mi[c];
const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
// left edge of current unit is block/partition edge -> no skip
......@@ -1217,56 +1218,60 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
const int c_step = (c >> ss_x);
const int r_step = (r >> ss_y);
const int col_mask = 1 << c_step;
// Filter level can vary per MI
if (!(lfl[(r << 3) + (c >> ss_x)] =
if (!(lfl[(r << 3) + c_step] =
get_filter_level(&cm->lf_info, &mi[0].mbmi)))
continue;
// Build masks based on the transform size of each block
if (tx_size == TX_32X32) {
if (!skip_this_c && ((c >> ss_x) & 3) == 0) {
if (!skip_this_c && (c_step & 3) == 0) {
if (!skip_border_4x4_c)
mask_16x16_c |= 1 << (c >> ss_x);
mask_16x16_c |= col_mask;
else
mask_8x8_c |= 1 << (c >> ss_x);
mask_8x8_c |= col_mask;
}
if (!skip_this_r && ((r >> ss_y) & 3) == 0) {
if (!skip_this_r && (r_step & 3) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= 1 << (c >> ss_x);
mask_16x16[r] |= col_mask;
else
mask_8x8[r] |= 1 << (c >> ss_x);
mask_8x8[r] |= col_mask;
}
} else if (tx_size == TX_16X16) {
if (!skip_this_c && ((c >> ss_x) & 1) == 0) {
if (!skip_this_c && (c_step & 1) == 0) {
if (!skip_border_4x4_c)
mask_16x16_c |= 1 << (c >> ss_x);
mask_16x16_c |= col_mask;
else
mask_8x8_c |= 1 << (c >> ss_x);
mask_8x8_c |= col_mask;
}
if (!skip_this_r && ((r >> ss_y) & 1) == 0) {
if (!skip_this_r && (r_step & 1) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= 1 << (c >> ss_x);
mask_16x16[r] |= col_mask;
else
mask_8x8[r] |= 1 << (c >> ss_x);
mask_8x8[r] |= col_mask;
}
} else {
// force 8x8 filtering on 32x32 boundaries
if (!skip_this_c) {
if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0)
mask_8x8_c |= 1 << (c >> ss_x);
if (tx_size == TX_8X8 || (c_step & 3) == 0)
mask_8x8_c |= col_mask;
else
mask_4x4_c |= 1 << (c >> ss_x);
mask_4x4_c |= col_mask;
}
if (!skip_this_r) {
if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0)
mask_8x8[r] |= 1 << (c >> ss_x);
if (tx_size == TX_8X8 || (r_step & 3) == 0)
mask_8x8[r] |= col_mask;
else
mask_4x4[r] |= 1 << (c >> ss_x);
mask_4x4[r] |= col_mask;
}
if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
mask_4x4_int[r] |= 1 << (c >> ss_x);
mask_4x4_int[r] |= col_mask;
}
}
......@@ -1291,7 +1296,7 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
mask_4x4_int[r], &cm->lf_info, &lfl[r << 3]);
#endif // CONFIG_AOM_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
mi_8x8 += row_step_stride;
tmp_mi += row_step_stride;
}
// restore the buf pointer in case there is additional filter pass.
......@@ -1300,17 +1305,112 @@ void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
void av1_filter_block_plane_non420_hor(AV1_COMMON *cm,
struct macroblockd_plane *plane,
int mi_row) {
MODE_INFO **mi_8x8, int mi_row,
int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
const int col_step = 1 << ss_x;
const int row_step_stride = cm->mi_stride * row_step;
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
unsigned int mask_16x16[MAX_MIB_SIZE] = { 0 };
unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 };
unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 };
unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 };
uint8_t lfl[MAX_MIB_SIZE * MAX_MIB_SIZE];
int r;
uint8_t lfl[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
int r, c;
MODE_INFO **tmp_mi = mi_8x8;
// Re-populate the filter masks for the horizontal pass; this is the same as
// the code in av1_filter_block_plane_non420_ver.
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
unsigned int mask_8x8_c = 0;
unsigned int mask_4x4_c = 0;
// Determine the horizontal edges that need filtering
for (c = 0; c < MAX_MIB_SIZE && mi_col + c < cm->mi_cols; c += col_step) {
const MODE_INFO *mi = tmp_mi[c];
const BLOCK_SIZE sb_type = mi[0].mbmi.sb_type;
const int skip_this = mi[0].mbmi.skip && is_inter_block(&mi[0].mbmi);
// left edge of current unit is block/partition edge -> no skip
const int block_edge_left =
(num_4x4_blocks_wide_lookup[sb_type] > 1)
? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1))
: 1;
const int skip_this_c = skip_this && !block_edge_left;
// top edge of current unit is block/partition edge -> no skip
const int block_edge_above =
(num_4x4_blocks_high_lookup[sb_type] > 1)
? !(r & (num_8x8_blocks_high_lookup[sb_type] - 1))
: 1;
const int skip_this_r = skip_this && !block_edge_above;
const TX_SIZE tx_size = (plane->plane_type == PLANE_TYPE_UV)
? get_uv_tx_size(&mi[0].mbmi, plane)
: mi[0].mbmi.tx_size;
const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1;
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
const int c_step = (c >> ss_x);
const int r_step = (r >> ss_y);
const int col_mask = 1 << c_step;
// Filter level can vary per MI
if (!(lfl[(r << 3) + c_step] =
get_filter_level(&cm->lf_info, &mi[0].mbmi)))
continue;
// Build masks based on the transform size of each block
if (tx_size == TX_32X32) {
if (!skip_this_c && (c_step & 3) == 0) {
if (!skip_border_4x4_c)
mask_16x16_c |= col_mask;
else
mask_8x8_c |= col_mask;
}
if (!skip_this_r && (r_step & 3) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= col_mask;
else
mask_8x8[r] |= col_mask;
}
} else if (tx_size == TX_16X16) {
if (!skip_this_c && (c_step & 1) == 0) {
if (!skip_border_4x4_c)
mask_16x16_c |= col_mask;
else
mask_8x8_c |= col_mask;
}
if (!skip_this_r && (r_step & 1) == 0) {
if (!skip_border_4x4_r)
mask_16x16[r] |= col_mask;
else
mask_8x8[r] |= col_mask;
}
} else {
// force 8x8 filtering on 32x32 boundaries
if (!skip_this_c) {
if (tx_size == TX_8X8 || (c_step & 3) == 0)
mask_8x8_c |= col_mask;
else
mask_4x4_c |= col_mask;
}
if (!skip_this_r) {
if (tx_size == TX_8X8 || (r_step & 3) == 0)
mask_8x8[r] |= col_mask;
else
mask_4x4[r] |= col_mask;
}
if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c)
mask_4x4_int[r] |= col_mask;
}
}
tmp_mi += row_step_stride;
}
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
......@@ -1472,6 +1572,7 @@ void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
#endif
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));
// Vertical pass: do 2 rows at one time
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 4) {
......@@ -1524,7 +1625,7 @@ void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r;
int r, c;
uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
......@@ -1536,6 +1637,18 @@ void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
// Re-populate the filter level for uv, same as the code for the vertical
// filter in av1_filter_block_plane_ss11_ver.
memset(lfm->lfl_uv, 0, sizeof(lfm->lfl_uv));
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 4) {
if (plane->plane_type == 1) {
for (c = 0; c < (MAX_MIB_SIZE >> 1); c++) {
lfm->lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)];
lfm->lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)];
}
}
}
for (r = 0; r < MAX_MIB_SIZE && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r =
......@@ -1648,7 +1761,8 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
}
......@@ -1679,7 +1793,8 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
case LF_PATH_SLOW:
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
}
......
......@@ -123,7 +123,8 @@ void av1_filter_block_plane_non420_ver(struct AV1Common *cm,
int mi_col);
void av1_filter_block_plane_non420_hor(struct AV1Common *cm,
struct macroblockd_plane *plane,
int mi_row);
MODE_INFO **mi_8x8, int mi_row,
int mi_col);
void av1_loop_filter_init(struct AV1Common *cm);
......
......@@ -112,8 +112,8 @@ static INLINE void loop_filter_block_plane_ver(
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi, mi_row,
mi_col);
break;
}
}
......@@ -121,7 +121,8 @@ static INLINE void loop_filter_block_plane_ver(
static INLINE void loop_filter_block_plane_hor(
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
int mi_row, enum lf_path path, LOOP_FILTER_MASK *lfm) {
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
LOOP_FILTER_MASK *lfm) {
if (plane == 0) {
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, lfm);
} else {
......@@ -133,7 +134,8 @@ static INLINE void loop_filter_block_plane_hor(
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi, mi_row,
mi_col);
break;
}
}
......@@ -162,8 +164,8 @@ static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
lf_data->cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane)
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
mi_row, mi_col, path, &lfm);
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
mi + mi_col, mi_row, mi_col, path, &lfm);
}
}
return 1;
......@@ -198,9 +200,8 @@ static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
lf_data->cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane)
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
path, &lfm);
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
mi + mi_col, mi_row, mi_col, path, &lfm);
sync_write(lf_sync, r, c, sb_cols);
}
}
......@@ -234,10 +235,10 @@ static int loop_filter_row_worker(AV1LfSync *const lf_sync,
lf_data->cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
mi_row, mi_col, path, &lfm);
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
path, &lfm);
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
mi + mi_col, mi_row, mi_col, path, &lfm);
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
mi + mi_col, mi_row, mi_col, path, &lfm);
}
sync_write(lf_sync, r, c, sb_cols);
......
......@@ -1371,6 +1371,11 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
}
// when Parallel deblocking is enabled, deblocking should not
// be interleaved with decoding. Instead, deblocking should be done
// after the entire frame is decoded.
#if !CONFIG_PARALLEL_DEBLOCKING
// Loopfilter one row.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
const int lf_start = mi_row - MAX_MIB_SIZE;
......@@ -1391,9 +1396,11 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
winterface->execute(&pbi->lf_worker);
}
}
// After loopfiltering, the last 7 row pixels in each superblock row may
// still be changed by the longest loopfilter of the next superblock
// row.
// After loopfiltering, the last 7 row pixels in each superblock row may
// still be changed by the longest loopfilter of the next superblock
// row.
#endif // !CONFIG_PARALLEL_DEBLOCKING
if (cm->frame_parallel_decode)
av1_frameworker_broadcast(pbi->cur_buf, mi_row << MAX_MIB_SIZE_LOG2);
}
......@@ -1403,6 +1410,16 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
// aom_accounting_dump(&pbi->accounting);
#endif
#if CONFIG_PARALLEL_DEBLOCKING
// Loopfilter all rows in the frame.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1;
winterface->sync(&pbi->lf_worker);
lf_data->start = 0;
lf_data->stop = cm->mi_rows;
winterface->execute(&pbi->lf_worker);
}
#else
// Loopfilter remaining rows in the frame.
if (cm->lf.filter_level && !cm->skip_loop_filter) {
LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1;
......@@ -1411,6 +1428,7 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
lf_data->stop = cm->mi_rows;
winterface->execute(&pbi->lf_worker);
}
#endif // CONFIG_PARALLEL_DEBLOCKING
// Get last tile data.
tile_data = pbi->tile_data + tile_cols * tile_rows - 1;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment