Commit 42ff3881 authored by Ryan Lei's avatar Ryan Lei
Browse files

check in the final code implementation for parallel deblocking proposal from...

check in the final code implementation for parallel deblocking proposal from Intel. code changes have been reviewed and approved.

Change-Id: I4a3cdb939b7b96a3aa27f6a00da7a0e73222f3f3
parent 8234d97a
......@@ -1151,9 +1151,10 @@ static void highbd_filter_selectively_vert(
}
#endif // CONFIG_AOM_HIGHBITDEPTH
void av1_filter_block_plane_non420(AV1_COMMON *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8, int mi_row, int mi_col) {
void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8, int mi_row,
int mi_col) {
const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
......@@ -1274,8 +1275,24 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm,
mi_8x8 += row_step_stride;
}
// Now do horizontal pass
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
}
void av1_filter_block_plane_non420_hor(AV1_COMMON *cm,
struct macroblockd_plane *plane,
int mi_row) {
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 };
unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 };
unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 };
unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 };
uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
int r;
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
......@@ -1311,11 +1328,14 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm,
#endif // CONFIG_AOM_HIGHBITDEPTH
dst->buf += 8 * dst->stride;
}
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
}
void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
void av1_filter_block_plane_ss00_ver(AV1_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r;
......@@ -1356,13 +1376,22 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
mask_4x4 >>= 16;
mask_4x4_int >>= 16;
}
// Horizontal pass
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
mask_16x16 = lfm->above_y[TX_16X16];
mask_8x8 = lfm->above_y[TX_8X8];
mask_4x4 = lfm->above_y[TX_4X4];
mask_4x4_int = lfm->int_4x4_y;
}
void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r;
uint64_t mask_16x16 = lfm->above_y[TX_16X16];
uint64_t mask_8x8 = lfm->above_y[TX_8X8];
uint64_t mask_4x4 = lfm->above_y[TX_4X4];
uint64_t mask_4x4_int = lfm->int_4x4_y;
assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) {
unsigned int mask_16x16_r;
......@@ -1402,11 +1431,14 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
mask_4x4 >>= 8;
mask_4x4_int >>= 8;
}
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
}
void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r, c;
......@@ -1464,17 +1496,27 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
}
}
// Horizontal pass
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
mask_16x16 = lfm->above_uv[TX_16X16];
mask_8x8 = lfm->above_uv[TX_8X8];
mask_4x4 = lfm->above_uv[TX_4X4];
}
void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r;
uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
#if CONFIG_MISC_FIXES
mask_4x4_int = lfm->above_int_4x4_uv;
uint64_t mask_4x4_int = lfm->above_int_4x4_uv;
#else
mask_4x4_int = lfm->int_4x4_uv;
uint64_t mask_4x4_int = lfm->int_4x4_uv;
#endif
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r =
......@@ -1516,6 +1558,9 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
mask_4x4 >>= 4;
mask_4x4_int >>= 4;
}
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
}
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
......@@ -1535,6 +1580,8 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
else
path = LF_PATH_SLOW;
#if CONFIG_PARALLEL_DEBLOCKING
// Filter all the vertical edges in the whole frame
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
......@@ -1542,27 +1589,84 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
int plane;
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
// TODO(JBB): Make setup_mask work for non 420.
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
}
}
}
// Filter all the horizontal edges in the whole frame
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
break;
}
}
}
}
#else // CONFIG_PARALLEL_DEBLOCKING
for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
int plane;
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
break;
}
}
}
}
#endif // CONFIG_PARALLEL_DEBLOCKING
}
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
......
......@@ -103,17 +103,27 @@ void av1_setup_mask(struct AV1Common *const cm, const int mi_row,
const int mi_col, MODE_INFO **mi_8x8,
const int mode_info_stride, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss00(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss11(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_non420(struct AV1Common *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8, int mi_row, int mi_col);
void av1_filter_block_plane_ss00_ver(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss00_hor(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss11_ver(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss11_hor(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_non420_ver(struct AV1Common *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8, int mi_row,
int mi_col);
void av1_filter_block_plane_non420_hor(struct AV1Common *cm,
struct macroblockd_plane *plane,
int mi_row);
void av1_loop_filter_init(struct AV1Common *cm);
......
......@@ -85,70 +85,167 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
#endif // CONFIG_MULTITHREAD
}
// Implement row loopfiltering for each thread.
static INLINE void thread_loop_filter_rows(
const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
int y_only, AV1LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
int mi_row, mi_col;
enum lf_path path;
static INLINE enum lf_path get_loop_filter_path(
int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
if (y_only)
path = LF_PATH_444;
return LF_PATH_444;
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
path = LF_PATH_420;
return LF_PATH_420;
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
path = LF_PATH_444;
return LF_PATH_444;
else
path = LF_PATH_SLOW;
return LF_PATH_SLOW;
}
static INLINE void loop_filter_block_plane_ver(
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
LOOP_FILTER_MASK *lfm) {
if (plane == 0) {
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, lfm);
} else {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
}
}
static INLINE void loop_filter_block_plane_hor(
AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
int mi_row, enum lf_path path, LOOP_FILTER_MASK *lfm) {
if (plane == 0) {
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, lfm);
} else {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
break;
}
}
}
// Row-based multi-threaded loopfilter hook
#if CONFIG_PARALLEL_DEBLOCKING
static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
for (mi_row = lf_data->start; mi_row < lf_data->stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi =
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
for (mi_col = 0; mi_col < lf_data->cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
LOOP_FILTER_MASK lfm;
int plane;
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
mi_col);
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
lf_data->cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane)
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
mi_row, mi_col, path, &lfm);
}
}
return 1;
}
static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
const int sb_cols =
mi_cols_aligned_to_sb(lf_data->cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
int mi_row, mi_col;
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
for (mi_row = start; mi_row < stop;
for (mi_row = lf_data->start; mi_row < lf_data->stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
MODE_INFO **const mi =
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
for (mi_col = 0; mi_col < lf_data->cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
LOOP_FILTER_MASK lfm;
int plane;
// TODO(wenhao.zhang@intel.com): For better parallelization, reorder
// the outer loop to column-based and remove the synchronizations here.
sync_read(lf_sync, r, c);
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
}
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
mi_col);
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
lf_data->cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane)
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
path, &lfm);
sync_write(lf_sync, r, c, sb_cols);
}
}
return 1;
}
// Row-based multi-threaded loopfilter hook
#else // CONFIG_PARALLEL_DEBLOCKING
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
lf_data->start, lf_data->stop, lf_data->y_only,
lf_sync);
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
const int sb_cols =
mi_cols_aligned_to_sb(lf_data->cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
int mi_row, mi_col;
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
for (mi_row = lf_data->start; mi_row < lf_data->stop;
mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi =
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
for (mi_col = 0; mi_col < lf_data->cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
LOOP_FILTER_MASK lfm;
int plane;
sync_read(lf_sync, r, c);
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
mi_col);
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
lf_data->cm->mi_stride, &lfm);
for (plane = 0; plane < num_planes; ++plane) {
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
mi_row, mi_col, path, &lfm);
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
path, &lfm);
}
sync_write(lf_sync, r, c, sb_cols);
}
}
return 1;
}
#endif // CONFIG_PARALLEL_DEBLOCKING
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
struct macroblockd_plane planes[MAX_MB_PLANE],
......@@ -170,9 +267,6 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
}
// Initialize cur_sb_col to -1 for all SB rows.
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
// Set up loopfilter thread data.
// The decoder is capping num_workers because it has been observed that using
// more threads on the loopfilter than there are cores will hurt performance
......@@ -181,6 +275,71 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
// tries to use more threads for the loopfilter, it will hurt performance
// because of contention. If the multithreading code changes in the future
// then the number of workers used by the loopfilter should be revisited.
#if CONFIG_PARALLEL_DEBLOCKING
// Initialize cur_sb_col to -1 for all SB rows.
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
// Filter all the vertical edges in the whole frame
for (i = 0; i < num_workers; ++i) {
AVxWorker *const worker = &workers[i];
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
worker->hook = (AVxWorkerHook)loop_filter_ver_row_worker;
worker->data1 = lf_sync;
worker->data2 = lf_data;
// Loopfilter data
av1_loop_filter_data_reset(lf_data, frame, cm, planes);
lf_data->start = start + i * MI_BLOCK_SIZE;
lf_data->stop = stop;
lf_data->y_only = y_only;
// Start loopfiltering
if (i == num_workers - 1) {
winterface->execute(worker);
} else {
winterface->launch(worker);
}
}
// Wait till all rows are finished
for (i = 0; i < num_workers; ++i) {
winterface->sync(&workers[i]);
}
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
// Filter all the horizontal edges in the whole frame
for (i = 0; i < num_workers; ++i) {
AVxWorker *const worker = &workers[i];
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
worker->hook = (AVxWorkerHook)loop_filter_hor_row_worker;
worker->data1 = lf_sync;
worker->data2 = lf_data;
// Loopfilter data
av1_loop_filter_data_reset(lf_data, frame, cm, planes);
lf_data->start = start + i * MI_BLOCK_SIZE;
lf_data->stop = stop;
lf_data->y_only = y_only;
// Start loopfiltering
if (i == num_workers - 1) {
winterface->execute(worker);
} else {
winterface->launch(worker);
}
}
// Wait till all rows are finished
for (i = 0; i < num_workers; ++i) {
winterface->sync(&workers[i]);
}
#else // CONFIG_PARALLEL_DEBLOCKING
// Initialize cur_sb_col to -1 for all SB rows.
memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
for (i = 0; i < num_workers; ++i) {
AVxWorker *const worker = &workers[i];
LFWorkerData *const lf_data = &lf_sync->lfdata[i];
......@@ -207,6 +366,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
for (i = 0; i < num_workers; ++i) {
winterface->sync(&workers[i]);
}
#endif // CONFIG_PARALLEL_DEBLOCKING
}
void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
......
......@@ -263,6 +263,7 @@ EXPERIMENT_LIST="
supertx
ans
daala_ec
parallel_deblocking
"
CONFIG_LIST="
dependency_tracking
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment