Commit 15149484, authored by Ryan Lei and committed by Yaowu Xu

Add parallel-deblocking experiment

This commit is a manual cherry-pick from aom/master:
42ff3881

Change-Id: I4a3cdb939b7b96a3aa27f6a00da7a0e73222f3f3
parent 5a1fedfd
...@@ -1183,9 +1183,10 @@ static void highbd_filter_selectively_vert( ...@@ -1183,9 +1183,10 @@ static void highbd_filter_selectively_vert(
} }
#endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_AOM_HIGHBITDEPTH
void av1_filter_block_plane_non420(AV1_COMMON *cm, void av1_filter_block_plane_non420_ver(AV1_COMMON *cm,
struct macroblockd_plane *plane, struct macroblockd_plane *plane,
MODE_INFO **mib, int mi_row, int mi_col) { MODE_INFO **mib, int mi_row,
int mi_col) {
const int ss_x = plane->subsampling_x; const int ss_x = plane->subsampling_x;
const int ss_y = plane->subsampling_y; const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y; const int row_step = 1 << ss_y;
...@@ -1369,6 +1370,22 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm, ...@@ -1369,6 +1370,22 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm,
// Now do horizontal pass // Now do horizontal pass
dst->buf = dst0; dst->buf = dst0;
}
void av1_filter_block_plane_non420_hor(AV1_COMMON *cm,
struct macroblockd_plane *plane,
int mi_row) {
const int ss_y = plane->subsampling_y;
const int row_step = 1 << ss_y;
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
unsigned int mask_16x16[MAX_MIB_SIZE] = { 0 };
unsigned int mask_8x8[MAX_MIB_SIZE] = { 0 };
unsigned int mask_4x4[MAX_MIB_SIZE] = { 0 };
unsigned int mask_4x4_int[MAX_MIB_SIZE] = { 0 };
uint8_t lfl[MAX_MIB_SIZE][MAX_MIB_SIZE];
int r;
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) { for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += row_step) {
const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1;
const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r]; const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 0 : mask_4x4_int[r];
...@@ -1404,11 +1421,12 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm, ...@@ -1404,11 +1421,12 @@ void av1_filter_block_plane_non420(AV1_COMMON *cm,
#endif // CONFIG_AOM_HIGHBITDEPTH #endif // CONFIG_AOM_HIGHBITDEPTH
dst->buf += MI_SIZE * dst->stride; dst->buf += MI_SIZE * dst->stride;
} }
dst->buf = dst0;
} }
void av1_filter_block_plane_ss00(AV1_COMMON *const cm, void av1_filter_block_plane_ss00_ver(AV1_COMMON *const cm,
struct macroblockd_plane *const plane, struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) { int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst; struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf; uint8_t *const dst0 = dst->buf;
int r; int r;
...@@ -1452,10 +1470,20 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm, ...@@ -1452,10 +1470,20 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
// Horizontal pass // Horizontal pass
dst->buf = dst0; dst->buf = dst0;
mask_16x16 = lfm->above_y[TX_16X16]; }
mask_8x8 = lfm->above_y[TX_8X8];
mask_4x4 = lfm->above_y[TX_4X4]; void av1_filter_block_plane_ss00_hor(AV1_COMMON *const cm,
mask_4x4_int = lfm->int_4x4_y; struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r;
uint64_t mask_16x16 = lfm->above_y[TX_16X16];
uint64_t mask_8x8 = lfm->above_y[TX_8X8];
uint64_t mask_4x4 = lfm->above_y[TX_4X4];
uint64_t mask_4x4_int = lfm->int_4x4_y;
assert(plane->subsampling_x == 0 && plane->subsampling_y == 0);
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) { for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r++) {
unsigned int mask_16x16_r; unsigned int mask_16x16_r;
...@@ -1495,11 +1523,13 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm, ...@@ -1495,11 +1523,13 @@ void av1_filter_block_plane_ss00(AV1_COMMON *const cm,
mask_4x4 >>= MI_SIZE; mask_4x4 >>= MI_SIZE;
mask_4x4_int >>= MI_SIZE; mask_4x4_int >>= MI_SIZE;
} }
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
} }
void av1_filter_block_plane_ss11(AV1_COMMON *const cm, void av1_filter_block_plane_ss11_ver(AV1_COMMON *const cm,
struct macroblockd_plane *const plane, struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) { int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst; struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf; uint8_t *const dst0 = dst->buf;
int r, c; int r, c;
...@@ -1554,10 +1584,20 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm, ...@@ -1554,10 +1584,20 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
// Horizontal pass // Horizontal pass
dst->buf = dst0; dst->buf = dst0;
mask_16x16 = lfm->above_uv[TX_16X16]; }
mask_8x8 = lfm->above_uv[TX_8X8];
mask_4x4 = lfm->above_uv[TX_4X4]; void av1_filter_block_plane_ss11_hor(AV1_COMMON *const cm,
mask_4x4_int = lfm->above_int_4x4_uv; struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm) {
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
int r;
uint64_t mask_16x16 = lfm->above_uv[TX_16X16];
uint64_t mask_8x8 = lfm->above_uv[TX_8X8];
uint64_t mask_4x4 = lfm->above_uv[TX_4X4];
uint64_t mask_4x4_int = lfm->above_int_4x4_uv;
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) { for (r = 0; r < cm->mib_size && mi_row + r < cm->mi_rows; r += 2) {
const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1;
...@@ -1600,6 +1640,8 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm, ...@@ -1600,6 +1640,8 @@ void av1_filter_block_plane_ss11(AV1_COMMON *const cm,
mask_4x4 >>= MI_SIZE / 2; mask_4x4 >>= MI_SIZE / 2;
mask_4x4_int >>= MI_SIZE / 2; mask_4x4_int >>= MI_SIZE / 2;
} }
// restore the buf pointer in case there is additional filter pass.
dst->buf = dst0;
} }
void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
...@@ -1622,12 +1664,14 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, ...@@ -1622,12 +1664,14 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
for (plane = 0; plane < num_planes; ++plane) for (plane = 0; plane < num_planes; ++plane) {
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_col); mi_row, mi_col);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
}
} }
} }
#else #else // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
const int num_planes = y_only ? 1 : MAX_MB_PLANE; const int num_planes = y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col; int mi_row, mi_col;
enum lf_path path; enum lf_path path;
...@@ -1641,7 +1685,34 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, ...@@ -1641,7 +1685,34 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
path = LF_PATH_444; path = LF_PATH_444;
else else
path = LF_PATH_SLOW; path = LF_PATH_SLOW;
#if CONFIG_PARALLEL_DEBLOCKING
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
int plane;
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
}
}
}
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) { for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
...@@ -1652,23 +1723,56 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm, ...@@ -1652,23 +1723,56 @@ void av1_loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
// TODO(JBB): Make setup_mask work for non 420. // TODO(JBB): Make setup_mask work for non 420.
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) { for (plane = 1; plane < num_planes; ++plane) {
switch (path) { switch (path) {
case LF_PATH_420: case LF_PATH_420:
av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
break; break;
case LF_PATH_444: case LF_PATH_444:
av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm); av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
break; break;
case LF_PATH_SLOW: case LF_PATH_SLOW:
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
mi_row, mi_col); break;
}
}
}
}
#else // CONFIG_PARALLEL_DEBLOCKING
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MAX_MIB_SIZE) {
int plane;
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);
// TODO(JBB): Make setup_mask work for non 420.
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);
av1_filter_block_plane_ss00_ver(cm, &planes[0], mi_row, &lfm);
av1_filter_block_plane_ss00_hor(cm, &planes[0], mi_row, &lfm);
for (plane = 1; plane < num_planes; ++plane) {
switch (path) {
case LF_PATH_420:
av1_filter_block_plane_ss11_ver(cm, &planes[plane], mi_row, &lfm);
av1_filter_block_plane_ss11_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00_ver(cm, &planes[plane], mi_row, &lfm);
av1_filter_block_plane_ss00_hor(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420_ver(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
av1_filter_block_plane_non420_hor(cm, &planes[plane], mi_row);
break; break;
} }
} }
} }
} }
#endif // CONFIG_PARALLEL_DEBLOCKING
#endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES #endif // CONFIG_VAR_TX || CONFIG_EXT_PARTITION || CONFIG_EXT_PARTITION_TYPES
} }
......
...@@ -99,17 +99,26 @@ void av1_setup_mask(struct AV1Common *const cm, const int mi_row, ...@@ -99,17 +99,26 @@ void av1_setup_mask(struct AV1Common *const cm, const int mi_row,
const int mi_col, MODE_INFO **mi_8x8, const int mi_col, MODE_INFO **mi_8x8,
const int mode_info_stride, LOOP_FILTER_MASK *lfm); const int mode_info_stride, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss00(struct AV1Common *const cm, void av1_filter_block_plane_ss00_ver(struct AV1Common *const cm,
struct macroblockd_plane *const plane, struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm); int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_ss00_hor(struct AV1Common *const cm,
void av1_filter_block_plane_ss11(struct AV1Common *const cm, struct macroblockd_plane *const plane,
struct macroblockd_plane *const plane, int mi_row, LOOP_FILTER_MASK *lfm);
int mi_row, LOOP_FILTER_MASK *lfm); void av1_filter_block_plane_ss11_ver(struct AV1Common *const cm,
struct macroblockd_plane *const plane,
void av1_filter_block_plane_non420(struct AV1Common *cm, int mi_row, LOOP_FILTER_MASK *lfm);
struct macroblockd_plane *plane, void av1_filter_block_plane_ss11_hor(struct AV1Common *const cm,
MODE_INFO **mi_8x8, int mi_row, int mi_col); struct macroblockd_plane *const plane,
int mi_row, LOOP_FILTER_MASK *lfm);
void av1_filter_block_plane_non420_ver(struct AV1Common *cm,
struct macroblockd_plane *plane,
MODE_INFO **mi_8x8, int mi_row,
int mi_col);
void av1_filter_block_plane_non420_hor(struct AV1Common *cm,
struct macroblockd_plane *plane,
int mi_row);
void av1_loop_filter_init(struct AV1Common *cm); void av1_loop_filter_init(struct AV1Common *cm);
......
...@@ -85,25 +85,153 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c, ...@@ -85,25 +85,153 @@ static INLINE void sync_write(AV1LfSync *const lf_sync, int r, int c,
#endif // CONFIG_MULTITHREAD #endif // CONFIG_MULTITHREAD
} }
// Implement row loopfiltering for each thread.
static INLINE void thread_loop_filter_rows(
const YV12_BUFFER_CONFIG *const frame_buffer, AV1_COMMON *const cm,
struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
int y_only, AV1LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int sb_cols = mi_cols_aligned_to_sb(cm) >> cm->mib_size_log2;
int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES #if !CONFIG_EXT_PARTITION_TYPES
enum lf_path path; static INLINE enum lf_path get_loop_filter_path(
LOOP_FILTER_MASK lfm; int y_only, struct macroblockd_plane planes[MAX_MB_PLANE]) {
if (y_only) if (y_only)
path = LF_PATH_444; return LF_PATH_444;
else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
path = LF_PATH_420; return LF_PATH_420;
else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
path = LF_PATH_444; return LF_PATH_444;
else else
path = LF_PATH_SLOW; return LF_PATH_SLOW;
}
// Dispatches the vertical filtering pass for one plane of the current block.
//
// Plane 0 always goes through the ss00 routine, regardless of `path`. Every
// other plane is routed by `path`: LF_PATH_444 -> ss00, LF_PATH_420 -> ss11,
// LF_PATH_SLOW -> the non420 routine. The non420 routine is the only one
// that receives `mi + mi_col` and `mi_col`; the other two receive `lfm`.
// Any other `path` value results in no call for planes other than 0.
static INLINE void loop_filter_block_plane_ver(
    AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
    MODE_INFO **mi, int mi_row, int mi_col, enum lf_path path,
    LOOP_FILTER_MASK *lfm) {
  struct macroblockd_plane *const pd = &planes[plane];

  // Plane 0 and the 4:4:4 path share the same routine.
  if (plane == 0 || path == LF_PATH_444) {
    av1_filter_block_plane_ss00_ver(cm, pd, mi_row, lfm);
    return;
  }

  if (path == LF_PATH_420) {
    av1_filter_block_plane_ss11_ver(cm, pd, mi_row, lfm);
    return;
  }

  if (path == LF_PATH_SLOW) {
    av1_filter_block_plane_non420_ver(cm, pd, mi + mi_col, mi_row, mi_col);
  }
}
// Dispatches the horizontal filtering pass for one plane of the current block.
//
// Plane 0 always goes through the ss00 routine, regardless of `path`. Every
// other plane is routed by `path`: LF_PATH_444 -> ss00, LF_PATH_420 -> ss11,
// LF_PATH_SLOW -> the non420 routine, which is called without `lfm`.
// Any other `path` value results in no call for planes other than 0.
static INLINE void loop_filter_block_plane_hor(
    AV1_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int plane,
    int mi_row, enum lf_path path, LOOP_FILTER_MASK *lfm) {
  struct macroblockd_plane *const pd = &planes[plane];

  // Plane 0 and the 4:4:4 path share the same routine.
  if (plane == 0 || path == LF_PATH_444) {
    av1_filter_block_plane_ss00_hor(cm, pd, mi_row, lfm);
    return;
  }

  if (path == LF_PATH_420) {
    av1_filter_block_plane_ss11_hor(cm, pd, mi_row, lfm);
    return;
  }

  if (path == LF_PATH_SLOW) {
    av1_filter_block_plane_non420_hor(cm, pd, mi_row);
  }
}
#endif
// Row-based multi-threaded loopfilter hook
#if CONFIG_PARALLEL_DEBLOCKING
// Worker routine that runs only the vertical filtering pass over the rows
// assigned to this worker.
//
// Rows are visited from lf_data->start up to (not including) lf_data->stop in
// steps of lf_sync->num_workers * mib_size; columns are visited in steps of
// mib_size. Unlike the horizontal worker, this routine makes no
// sync_read()/sync_write() calls.
//
// Always returns 1.
static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
// Only the first plane is filtered when y_only is set.
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
// The filter path is chosen once, from y_only and the plane subsampling.
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
#endif
for (mi_row = lf_data->start; mi_row < lf_data->stop;
mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
// Start of this row in the visible mode-info grid.
MODE_INFO **const mi =
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
mi_col += lf_data->cm->mib_size) {
LOOP_FILTER_MASK lfm;
int plane;
// Point the plane buffers at the current block, then build its mask.
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
mi_col);
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
lf_data->cm->mi_stride, &lfm);
#if CONFIG_EXT_PARTITION_TYPES
// This configuration always uses the non420 routine; note that lfm is
// built above but is not passed to it.
for (plane = 0; plane < num_planes; ++plane)
av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
mi + mi_col, mi_row, mi_col);
#else
// Route each plane to the routine selected by `path`.
for (plane = 0; plane < num_planes; ++plane)
loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
mi_row, mi_col, path, &lfm);
#endif
}
}
return 1;
}
// Worker routine that runs only the horizontal filtering pass over the rows
// assigned to this worker.
//
// Rows are visited from lf_data->start up to (not including) lf_data->stop in
// steps of lf_sync->num_workers * mib_size; columns are visited in steps of
// mib_size. Each block is bracketed by sync_read() before filtering and
// sync_write() after it.
// NOTE(review): presumably this pair orders the work against the worker
// handling the neighbouring row -- confirm against sync_read()/sync_write().
//
// Always returns 1.
static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
// Only the first plane is filtered when y_only is set.
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
// Aligned column count in units of 1 << mib_size_log2; passed to sync_write().
const int sb_cols =
mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
// The filter path is chosen once, from y_only and the plane subsampling.
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
#endif
for (mi_row = lf_data->start; mi_row < lf_data->stop;
mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
// Start of this row in the visible mode-info grid.
MODE_INFO **const mi =
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
mi_col += lf_data->cm->mib_size) {
// Row/column position scaled down by mib_size_log2, used as the
// coordinates handed to sync_read()/sync_write().
const int r = mi_row >> lf_data->cm->mib_size_log2;
const int c = mi_col >> lf_data->cm->mib_size_log2;
LOOP_FILTER_MASK lfm;
int plane;
// TODO(wenhao.zhang@intel.com): For better parallelization, reorder
// the outer loop to column-based and remove the synchronizations here.
sync_read(lf_sync, r, c);
// Point the plane buffers at the current block, then build its mask.
av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
mi_col);
av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
lf_data->cm->mi_stride, &lfm);
#if CONFIG_EXT_PARTITION_TYPES
// This configuration always uses the non420 routine; note that lfm is
// built above but is not passed to it.
for (plane = 0; plane < num_planes; ++plane)
av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
mi_row);
#else
// Route each plane to the routine selected by `path`.
for (plane = 0; plane < num_planes; ++plane)
loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
path, &lfm);
#endif
// Publish completion of block (r, c) once all planes have been filtered.
sync_write(lf_sync, r, c, sb_cols);
}
}
return 1;
}
#else // CONFIG_PARALLEL_DEBLOCKING
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
LFWorkerData *const lf_data) {
const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
const int sb_cols =
mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
#endif // !CONFIG_EXT_PARTITION_TYPES #endif // !CONFIG_EXT_PARTITION_TYPES
#if CONFIG_EXT_PARTITION #if CONFIG_EXT_PARTITION
...@@ -113,56 +241,48 @@ static INLINE void thread_loop_filter_rows( ...@@ -113,56 +241,48 @@ static INLINE void thread_loop_filter_rows(
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
#endif // CONFIG_EXT_PARTITION #endif // CONFIG_EXT_PARTITION
for (mi_row = start; mi_row < stop; for (mi_row = lf_data->start; mi_row < lf_data->stop;
mi_row += lf_sync->num_workers * cm->mib_size) { mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; MODE_INFO **const mi =
lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += cm->mib_size) { for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
const int r = mi_row >> cm->mib_size_log2; mi_col += lf_data->cm->mib_size) {
const int c = mi_col >> cm->mib_size_log2; const int r = mi_row >> lf_data->cm->mib_size_log2;
const int c = mi_col >> lf_data->cm->mib_size_log2;
#if !CONFIG_EXT_PARTITION_TYPES
LOOP_FILTER_MASK lfm;
#endif
int plane; int plane;
sync_read(lf_sync, r, c); sync_read(lf_sync, r, c);
av1_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); av1_setup_dst_planes(lf_data->planes, lf_data->frame_buffer, mi_row,
mi_col);
#if CONFIG_EXT_PARTITION_TYPES #if CONFIG_EXT_PARTITION_TYPES
for (plane = 0; plane < num_planes; ++plane) for (plane = 0; plane < num_planes; ++plane) {
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
mi_col); mi + mi_col, mi_row, mi_col);
av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
mi_row);
}
#else #else
// TODO(JBB): Make setup_mask work for non 420. av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
av1_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm); lf_data->cm->mi_stride, &lfm);
av1_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm); for (plane = 0; plane < num_planes; ++plane) {
for (plane = 1; plane < num_planes; ++plane) { loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane, mi,
switch (path) { mi_row, mi_col, path, &lfm);
case LF_PATH_420: loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane, mi_row,
av1_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm); path, &lfm);
break;
case LF_PATH_444:
av1_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
break;
case LF_PATH_SLOW:
av1_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
mi_row, mi_col);
break;
}
} }
#endif // CONFIG_EXT_PARTITION_TYPES #endif // CONFIG_EXT_PARTITION_TYPES
sync_write(lf_sync, r, c, sb_cols); sync_write(lf_sync, r, c, sb_cols);
} }
} }
}
// Row-based multi-threaded loopfilter hook