Commit dee00eb0 authored by Rupert Swarbrick, committed by Debargha Mukherjee
Browse files

Correct striped-loop-restoration with multiple tile rows

Before this patch, striped loop restoration didn't restart correctly
on each tile row. Now, the loop restoration stripes start at the top
of a tile row in the same way as if it were the top of the entire
frame.

Change-Id: I0a88a28d7804b2f09d792ecbbf4f22f666f67012
parent 369d8f22
......@@ -141,37 +141,60 @@ void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
(int32_t *)aom_memalign(16, RESTORATION_TMPBUF_SIZE));
#if CONFIG_STRIPED_LOOP_RESTORATION
// For striped loop restoration, we divide each row of tiles into "stripes",
// of height 64 luma pixels but with an offset by RESTORATION_TILE_OFFSET
// luma pixels to match the output from CDEF. We will need to store 2 *
// RESTORATION_CTX_VERT lines of data for each stripe, and also need to be
// able to quickly answer the question "Where is the <n>'th stripe for tile
// row <m>?" To make that efficient, we generate the rst_end_stripe array.
int num_stripes = 0;
for (int i = 0; i < cm->tile_rows; ++i) {
#if CONFIG_MAX_TILE
const int sb_h = cm->tile_row_start_sb[i + 1] - cm->tile_row_start_sb[i];
const int mi_h = sb_h << MAX_MIB_SIZE_LOG2;
#else
const int mi_h = ((i + 1) < cm->tile_rows)
? cm->tile_height
: (cm->mi_rows - i * cm->tile_height);
#endif
const int ext_h = RESTORATION_TILE_OFFSET + (mi_h << MI_SIZE_LOG2);
const int tile_stripes = (ext_h + 63) / 64;
num_stripes += tile_stripes;
cm->rst_end_stripe[i] = num_stripes;
}
// Now we need to allocate enough space to store the line buffers for the
// stripes
#if CONFIG_FRAME_SUPERRES
int width = cm->superres_upscaled_width;
int height = cm->superres_upscaled_height;
const int frame_w = cm->superres_upscaled_width;
#else
int width = cm->width;
int height = cm->height;
const int frame_w = cm->width;
#endif // CONFIG_FRAME_SUPERRES
// Allocate internal storage for the loop restoration stripe boundary lines
#if CONFIG_HIGHBITDEPTH
const int use_highbd = cm->use_highbitdepth ? 1 : 0;
#else
const int use_highbd = 0;
#endif
for (int p = 0; p < MAX_MB_PLANE; ++p) {
int w = p == 0 ? width : ROUND_POWER_OF_TWO(width, cm->subsampling_x);
int align_bits = 5; // align for efficiency
int stride = ALIGN_POWER_OF_TWO(w + 2 * RESTORATION_EXTRA_HORZ, align_bits);
int num_stripes = (height + 63) / 64;
// for each processing stripe: 2 lines above, 2 below
int buf_size = num_stripes * RESTORATION_CTX_VERT * stride;
uint8_t *above_buf, *below_buf;
const int is_uv = p > 0;
const int ss_x = is_uv && cm->subsampling_x;
const int plane_w = (frame_w + 2 * RESTORATION_EXTRA_HORZ + ss_x) >> ss_x;
const int align_bits = 5; // align for efficiency
const int stride = ALIGN_POWER_OF_TWO(plane_w, align_bits);
const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT
<< use_highbd;
RestorationStripeBoundaries *boundaries = &cm->rst_info[p].boundaries;
aom_free(boundaries->stripe_boundary_above);
aom_free(boundaries->stripe_boundary_below);
#if CONFIG_HIGHBITDEPTH
if (cm->use_highbitdepth) buf_size = buf_size * 2;
#endif
CHECK_MEM_ERROR(cm, above_buf,
CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_above,
(uint8_t *)aom_memalign(1 << align_bits, buf_size));
CHECK_MEM_ERROR(cm, below_buf,
CHECK_MEM_ERROR(cm, boundaries->stripe_boundary_below,
(uint8_t *)aom_memalign(1 << align_bits, buf_size));
boundaries->stripe_boundary_above = above_buf;
boundaries->stripe_boundary_below = below_buf;
boundaries->stripe_boundary_stride = stride;
}
#endif // CONFIG_STRIPED_LOOP_RESTORATION
......
......@@ -370,6 +370,12 @@ typedef struct AV1Common {
#endif // CONFIG_FRAME_SUPERRES
#if CONFIG_LOOP_RESTORATION
RestorationInfo rst_info[MAX_MB_PLANE];
// rst_end_stripe[i] is one more than the index of the bottom stripe
// for tile row i.
int rst_end_stripe[MAX_TILE_ROWS];
// Pointer to a scratch buffer used by self-guided restoration
int32_t *rst_tmpbuf;
#endif // CONFIG_LOOP_RESTORATION
......
This diff is collapsed.
......@@ -296,7 +296,8 @@ void decode_xq(const int *xqd, int *xq);
// limits is the limits of the unit. rui gives the mode to use for this unit
// and its coefficients. If striped loop restoration is enabled, rsb contains
// deblocked pixels to use for stripe boundaries; rlbs is just some space to
// use as a scratch buffer.
// use as a scratch buffer. tile_rect gives the limits of the tile containing
// this unit. tile_stripe0 is the index of the first stripe in this tile.
//
// ss_x and ss_y are flags which should be 1 if this is a plane with
// horizontal/vertical subsampling, respectively. highbd is a flag which should
......@@ -313,6 +314,7 @@ void av1_loop_restoration_filter_unit(
const RestorationTileLimits *limits, const RestorationUnitInfo *rui,
#if CONFIG_STRIPED_LOOP_RESTORATION
const RestorationStripeBoundaries *rsb, RestorationLineBuffers *rlbs,
const AV1PixelRect *tile_rect, int tile_stripe0,
#endif
int ss_x, int ss_y, int highbd, int bit_depth, uint8_t *data8, int stride,
uint8_t *dst8, int dst_stride, int32_t *tmpbuf);
......@@ -325,6 +327,7 @@ void av1_loop_restoration_filter_frame(YV12_BUFFER_CONFIG *frame,
void av1_loop_restoration_precal();
typedef void (*rest_unit_visitor_t)(const RestorationTileLimits *limits,
const AV1PixelRect *tile_rect,
int rest_unit_idx, void *priv);
typedef void (*rest_tile_start_visitor_t)(int tile_row, int tile_col,
......
......@@ -70,15 +70,103 @@ static int64_t sse_restoration_tile(const RestorationTileLimits *limits,
limits->v_start, limits->v_end - limits->v_start);
}
static int64_t try_restoration_tile(const AV1_COMMON *cm,
const YV12_BUFFER_CONFIG *src,
// Search results for a single restoration unit. The search visitors look one
// of these up as rsc->rusi[rest_unit_idx].
typedef struct {
// The best coefficients for Wiener or Sgrproj restoration
WienerInfo wiener;
SgrprojInfo sgrproj;
// The sum of squared errors for this unit, indexed by restoration type
// (the searches store into sse[RESTORE_SGRPROJ] and sse[RESTORE_WIENER]).
int64_t sse[RESTORE_SWITCHABLE_TYPES];
// The rtype to use for this unit given a frame rtype as
// index. Indices: WIENER, SGRPROJ, SWITCHABLE.
RestorationType best_rtype[RESTORE_TYPES - 1];
} RestUnitSearchInfo;
// Context for the restoration search on one plane. It is handed to the search
// visitors and to rsc_on_tile through their opaque priv pointer.
typedef struct {
// Frames and codec state stored by init_rsc. try_restoration_tile filters
// into dst and then measures the error between src and dst.
const YV12_BUFFER_CONFIG *src;
YV12_BUFFER_CONFIG *dst;
const AV1_COMMON *cm;
const MACROBLOCK *x;
// Plane being searched and its cropped dimensions, as read from src.
int plane;
int plane_width;
int plane_height;
// Per-unit search results, indexed by rest_unit_idx.
RestUnitSearchInfo *rusi;
// Buffer and stride of this plane in cm->frame_to_show.
uint8_t *dgd_buffer;
int dgd_stride;
// Buffer and stride of this plane in src.
const uint8_t *src_buffer;
int src_stride;
// sse and bits are initialised by reset_rsc in search_rest_type
int64_t sse;
int64_t bits;
#if CONFIG_STRIPED_LOOP_RESTORATION
// tile_stripe0 is the index of the first stripe in the current tile row; it
// is set by rsc_on_tile from cm->rst_end_stripe.
// NOTE(review): no assignment to tile_y0 is visible in this excerpt --
// confirm that it is initialised before it is read.
int tile_y0, tile_stripe0;
#endif
// sgrproj and wiener are reset to their defaults each time rsc_on_tile runs.
SgrprojInfo sgrproj;
WienerInfo wiener;
} RestSearchCtxt;
// Tile callback for the restoration search. Resets the sgrproj and wiener
// members of the search context (passed as priv) to their defaults and, when
// striped loop restoration is enabled, records the index of the first stripe
// belonging to tile_row. tile_col is unused.
static void rsc_on_tile(int tile_row, int tile_col, void *priv) {
  (void)tile_col;
  RestSearchCtxt *const ctxt = (RestSearchCtxt *)priv;

  set_default_sgrproj(&ctxt->sgrproj);
  set_default_wiener(&ctxt->wiener);

#if CONFIG_STRIPED_LOOP_RESTORATION
  // rst_end_stripe[i] is one past the last stripe of tile row i, so a tile
  // row begins where the previous row ended; the top row begins at stripe 0.
  int first_stripe = 0;
  if (tile_row != 0) {
    first_stripe = ctxt->cm->rst_end_stripe[tile_row - 1];
  }
  ctxt->tile_stripe0 = first_stripe;
#else
  (void)tile_row;
#endif
}
// Clears the sse and bits totals held in the search context.
static void reset_rsc(RestSearchCtxt *rsc) {
  rsc->bits = 0;
  rsc->sse = 0;
}
// Populates rsc for a search over the given plane: stores the source and
// destination frames, the common and macroblock state and the per-unit result
// array, then caches the plane's cropped size plus the buffer and stride of
// that plane in both src and cm->frame_to_show.
static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm,
                     const MACROBLOCK *x, int plane, RestUnitSearchInfo *rusi,
                     YV12_BUFFER_CONFIG *dst, RestSearchCtxt *rsc) {
  rsc->cm = cm;
  rsc->x = x;
  rsc->src = src;
  rsc->dst = dst;
  rsc->rusi = rusi;
  rsc->plane = plane;

  const YV12_BUFFER_CONFIG *const shown = cm->frame_to_show;
  // crop_widths, crop_heights and strides are indexed by 0 for the Y plane
  // and by 1 for any other plane; buffers is indexed by the plane itself.
  const int uv = (plane == AOM_PLANE_Y) ? 0 : 1;

  rsc->plane_height = src->crop_heights[uv];
  rsc->plane_width = src->crop_widths[uv];

  rsc->src_stride = src->strides[uv];
  rsc->src_buffer = src->buffers[plane];

  rsc->dgd_stride = shown->strides[uv];
  rsc->dgd_buffer = shown->buffers[plane];

  // The source and the frame to show must agree on the cropped plane size.
  assert(src->crop_widths[uv] == shown->crop_widths[uv]);
  assert(src->crop_heights[uv] == shown->crop_heights[uv]);
}
static int64_t try_restoration_tile(const RestSearchCtxt *rsc,
const RestorationTileLimits *limits,
const RestorationUnitInfo *rui,
YV12_BUFFER_CONFIG *dst, int plane) {
const AV1PixelRect *tile_rect,
const RestorationUnitInfo *rui) {
const AV1_COMMON *const cm = rsc->cm;
const int plane = rsc->plane;
const int is_uv = plane > 0;
#if CONFIG_STRIPED_LOOP_RESTORATION
const RestorationInfo *rsi = &cm->rst_info[plane];
RestorationLineBuffers rlbs;
#else
(void)tile_rect;
#endif
#if CONFIG_HIGHBITDEPTH
const int bit_depth = cm->bit_depth;
......@@ -93,13 +181,13 @@ static int64_t try_restoration_tile(const AV1_COMMON *cm,
av1_loop_restoration_filter_unit(
limits, rui,
#if CONFIG_STRIPED_LOOP_RESTORATION
&rsi->boundaries, &rlbs,
&rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
#endif
is_uv && cm->subsampling_x, is_uv && cm->subsampling_y, highbd, bit_depth,
fts->buffers[plane], fts->strides[is_uv], dst->buffers[plane],
dst->strides[is_uv], cm->rst_tmpbuf);
fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
rsc->dst->strides[is_uv], cm->rst_tmpbuf);
return sse_restoration_tile(limits, src, dst, plane, highbd);
return sse_restoration_tile(limits, rsc->src, rsc->dst, plane, highbd);
}
static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
......@@ -390,82 +478,9 @@ static int count_sgrproj_bits(SgrprojInfo *sgrproj_info,
return bits;
}
// Search results for a single restoration unit. The search visitors look one
// of these up as rsc->rusi[rest_unit_idx].
typedef struct {
// The best coefficients for Wiener or Sgrproj restoration
WienerInfo wiener;
SgrprojInfo sgrproj;
// The sum of squared errors for this unit, indexed by restoration type
// (the searches store into sse[RESTORE_SGRPROJ] and sse[RESTORE_WIENER]).
int64_t sse[RESTORE_SWITCHABLE_TYPES];
// The rtype to use for this unit given a frame rtype as
// index. Indices: WIENER, SGRPROJ, SWITCHABLE.
RestorationType best_rtype[RESTORE_TYPES - 1];
} RestUnitSearchInfo;
// Context for the restoration search on one plane. It is handed to the search
// visitors and to rsc_on_tile through their opaque priv pointer.
typedef struct {
// Source frame and codec state stored by init_rsc.
const YV12_BUFFER_CONFIG *src;
const AV1_COMMON *cm;
const MACROBLOCK *x;
// Plane being searched and its cropped dimensions, as read from src.
int plane;
int plane_width;
int plane_height;
// Per-unit search results, indexed by rest_unit_idx.
RestUnitSearchInfo *rusi;
// Frame that the search functions hand to try_restoration_tile as its
// destination argument.
YV12_BUFFER_CONFIG *dst_frame;
// Buffer and stride of this plane in cm->frame_to_show.
uint8_t *dgd_buffer;
int dgd_stride;
// Buffer and stride of this plane in src.
const uint8_t *src_buffer;
int src_stride;
// sse and bits are initialised by reset_rsc in search_rest_type
int64_t sse;
int64_t bits;
// sgrproj and wiener are reset to their defaults each time rsc_on_tile runs.
SgrprojInfo sgrproj;
WienerInfo wiener;
} RestSearchCtxt;
// Tile callback for the restoration search. Resets the sgrproj and wiener
// members of the search context (passed as priv) to their defaults. The tile
// coordinates are not used.
static void rsc_on_tile(int tile_row, int tile_col, void *priv) {
  (void)tile_row;
  (void)tile_col;
  RestSearchCtxt *const ctxt = (RestSearchCtxt *)priv;

  set_default_sgrproj(&ctxt->sgrproj);
  set_default_wiener(&ctxt->wiener);
}
// Clears the sse and bits totals held in the search context.
static void reset_rsc(RestSearchCtxt *rsc) {
  rsc->bits = 0;
  rsc->sse = 0;
}
// Populates rsc for a search over the given plane: stores the source frame,
// the common and macroblock state, the per-unit result array and the
// destination frame, then caches the plane's cropped size plus the buffer and
// stride of that plane in both src and cm->frame_to_show.
static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm,
                     const MACROBLOCK *x, int plane, RestUnitSearchInfo *rusi,
                     YV12_BUFFER_CONFIG *dst_frame, RestSearchCtxt *rsc) {
  rsc->cm = cm;
  rsc->x = x;
  rsc->src = src;
  rsc->rusi = rusi;
  rsc->plane = plane;
  rsc->dst_frame = dst_frame;

  const YV12_BUFFER_CONFIG *const shown = cm->frame_to_show;
  // crop_widths, crop_heights and strides are indexed by 0 for the Y plane
  // and by 1 for any other plane; buffers is indexed by the plane itself.
  const int uv = (plane == AOM_PLANE_Y) ? 0 : 1;

  rsc->plane_height = src->crop_heights[uv];
  rsc->plane_width = src->crop_widths[uv];

  rsc->src_stride = src->strides[uv];
  rsc->src_buffer = src->buffers[plane];

  rsc->dgd_stride = shown->strides[uv];
  rsc->dgd_buffer = shown->buffers[plane];

  // The source and the frame to show must agree on the cropped plane size.
  assert(src->crop_widths[uv] == shown->crop_widths[uv]);
  assert(src->crop_heights[uv] == shown->crop_heights[uv]);
}
static void search_sgrproj(const RestorationTileLimits *limits,
int rest_unit_idx, void *priv) {
const AV1PixelRect *tile, int rest_unit_idx,
void *priv) {
RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
......@@ -500,8 +515,7 @@ static void search_sgrproj(const RestorationTileLimits *limits,
rui.restoration_type = RESTORE_SGRPROJ;
rui.sgrproj_info = rusi->sgrproj;
rusi->sse[RESTORE_SGRPROJ] = try_restoration_tile(cm, rsc->src, limits, &rui,
rsc->dst_frame, rsc->plane);
rusi->sse[RESTORE_SGRPROJ] = try_restoration_tile(rsc, limits, tile, &rui);
const int64_t bits_none = x->sgrproj_restore_cost[0];
const int64_t bits_sgr = x->sgrproj_restore_cost[1] +
......@@ -878,13 +892,13 @@ static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info,
}
#define USE_WIENER_REFINEMENT_SEARCH 1
static int64_t finer_tile_search_wiener(
const AV1_COMMON *cm, const YV12_BUFFER_CONFIG *src,
const RestorationTileLimits *limits, RestorationUnitInfo *rui,
int start_step, int plane, int wiener_win, YV12_BUFFER_CONFIG *dst_frame) {
static int64_t finer_tile_search_wiener(const RestSearchCtxt *rsc,
const RestorationTileLimits *limits,
const AV1PixelRect *tile,
RestorationUnitInfo *rui,
int wiener_win) {
const int plane_off = (WIENER_WIN - wiener_win) >> 1;
int64_t err = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
(void)start_step;
int64_t err = try_restoration_tile(rsc, limits, tile, rui);
#if USE_WIENER_REFINEMENT_SEARCH
int64_t err2;
int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV,
......@@ -895,6 +909,7 @@ static int64_t finer_tile_search_wiener(
WienerInfo *plane_wiener = &rui->wiener_info;
// printf("err pre = %"PRId64"\n", err);
const int start_step = 4;
for (int s = start_step; s >= 1; s >>= 1) {
for (int p = plane_off; p < WIENER_HALFWIN; ++p) {
int skip = 0;
......@@ -903,7 +918,7 @@ static int64_t finer_tile_search_wiener(
plane_wiener->hfilter[p] -= s;
plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s;
err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
err2 = try_restoration_tile(rsc, limits, tile, rui);
if (err2 > err) {
plane_wiener->hfilter[p] += s;
plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
......@@ -923,7 +938,7 @@ static int64_t finer_tile_search_wiener(
plane_wiener->hfilter[p] += s;
plane_wiener->hfilter[WIENER_WIN - p - 1] += s;
plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s;
err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
err2 = try_restoration_tile(rsc, limits, tile, rui);
if (err2 > err) {
plane_wiener->hfilter[p] -= s;
plane_wiener->hfilter[WIENER_WIN - p - 1] -= s;
......@@ -944,7 +959,7 @@ static int64_t finer_tile_search_wiener(
plane_wiener->vfilter[p] -= s;
plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s;
err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
err2 = try_restoration_tile(rsc, limits, tile, rui);
if (err2 > err) {
plane_wiener->vfilter[p] += s;
plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
......@@ -964,7 +979,7 @@ static int64_t finer_tile_search_wiener(
plane_wiener->vfilter[p] += s;
plane_wiener->vfilter[WIENER_WIN - p - 1] += s;
plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s;
err2 = try_restoration_tile(cm, src, limits, rui, dst_frame, plane);
err2 = try_restoration_tile(rsc, limits, tile, rui);
if (err2 > err) {
plane_wiener->vfilter[p] -= s;
plane_wiener->vfilter[WIENER_WIN - p - 1] -= s;
......@@ -985,7 +1000,8 @@ static int64_t finer_tile_search_wiener(
}
static void search_wiener(const RestorationTileLimits *limits,
int rest_unit_idx, void *priv) {
const AV1PixelRect *tile_rect, int rest_unit_idx,
void *priv) {
RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
......@@ -1040,8 +1056,7 @@ static void search_wiener(const RestorationTileLimits *limits,
aom_clear_system_state();
rusi->sse[RESTORE_WIENER] =
finer_tile_search_wiener(rsc->cm, rsc->src, limits, &rui, 4, rsc->plane,
wiener_win, rsc->dst_frame);
finer_tile_search_wiener(rsc, limits, tile_rect, &rui, wiener_win);
rusi->wiener = rui.wiener_info;
if (wiener_win != WIENER_WIN) {
......@@ -1071,7 +1086,10 @@ static void search_wiener(const RestorationTileLimits *limits,
}
static void search_norestore(const RestorationTileLimits *limits,
int rest_unit_idx, void *priv) {
const AV1PixelRect *tile_rect, int rest_unit_idx,
void *priv) {
(void)tile_rect;
RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
......@@ -1088,8 +1106,10 @@ static void search_norestore(const RestorationTileLimits *limits,
}
static void search_switchable(const RestorationTileLimits *limits,
int rest_unit_idx, void *priv) {
const AV1PixelRect *tile_rect, int rest_unit_idx,
void *priv) {
(void)limits;
(void)tile_rect;
RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment