Commit 13927866 authored by Rupert Swarbrick, committed by Debargha Mukherjee
Browse files

Refactor to allow optimization in SGR code

The first stage of the self-guided filter is to generate box sums of
the input image (and of its squared pixel values). This is done with a
pair of integral images, which are identical for the two filter calls
made in apply_selfguided_restoration.

This patch refactors things so that a single call to
av1_selfguided_restoration calculates both "flt" buffers (flt1 and
flt2), allowing it to reuse the integral images that it calculated.

Change-Id: Ica2f6f66e41bea38eb1a135c78c1d7ddab434d8e
parent 0ef61dd1
......@@ -14,6 +14,10 @@ print <<EOF
#include "av1/common/av1_txfm.h"
#include "av1/common/odintrin.h"
#if CONFIG_LOOP_RESTORATION
#include "av1/common/restoration.h"
#endif
struct macroblockd;
/* Encoder forward decls */
......@@ -554,7 +558,7 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
specialize qw/apply_selfguided_restoration sse4_1/;
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps, int bit_depth, int highbd";
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt1, int32_t *flt2, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd";
specialize qw/av1_selfguided_restoration sse4_1/;
}
......
......@@ -1086,8 +1086,9 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
}
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int stride, int32_t *dst, int dst_stride,
int r, int eps, int bit_depth, int highbd) {
int dgd_stride, int32_t *flt1, int32_t *flt2,
int flt_stride, const sgr_params_type *params,
int bit_depth, int highbd) {
int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
int32_t *dgd32 =
......@@ -1097,19 +1098,23 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
const uint16_t *dgd16 = CONVERT_TO_SHORTPTR(dgd8);
for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
dgd32[i * dgd32_stride + j] = dgd16[i * stride + j];
dgd32[i * dgd32_stride + j] = dgd16[i * dgd_stride + j];
}
}
} else {
for (int i = -SGRPROJ_BORDER_VERT; i < height + SGRPROJ_BORDER_VERT; ++i) {
for (int j = -SGRPROJ_BORDER_HORZ; j < width + SGRPROJ_BORDER_HORZ; ++j) {
dgd32[i * dgd32_stride + j] = dgd8[i * stride + j];
dgd32[i * dgd32_stride + j] = dgd8[i * dgd_stride + j];
}
}
}
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, dst,
dst_stride, bit_depth, r, eps);
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
flt_stride, bit_depth, params->r1,
params->e1);
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2,
flt_stride, bit_depth, params->r2,
params->e2);
}
void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
......@@ -1121,12 +1126,8 @@ void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
av1_selfguided_restoration_c(dat8, width, height, stride, flt1, width,
sgr_params[eps].r1, sgr_params[eps].e1,
bit_depth, highbd);
av1_selfguided_restoration_c(dat8, width, height, stride, flt2, width,
sgr_params[eps].r2, sgr_params[eps].e2,
bit_depth, highbd);
av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
&sgr_params[eps], bit_depth, highbd);
decode_xq(xqd, xq);
for (int i = 0; i < height; ++i) {
for (int j = 0; j < width; ++j) {
......
......@@ -16,6 +16,7 @@
#include "./aom_config.h"
#include "av1/common/blockd.h"
#include "av1/common/enums.h"
#ifdef __cplusplus
extern "C" {
......
......@@ -283,8 +283,10 @@ static void final_filter(int32_t *dst, int dst_stride, const int32_t *A,
}
void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
int height, int dgd_stride, int32_t *dst,
int dst_stride, int r, int eps,
int height, int dgd_stride,
int32_t *flt1, int32_t *flt2,
int flt_stride,
const sgr_params_type *params,
int bit_depth, int highbd) {
DECLARE_ALIGNED(16, int32_t, buf[4 * RESTORATION_PROC_UNIT_PELS]);
......@@ -320,12 +322,12 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
int32_t *C = C0 + buf_diag_border;
int32_t *D = D0 + buf_diag_border;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
const int dgd_diag_border =
SGRPROJ_BORDER_HORZ + dgd_stride * SGRPROJ_BORDER_VERT;
const uint8_t *dgd0 = dgd8 - dgd_diag_border;
// Generate integral images from the input. C will contain sums of squares; D
// will contain just sums
#if CONFIG_HIGHBITDEPTH
if (highbd)
integral_images_highbd(CONVERT_TO_SHORTPTR(dgd0), dgd_stride, width_ext,
......@@ -335,10 +337,17 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
buf_stride);
calc_ab(A, B, C, D, width, height, buf_stride, eps, bit_depth, r);
// Write to flt1 and flt2
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
int32_t *flt = i ? flt2 : flt1;
final_filter(dst, dst_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
calc_ab(A, B, C, D, width, height, buf_stride, e, bit_depth, r);
final_filter(flt, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
}
void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
......@@ -349,12 +358,9 @@ void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, width,
sgr_params[eps].r1, sgr_params[eps].e1,
bit_depth, highbd);
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt2, width,
sgr_params[eps].r2, sgr_params[eps].e2,
bit_depth, highbd);
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
width, &sgr_params[eps], bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq);
......
......@@ -372,10 +372,8 @@ static void sgr_filter_block(const sgr_params_type *params, const uint8_t *dat8,
int width, int height, int dat_stride,
int use_highbd, int bit_depth, int32_t *flt1,
int32_t *flt2, int flt_stride) {
av1_selfguided_restoration(dat8, width, height, dat_stride, flt1, flt_stride,
params->r1, params->e1, bit_depth, use_highbd);
av1_selfguided_restoration(dat8, width, height, dat_stride, flt2, flt_stride,
params->r2, params->e2, bit_depth, use_highbd);
av1_selfguided_restoration(dat8, width, height, dat_stride, flt1, flt2,
flt_stride, params, bit_depth, use_highbd);
}
// Apply the self-guided filter across an entire restoration unit.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment