Commit b7bb0976 authored by Debargha Mukherjee

Replace one self guided filter with highpass

Adds an option controlled by a macro to replace one of
the guided filters in the self-guided tool with a simple
bandpass filtered version generated with a 3x3 kernel.
By default the macro USE_HIGHPASS_IN_SGRPROJ is 0 (turned
off), which keeps the existing dual self-guided filter.
When the macro is turned on, the larger radius guided
filter is replaced by a simpler filter that is much faster.

Results (if USE_HIGHPASS_IN_SGRPROJ is on vs. off):
lowres: performance drop by +0.14% (BDRATE)
midres: performance drop by +0.27% (BDRATE)

Further experiments on this variation of guided filters are
pending.

Change-Id: I7bbcfcad7ee266cd49a8dc6d96795a454feb1a94
parent bcf25cda
......@@ -786,12 +786,18 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
# 8-bit self-guided restoration entry point; an sse4_1 specialization is listed.
add_proto qw/void av1_selfguided_restoration/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps, int32_t *tmpbuf";
specialize qw/av1_selfguided_restoration sse4_1/;
# 8-bit highpass filter entry point; no SIMD specialization is listed for it.
# NOTE(review): the prototype names the two filter parameters "r" and "eps",
# while the C definition av1_highpass_filter_c names them "corner" and "edge".
add_proto qw/void av1_highpass_filter/, "uint8_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps, int32_t *tmpbuf";
specialize qw/av1_highpass_filter/;
# High bit-depth (uint16_t sample) counterparts, registered only when
# CONFIG_AOM_HIGHBITDEPTH is "yes".
if (aom_config("CONFIG_AOM_HIGHBITDEPTH") eq "yes") {
add_proto qw/void apply_selfguided_restoration_highbd/, "uint16_t *dat, int width, int height, int stride, int bit_depth, int eps, int *xqd, uint16_t *dst, int dst_stride, int32_t *tmpbuf";
specialize qw/apply_selfguided_restoration_highbd sse4_1/;
add_proto qw/void av1_selfguided_restoration_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int bit_depth, int r, int eps, int32_t *tmpbuf";
specialize qw/av1_selfguided_restoration_highbd sse4_1/;
# Highpass filter for uint16_t samples; as with the 8-bit version, no SIMD
# specialization is listed.
add_proto qw/void av1_highpass_filter_highbd/, "uint16_t *dgd, int width, int height, int stride, int32_t *dst, int dst_stride, int r, int eps, int32_t *tmpbuf";
specialize qw/av1_highpass_filter_highbd/;
}
}
......
......@@ -22,11 +22,19 @@
#include "aom_ports/mem.h"
// Table of SGRPROJ_PARAMS parameter sets, one row per selectable index.
// The first two columns change meaning with USE_HIGHPASS_IN_SGRPROJ; the
// last two (r2, eps2) are passed to the self-guided filter in both
// configurations.
const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
#if USE_HIGHPASS_IN_SGRPROJ
  // corner, edge, r2, eps2
  { -1, 2, 1, 1 },
  { -1, 2, 1, 2 },
  { -1, 2, 1, 3 },
  { -1, 2, 1, 4 },
  { -1, 2, 1, 5 },
  { -2, 3, 1, 2 },
  { -2, 3, 1, 3 },
  { -2, 3, 1, 4 },
  { -2, 3, 1, 5 },
  { -2, 3, 1, 6 },
  { -3, 4, 1, 3 },
  { -3, 4, 1, 4 },
  { -3, 4, 1, 5 },
  { -3, 4, 1, 6 },
  { -3, 4, 1, 7 },
  { -3, 4, 1, 8 }
#else
  // r1, eps1, r2, eps2
  { 2, 12, 1, 4 },
  { 2, 15, 1, 6 },
  { 2, 18, 1, 8 },
  { 2, 20, 1, 9 },
  { 2, 22, 1, 10 },
  { 2, 25, 1, 11 },
  { 2, 35, 1, 12 },
  { 2, 45, 1, 13 },
  { 2, 55, 1, 14 },
  { 2, 65, 1, 15 },
  { 2, 75, 1, 16 },
  { 3, 30, 1, 10 },
  { 3, 50, 1, 12 },
  { 3, 50, 2, 25 },
  { 3, 60, 2, 35 },
  { 3, 70, 2, 45 },
#endif
};
typedef void (*restore_func_type)(uint8_t *data8, int width, int height,
......@@ -518,7 +526,7 @@ static void boxnum(int width, int height, int r, int8_t *num, int num_stride) {
}
// Converts the coded projection coefficients xqd[0..1] into the weights
// xq[0..1] used by the projection.
//   xq[0] is xqd[0] unchanged.
//   xq[1] is chosen so that xq[0] + xq[1] + xqd[1] == 1 << SGRPROJ_PRJ_BITS.
// This is the inverse of the unclamped mapping in encode_xq().
void decode_xq(int *xqd, int *xq) {
  xq[0] = xqd[0];
  xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0] - xqd[1];
}
......@@ -742,6 +750,108 @@ void av1_selfguided_restoration_c(uint8_t *dgd, int width, int height,
tmpbuf);
}
#if USE_HIGHPASS_IN_SGRPROJ
// Applies a 3x3 filter, in place, to the width x height region of A (row
// pitch `stride`).
//
// Kernel taps: `corner` on the four diagonal neighbours, `edge` on the four
// horizontal/vertical neighbours, and a centre tap chosen so that all nine
// taps sum to 1 << SGRPROJ_RST_BITS. Neighbours that fall outside the region
// are replaced by the nearest sample inside it (border replication), so no
// sample outside the width x height region is ever read, including when
// width or height is 1.
//
// tmpbuf is scratch space for the filtered image; it is written with a row
// pitch of ((width + 3) & ~3) + 16 and must be large enough for `height` such
// rows. The result is copied back into A once every output sample has been
// computed, so each output depends only on the original input samples.
void av1_highpass_filter_internal(int32_t *A, int width, int height, int stride,
                                  int corner, int edge, int32_t *tmpbuf) {
  const int center = (1 << SGRPROJ_RST_BITS) - 4 * (corner + edge);
  const int buf_stride = ((width + 3) & ~3) + 16;
  int i, j;

  // Nothing to filter; also keeps the memcpy size below non-negative.
  if (width <= 0 || height <= 0) return;

  for (i = 0; i < height; ++i) {
    // Vertical neighbour offsets, collapsed to 0 on the first/last row.
    const int up = (i > 0) ? -stride : 0;
    const int down = (i + 1 < height) ? stride : 0;
    for (j = 0; j < width; ++j) {
      // Horizontal neighbour offsets, collapsed to 0 on the first/last column.
      const int left = (j > 0) ? -1 : 0;
      const int right = (j + 1 < width) ? 1 : 0;
      const int k = i * stride + j;
      const int l = i * buf_stride + j;
      tmpbuf[l] =
          center * A[k] +
          edge * (A[k + up] + A[k + left] + A[k + down] + A[k + right]) +
          corner * (A[k + up + left] + A[k + up + right] +
                    A[k + down + left] + A[k + down + right]);
    }
  }

  for (i = 0; i < height; ++i) {
    memcpy(A + stride * i, tmpbuf + buf_stride * i, sizeof(*A) * width);
  }
}
// 8-bit entry point for the highpass filter.
// Widens the width x height block of uint8_t samples in dgd (row pitch
// `stride`) into the int32_t buffer dst (row pitch `dst_stride`), then runs
// av1_highpass_filter_internal() on dst in place with the given corner/edge
// taps. tmpbuf is forwarded as scratch space.
void av1_highpass_filter_c(uint8_t *dgd, int width, int height, int stride,
                           int32_t *dst, int dst_stride, int corner, int edge,
                           int32_t *tmpbuf) {
  int row;
  for (row = 0; row < height; ++row) {
    const int src_base = row * stride;
    const int dst_base = row * dst_stride;
    int col = 0;
    while (col < width) {
      dst[dst_base + col] = dgd[src_base + col];
      ++col;
    }
  }
  av1_highpass_filter_internal(dst, width, height, dst_stride, corner, edge,
                               tmpbuf);
}
#endif // USE_HIGHPASS_IN_SGRPROJ
void apply_selfguided_restoration_c(uint8_t *dat, int width, int height,
int stride, int eps, int *xqd, uint8_t *dst,
int dst_stride, int32_t *tmpbuf) {
......@@ -751,8 +861,13 @@ void apply_selfguided_restoration_c(uint8_t *dat, int width, int height,
int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
av1_highpass_filter_c(dat, width, height, stride, flt1, width,
sgr_params[eps].corner, sgr_params[eps].edge, tmpbuf2);
#else
av1_selfguided_restoration_c(dat, width, height, stride, flt1, width,
sgr_params[eps].r1, sgr_params[eps].e1, tmpbuf2);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_c(dat, width, height, stride, flt2, width,
sgr_params[eps].r2, sgr_params[eps].e2, tmpbuf2);
decode_xq(xqd, xq);
......@@ -924,6 +1039,19 @@ void av1_selfguided_restoration_highbd_c(uint16_t *dgd, int width, int height,
r, eps, tmpbuf);
}
#if USE_HIGHPASS_IN_SGRPROJ
// High bit-depth entry point for the highpass filter.
// Widens the width x height block of uint16_t samples in dgd (row pitch
// `stride`) into the int32_t buffer dst (row pitch `dst_stride`), then runs
// av1_highpass_filter_internal() on dst in place with the given corner/edge
// taps. tmpbuf is forwarded as scratch space.
//
// Guarded by USE_HIGHPASS_IN_SGRPROJ because av1_highpass_filter_internal()
// is only defined when that macro is non-zero; without the guard this
// function would reference an undeclared, undefined symbol.
void av1_highpass_filter_highbd_c(uint16_t *dgd, int width, int height,
                                  int stride, int32_t *dst, int dst_stride,
                                  int corner, int edge, int32_t *tmpbuf) {
  int i, j;
  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      dst[i * dst_stride + j] = dgd[i * stride + j];
    }
  }
  av1_highpass_filter_internal(dst, width, height, dst_stride, corner, edge,
                               tmpbuf);
}
#endif  // USE_HIGHPASS_IN_SGRPROJ
void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height,
int stride, int bit_depth, int eps,
int *xqd, uint16_t *dst,
......@@ -934,9 +1062,15 @@ void apply_selfguided_restoration_highbd_c(uint16_t *dat, int width, int height,
int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width,
sgr_params[eps].corner, sgr_params[eps].edge,
tmpbuf2);
#else
av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt1, width,
bit_depth, sgr_params[eps].r1,
sgr_params[eps].e1, tmpbuf2);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_highbd_c(dat, width, height, stride, flt2, width,
bit_depth, sgr_params[eps].r2,
sgr_params[eps].e2, tmpbuf2);
......
......@@ -39,6 +39,7 @@ extern "C" {
#define SGRPROJ_EXTBUF_SIZE (0)
// SGRPROJ_PARAMS is the number of entries in the sgr_params table
// (1 << SGRPROJ_PARAMS_BITS).
#define SGRPROJ_PARAMS_BITS 4
#define SGRPROJ_PARAMS (1 << SGRPROJ_PARAMS_BITS)
// When non-zero, the first of the two self-guided filters is replaced by the
// 3x3 highpass filter (corner/edge taps); 0 keeps both self-guided filters.
#define USE_HIGHPASS_IN_SGRPROJ 0
// Precision bits for projection
#define SGRPROJ_PRJ_BITS 7
......@@ -48,10 +49,17 @@ extern "C" {
#define SGRPROJ_SGR_BITS 8
#define SGRPROJ_SGR (1 << SGRPROJ_SGR_BITS)

// Inclusive ranges used to clamp the two coded projection coefficients
// (xqd[0] and xqd[1]). Each range spans exactly 1 << SGRPROJ_PRJ_BITS values;
// only the lower bound depends on whether the highpass variant is enabled.
// SGRPROJ_PRJ_MIN0 is defined exactly once per configuration: a second,
// unconditional definition with a different value would be an incompatible
// macro redefinition.
#if USE_HIGHPASS_IN_SGRPROJ
#define SGRPROJ_PRJ_MIN0 (-(1 << SGRPROJ_PRJ_BITS) / 8)
#define SGRPROJ_PRJ_MAX0 (SGRPROJ_PRJ_MIN0 + (1 << SGRPROJ_PRJ_BITS) - 1)
#define SGRPROJ_PRJ_MIN1 (-(1 << SGRPROJ_PRJ_BITS) / 2)
#define SGRPROJ_PRJ_MAX1 (SGRPROJ_PRJ_MIN1 + (1 << SGRPROJ_PRJ_BITS) - 1)
#else
#define SGRPROJ_PRJ_MIN0 (-(1 << SGRPROJ_PRJ_BITS) * 3 / 4)
#define SGRPROJ_PRJ_MAX0 (SGRPROJ_PRJ_MIN0 + (1 << SGRPROJ_PRJ_BITS) - 1)
#define SGRPROJ_PRJ_MIN1 (-(1 << SGRPROJ_PRJ_BITS) / 4)
#define SGRPROJ_PRJ_MAX1 (SGRPROJ_PRJ_MIN1 + (1 << SGRPROJ_PRJ_BITS) - 1)
#endif  // USE_HIGHPASS_IN_SGRPROJ
#define SGRPROJ_BITS (SGRPROJ_PRJ_BITS * 2 + SGRPROJ_PARAMS_BITS)
......@@ -116,8 +124,13 @@ typedef struct {
} WienerInfo;
// One row of the sgr_params table. The first two fields depend on
// USE_HIGHPASS_IN_SGRPROJ; field order must match the row initializers.
typedef struct {
#if USE_HIGHPASS_IN_SGRPROJ
// Taps of the 3x3 highpass filter: weight of the four diagonal neighbours
// and of the four horizontal/vertical neighbours respectively.
int corner;
int edge;
#else
// r and eps arguments passed to the first self-guided filter.
int r1;
int e1;
#endif // USE_HIGHPASS_IN_SGRPROJ
// r and eps arguments passed to the second self-guided filter.
int r2;
int e2;
} sgr_params_type;
......
......@@ -893,9 +893,14 @@ void apply_selfguided_restoration_sse4_1(uint8_t *dat, int width, int height,
int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt1, width,
sgr_params[eps].r1, sgr_params[eps].e1,
tmpbuf2);
#if USE_HIGHPASS_IN_SGRPROJ
av1_highpass_filter_c(dat, width, height, stride, flt1, width,
sgr_params[eps].corner, sgr_params[eps].edge, tmpbuf2);
#else
av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt1, width,
sgr_params[eps].r1, sgr_params[eps].e1,
tmpbuf2);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_sse4_1(dat, width, height, stride, flt2, width,
sgr_params[eps].r2, sgr_params[eps].e2,
tmpbuf2);
......@@ -1431,9 +1436,15 @@ void apply_selfguided_restoration_highbd_sse4_1(
int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX;
int i, j;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if USE_HIGHPASS_IN_SGRPROJ
av1_highpass_filter_highbd_c(dat, width, height, stride, flt1, width,
sgr_params[eps].corner, sgr_params[eps].edge,
tmpbuf2);
#else
av1_selfguided_restoration_highbd_sse4_1(dat, width, height, stride, flt1,
width, bit_depth, sgr_params[eps].r1,
sgr_params[eps].e1, tmpbuf2);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_highbd_sse4_1(dat, width, height, stride, flt2,
width, bit_depth, sgr_params[eps].r2,
sgr_params[eps].e2, tmpbuf2);
......
......@@ -218,8 +218,9 @@ static void get_proj_subspace(uint8_t *src8, int width, int height,
double x[2];
const int size = width * height;
xq[0] = -(1 << SGRPROJ_PRJ_BITS) / 4;
xq[1] = (1 << SGRPROJ_PRJ_BITS) - xq[0];
// Default
xq[0] = 0;
xq[1] = 0;
if (bit_depth == 8) {
const uint8_t *src = src8;
const uint8_t *dat = dat8;
......@@ -270,9 +271,9 @@ static void get_proj_subspace(uint8_t *src8, int width, int height,
}
// Converts the projection weights xq[0..1] into the coded coefficients
// xqd[0..1], clamping each to its allowed range.
//   xqd[0] is xq[0] clamped to [SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0].
//   xqd[1] is (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1], clamped to
//          [SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1].
// xqd[1] is derived from the already-clamped xqd[0], not from xq[0].
void encode_xq(int *xq, int *xqd) {
  xqd[0] = xq[0];
  xqd[0] = clamp(xqd[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0);
  xqd[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1];
  xqd[1] = clamp(xqd[1], SGRPROJ_PRJ_MIN1, SGRPROJ_PRJ_MAX1);
}
......@@ -292,16 +293,27 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height,
#if CONFIG_AOM_HIGHBITDEPTH
if (bit_depth > 8) {
uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
#if USE_HIGHPASS_IN_SGRPROJ
av1_highpass_filter_highbd(dat, width, height, dat_stride, flt1, width,
sgr_params[ep].corner, sgr_params[ep].edge,
tmpbuf2);
#else
av1_selfguided_restoration_highbd(dat, width, height, dat_stride, flt1,
width, bit_depth, sgr_params[ep].r1,
sgr_params[ep].e1, tmpbuf2);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration_highbd(dat, width, height, dat_stride, flt2,
width, bit_depth, sgr_params[ep].r2,
sgr_params[ep].e2, tmpbuf2);
} else {
#endif
av1_selfguided_restoration(dat8, width, height, dat_stride, flt1, width,
sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2);
#if USE_HIGHPASS_IN_SGRPROJ
av1_highpass_filter(dat8, width, height, dat_stride, flt1, width,
sgr_params[ep].corner, sgr_params[ep].edge, tmpbuf2);
#else
av1_selfguided_restoration(dat8, width, height, dat_stride, flt1, width,
sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2);
#endif // USE_HIGHPASS_IN_SGRPROJ
av1_selfguided_restoration(dat8, width, height, dat_stride, flt2, width,
sgr_params[ep].r2, sgr_params[ep].e2, tmpbuf2);
#if CONFIG_AOM_HIGHBITDEPTH
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment