Commit f7d1ff49 authored by Debargha Mukherjee

Don't use extra lines for r=2 guided filter

Changes the CONFIG_FAST_SGR=1 strategy so that the r=1 filter
uses no subsampling at all, while the r=2 filter still
sub-samples vertically but, in the last stage, produces the
odd rows by filtering horizontally only.

Coding efficiency loss seems quite minimal.

Change-Id: I5644ac400b387c37a2d278db7f6ad3ac0a6b5e93
parent 23b54841
...@@ -1024,62 +1024,98 @@ static void av1_selfguided_restoration_fast_internal( ...@@ -1024,62 +1024,98 @@ static void av1_selfguided_restoration_fast_internal(
} }
} }
// Use the A[] and B[] arrays to calculate the filtered image // Use the A[] and B[] arrays to calculate the filtered image
for (i = 0; i < height; ++i) { if (r == 1) {
if (!(i & 1)) { // even row for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) { if (!(i & 1)) { // even row
const int k = i * buf_stride + j; for (j = 0; j < width; ++j) {
const int l = i * dgd_stride + j; const int k = i * buf_stride + j;
const int m = i * dst_stride + j; const int l = i * dgd_stride + j;
const int nb = 5; const int m = i * dst_stride + j;
const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 + const int nb = 5;
(A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 +
A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
5; A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 + 5;
(B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 +
B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
5; B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
const int32_t v = a * dgd[l] + b; 5;
dst[m] = const int32_t v = a * dgd[l] + b;
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); dst[m] =
} ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
} else if (i != height - 1) { // odd row and not last }
for (j = 0; j < width; ++j) { } else if (i != height - 1) { // odd row and not last
const int k = i * buf_stride + j; for (j = 0; j < width; ++j) {
const int l = i * dgd_stride + j; const int k = i * buf_stride + j;
const int m = i * dst_stride + j; const int l = i * dgd_stride + j;
const int nb = 6; const int m = i * dst_stride + j;
const int buf_stride2 = 2 * buf_stride; const int nb = 6;
const int32_t a = A[k] * 16 + (A[k - 1] + A[k + 1]) * 14 + const int buf_stride2 = 2 * buf_stride;
(A[k - buf_stride2] + A[k + buf_stride2]) * 4 + const int32_t a = A[k] * 16 + (A[k - 1] + A[k + 1]) * 14 +
(A[k - 1 - buf_stride2] + A[k - 1 + buf_stride2] + (A[k - buf_stride2] + A[k + buf_stride2]) * 4 +
A[k + 1 - buf_stride2] + A[k + 1 + buf_stride2]) * (A[k - 1 - buf_stride2] + A[k - 1 + buf_stride2] +
3; A[k + 1 - buf_stride2] + A[k + 1 + buf_stride2]) *
const int32_t b = B[k] * 16 + (B[k - 1] + B[k + 1]) * 14 + 3;
(B[k - buf_stride2] + B[k + buf_stride2]) * 4 + const int32_t b = B[k] * 16 + (B[k - 1] + B[k + 1]) * 14 +
(B[k - 1 - buf_stride2] + B[k - 1 + buf_stride2] + (B[k - buf_stride2] + B[k + buf_stride2]) * 4 +
B[k + 1 - buf_stride2] + B[k + 1 + buf_stride2]) * (B[k - 1 - buf_stride2] + B[k - 1 + buf_stride2] +
3; B[k + 1 - buf_stride2] + B[k + 1 + buf_stride2]) *
const int32_t v = a * dgd[l] + b; 3;
dst[m] = const int32_t v = a * dgd[l] + b;
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); dst[m] =
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
} else { // odd row and last
for (j = 0; j < width; ++j) {
const int k = i * buf_stride + j;
const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 6;
const int buf_stride2 = 2 * buf_stride;
const int32_t a =
A[k] * 18 + (A[k - 1] + A[k + 1]) * 16 + A[k - buf_stride2] * 6 +
(A[k - 1 - buf_stride2] + A[k + 1 - buf_stride2]) * 4;
const int32_t b =
B[k] * 18 + (B[k - 1] + B[k + 1]) * 16 + B[k - buf_stride2] * 6 +
(B[k - 1 - buf_stride2] + B[k + 1 - buf_stride2]) * 4;
const int32_t v = a * dgd[l] + b;
dst[m] =
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
} }
} else { // odd row and last }
for (j = 0; j < width; ++j) { } else { // r = 2
const int k = i * buf_stride + j; for (i = 0; i < height; ++i) {
const int l = i * dgd_stride + j; if (!(i & 1)) { // even row
const int m = i * dst_stride + j; for (j = 0; j < width; ++j) {
const int nb = 6; const int k = i * buf_stride + j;
const int buf_stride2 = 2 * buf_stride; const int l = i * dgd_stride + j;
const int32_t a = A[k] * 18 + (A[k - 1] + A[k + 1]) * 16 + const int m = i * dst_stride + j;
A[k - buf_stride2] * 6 + const int nb = 5;
(A[k - 1 - buf_stride2] + A[k + 1 - buf_stride2]) * 4; const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 +
const int32_t b = B[k] * 18 + (B[k - 1] + B[k + 1]) * 16 + (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] +
B[k - buf_stride2] * 6 + A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) *
(B[k - 1 - buf_stride2] + B[k + 1 - buf_stride2]) * 4; 5;
const int32_t v = a * dgd[l] + b; const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 +
dst[m] = (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] +
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) *
5;
const int32_t v = a * dgd[l] + b;
dst[m] =
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
} else { // odd row
for (j = 0; j < width; ++j) {
const int k = i * buf_stride + j;
const int l = i * dgd_stride + j;
const int m = i * dst_stride + j;
const int nb = 4;
const int32_t a = A[k] * 6 + (A[k - 1] + A[k + 1]) * 5;
const int32_t b = B[k] * 6 + (B[k - 1] + B[k + 1]) * 5;
const int32_t v = a * dgd[l] + b;
dst[m] =
ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS);
}
} }
} }
} }
...@@ -1242,9 +1278,9 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height, ...@@ -1242,9 +1278,9 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
flt1, flt_stride, bit_depth, flt1, flt_stride, bit_depth,
params->r1, params->e1); params->r1, params->e1);
av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2,
flt2, flt_stride, bit_depth, flt_stride, bit_depth, params->r2,
params->r2, params->e2); params->e2);
#else #else
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
flt_stride, bit_depth, params->r1, flt_stride, bit_depth, params->r1,
...@@ -1300,7 +1336,7 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui, ...@@ -1300,7 +1336,7 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
for (int j = 0; j < stripe_width; j += procunit_width) { for (int j = 0; j < stripe_width; j += procunit_width) {
int w = AOMMIN(procunit_width, stripe_width - j); int w = AOMMIN(procunit_width, stripe_width - j);
#if CONFIG_FAST_SGR == 2 #if CONFIG_FAST_SGR
apply_selfguided_restoration_c(src + j, w, stripe_height, src_stride, apply_selfguided_restoration_c(src + j, w, stripe_height, src_stride,
rui->sgrproj_info.ep, rui->sgrproj_info.xqd, rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
dst + j, dst_stride, tmpbuf, bit_depth, 0); dst + j, dst_stride, tmpbuf, bit_depth, 0);
...@@ -1308,7 +1344,7 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui, ...@@ -1308,7 +1344,7 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui,
apply_selfguided_restoration(src + j, w, stripe_height, src_stride, apply_selfguided_restoration(src + j, w, stripe_height, src_stride,
rui->sgrproj_info.ep, rui->sgrproj_info.xqd, rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
dst + j, dst_stride, tmpbuf, bit_depth, 0); dst + j, dst_stride, tmpbuf, bit_depth, 0);
#endif // CONFIG_FAST_SGR == 2 #endif // CONFIG_FAST_SGR
} }
} }
...@@ -1344,7 +1380,7 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui, ...@@ -1344,7 +1380,7 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
int32_t *tmpbuf, int bit_depth) { int32_t *tmpbuf, int bit_depth) {
for (int j = 0; j < stripe_width; j += procunit_width) { for (int j = 0; j < stripe_width; j += procunit_width) {
int w = AOMMIN(procunit_width, stripe_width - j); int w = AOMMIN(procunit_width, stripe_width - j);
#if CONFIG_FAST_SGR == 2 #if CONFIG_FAST_SGR
apply_selfguided_restoration_c(src8 + j, w, stripe_height, src_stride, apply_selfguided_restoration_c(src8 + j, w, stripe_height, src_stride,
rui->sgrproj_info.ep, rui->sgrproj_info.xqd, rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
dst8 + j, dst_stride, tmpbuf, bit_depth, 1); dst8 + j, dst_stride, tmpbuf, bit_depth, 1);
...@@ -1352,7 +1388,7 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui, ...@@ -1352,7 +1388,7 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui,
apply_selfguided_restoration(src8 + j, w, stripe_height, src_stride, apply_selfguided_restoration(src8 + j, w, stripe_height, src_stride,
rui->sgrproj_info.ep, rui->sgrproj_info.xqd, rui->sgrproj_info.ep, rui->sgrproj_info.xqd,
dst8 + j, dst_stride, tmpbuf, bit_depth, 1); dst8 + j, dst_stride, tmpbuf, bit_depth, 1);
#endif // CONFIG_FAST_SGR == 2 #endif // CONFIG_FAST_SGR
} }
} }
......
...@@ -353,7 +353,7 @@ static void sgr_filter_block(const sgr_params_type *params, const uint8_t *dat8, ...@@ -353,7 +353,7 @@ static void sgr_filter_block(const sgr_params_type *params, const uint8_t *dat8,
int width, int height, int dat_stride, int width, int height, int dat_stride,
int use_highbd, int bit_depth, int32_t *flt1, int use_highbd, int bit_depth, int32_t *flt1,
int32_t *flt2, int flt_stride) { int32_t *flt2, int flt_stride) {
#if CONFIG_FAST_SGR == 2 #if CONFIG_FAST_SGR
av1_selfguided_restoration_c(dat8, width, height, dat_stride, flt1, flt2, av1_selfguided_restoration_c(dat8, width, height, dat_stride, flt1, flt2,
flt_stride, params, bit_depth, use_highbd); flt_stride, params, bit_depth, use_highbd);
#else #else
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment