From f7d1ff49eb5a0cbe584839913eac10763c8a73da Mon Sep 17 00:00:00 2001 From: Debargha Mukherjee Date: Wed, 31 Jan 2018 10:42:43 -0800 Subject: [PATCH] Don't use extra lines for r=2 guided filter Changes the CONFIG_FAST_SGR=1 strategy to not use any subsampling for the r=1 filter, but for the r=2 filter sub-sample vertically but combine only by filtering horizontally in the last stage for odd rows. Coding efficiency loss seems quite minimal. Change-Id: I5644ac400b387c37a2d278db7f6ad3ac0a6b5e93 --- av1/common/restoration.c | 160 ++++++++++++++++++++++++--------------- av1/encoder/pickrst.c | 2 +- 2 files changed, 99 insertions(+), 63 deletions(-) diff --git a/av1/common/restoration.c b/av1/common/restoration.c index 1bc25ef24..b49e24835 100644 --- a/av1/common/restoration.c +++ b/av1/common/restoration.c @@ -1024,62 +1024,98 @@ static void av1_selfguided_restoration_fast_internal( } } // Use the A[] and B[] arrays to calculate the filtered image - for (i = 0; i < height; ++i) { - if (!(i & 1)) { // even row - for (j = 0; j < width; ++j) { - const int k = i * buf_stride + j; - const int l = i * dgd_stride + j; - const int m = i * dst_stride + j; - const int nb = 5; - const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 + - (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + - A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * - 5; - const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 + - (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + - B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * - 5; - const int32_t v = a * dgd[l] + b; - dst[m] = - ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); - } - } else if (i != height - 1) { // odd row and not last - for (j = 0; j < width; ++j) { - const int k = i * buf_stride + j; - const int l = i * dgd_stride + j; - const int m = i * dst_stride + j; - const int nb = 6; - const int buf_stride2 = 2 * buf_stride; - const int32_t a = A[k] * 16 + (A[k - 1] + A[k + 1]) * 14 + - (A[k - buf_stride2] + A[k + 
buf_stride2]) * 4 + - (A[k - 1 - buf_stride2] + A[k - 1 + buf_stride2] + - A[k + 1 - buf_stride2] + A[k + 1 + buf_stride2]) * - 3; - const int32_t b = B[k] * 16 + (B[k - 1] + B[k + 1]) * 14 + - (B[k - buf_stride2] + B[k + buf_stride2]) * 4 + - (B[k - 1 - buf_stride2] + B[k - 1 + buf_stride2] + - B[k + 1 - buf_stride2] + B[k + 1 + buf_stride2]) * - 3; - const int32_t v = a * dgd[l] + b; - dst[m] = - ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + if (r == 1) { + for (i = 0; i < height; ++i) { + if (!(i & 1)) { // even row + for (j = 0; j < width; ++j) { + const int k = i * buf_stride + j; + const int l = i * dgd_stride + j; + const int m = i * dst_stride + j; + const int nb = 5; + const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 + + (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + + A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * + 5; + const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 + + (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + + B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * + 5; + const int32_t v = a * dgd[l] + b; + dst[m] = + ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + } + } else if (i != height - 1) { // odd row and not last + for (j = 0; j < width; ++j) { + const int k = i * buf_stride + j; + const int l = i * dgd_stride + j; + const int m = i * dst_stride + j; + const int nb = 6; + const int buf_stride2 = 2 * buf_stride; + const int32_t a = A[k] * 16 + (A[k - 1] + A[k + 1]) * 14 + + (A[k - buf_stride2] + A[k + buf_stride2]) * 4 + + (A[k - 1 - buf_stride2] + A[k - 1 + buf_stride2] + + A[k + 1 - buf_stride2] + A[k + 1 + buf_stride2]) * + 3; + const int32_t b = B[k] * 16 + (B[k - 1] + B[k + 1]) * 14 + + (B[k - buf_stride2] + B[k + buf_stride2]) * 4 + + (B[k - 1 - buf_stride2] + B[k - 1 + buf_stride2] + + B[k + 1 - buf_stride2] + B[k + 1 + buf_stride2]) * + 3; + const int32_t v = a * dgd[l] + b; + dst[m] = + ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + } + } else 
{ // odd row and last + for (j = 0; j < width; ++j) { + const int k = i * buf_stride + j; + const int l = i * dgd_stride + j; + const int m = i * dst_stride + j; + const int nb = 6; + const int buf_stride2 = 2 * buf_stride; + const int32_t a = + A[k] * 18 + (A[k - 1] + A[k + 1]) * 16 + A[k - buf_stride2] * 6 + + (A[k - 1 - buf_stride2] + A[k + 1 - buf_stride2]) * 4; + const int32_t b = + B[k] * 18 + (B[k - 1] + B[k + 1]) * 16 + B[k - buf_stride2] * 6 + + (B[k - 1 - buf_stride2] + B[k + 1 - buf_stride2]) * 4; + const int32_t v = a * dgd[l] + b; + dst[m] = + ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + } } - } else { // odd row and last - for (j = 0; j < width; ++j) { - const int k = i * buf_stride + j; - const int l = i * dgd_stride + j; - const int m = i * dst_stride + j; - const int nb = 6; - const int buf_stride2 = 2 * buf_stride; - const int32_t a = A[k] * 18 + (A[k - 1] + A[k + 1]) * 16 + - A[k - buf_stride2] * 6 + - (A[k - 1 - buf_stride2] + A[k + 1 - buf_stride2]) * 4; - const int32_t b = B[k] * 18 + (B[k - 1] + B[k + 1]) * 16 + - B[k - buf_stride2] * 6 + - (B[k - 1 - buf_stride2] + B[k + 1 - buf_stride2]) * 4; - const int32_t v = a * dgd[l] + b; - dst[m] = - ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + } + } else { // r = 2 + for (i = 0; i < height; ++i) { + if (!(i & 1)) { // even row + for (j = 0; j < width; ++j) { + const int k = i * buf_stride + j; + const int l = i * dgd_stride + j; + const int m = i * dst_stride + j; + const int nb = 5; + const int32_t a = (A[k - buf_stride] + A[k + buf_stride]) * 6 + + (A[k - 1 - buf_stride] + A[k - 1 + buf_stride] + + A[k + 1 - buf_stride] + A[k + 1 + buf_stride]) * + 5; + const int32_t b = (B[k - buf_stride] + B[k + buf_stride]) * 6 + + (B[k - 1 - buf_stride] + B[k - 1 + buf_stride] + + B[k + 1 - buf_stride] + B[k + 1 + buf_stride]) * + 5; + const int32_t v = a * dgd[l] + b; + dst[m] = + ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + } + } else { // odd 
row + for (j = 0; j < width; ++j) { + const int k = i * buf_stride + j; + const int l = i * dgd_stride + j; + const int m = i * dst_stride + j; + const int nb = 4; + const int32_t a = A[k] * 6 + (A[k - 1] + A[k + 1]) * 5; + const int32_t b = B[k] * 6 + (B[k - 1] + B[k + 1]) * 5; + const int32_t v = a * dgd[l] + b; + dst[m] = + ROUND_POWER_OF_TWO(v, SGRPROJ_SGR_BITS + nb - SGRPROJ_RST_BITS); + } } } } @@ -1242,9 +1278,9 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height, av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, flt1, flt_stride, bit_depth, params->r1, params->e1); - av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride, - flt2, flt_stride, bit_depth, - params->r2, params->e2); + av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2, + flt_stride, bit_depth, params->r2, + params->e2); #else av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1, flt_stride, bit_depth, params->r1, @@ -1300,7 +1336,7 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui, for (int j = 0; j < stripe_width; j += procunit_width) { int w = AOMMIN(procunit_width, stripe_width - j); -#if CONFIG_FAST_SGR == 2 +#if CONFIG_FAST_SGR apply_selfguided_restoration_c(src + j, w, stripe_height, src_stride, rui->sgrproj_info.ep, rui->sgrproj_info.xqd, dst + j, dst_stride, tmpbuf, bit_depth, 0); @@ -1308,7 +1344,7 @@ static void sgrproj_filter_stripe(const RestorationUnitInfo *rui, apply_selfguided_restoration(src + j, w, stripe_height, src_stride, rui->sgrproj_info.ep, rui->sgrproj_info.xqd, dst + j, dst_stride, tmpbuf, bit_depth, 0); -#endif // CONFIG_FAST_SGR == 2 +#endif // CONFIG_FAST_SGR } } @@ -1344,7 +1380,7 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui, int32_t *tmpbuf, int bit_depth) { for (int j = 0; j < stripe_width; j += procunit_width) { int w = AOMMIN(procunit_width, stripe_width - j); -#if CONFIG_FAST_SGR == 2 +#if 
CONFIG_FAST_SGR apply_selfguided_restoration_c(src8 + j, w, stripe_height, src_stride, rui->sgrproj_info.ep, rui->sgrproj_info.xqd, dst8 + j, dst_stride, tmpbuf, bit_depth, 1); @@ -1352,7 +1388,7 @@ static void sgrproj_filter_stripe_highbd(const RestorationUnitInfo *rui, apply_selfguided_restoration(src8 + j, w, stripe_height, src_stride, rui->sgrproj_info.ep, rui->sgrproj_info.xqd, dst8 + j, dst_stride, tmpbuf, bit_depth, 1); -#endif // CONFIG_FAST_SGR == 2 +#endif // CONFIG_FAST_SGR } } diff --git a/av1/encoder/pickrst.c b/av1/encoder/pickrst.c index e8b40c068..2e18bb67b 100644 --- a/av1/encoder/pickrst.c +++ b/av1/encoder/pickrst.c @@ -353,7 +353,7 @@ static void sgr_filter_block(const sgr_params_type *params, const uint8_t *dat8, int width, int height, int dat_stride, int use_highbd, int bit_depth, int32_t *flt1, int32_t *flt2, int flt_stride) { -#if CONFIG_FAST_SGR == 2 +#if CONFIG_FAST_SGR av1_selfguided_restoration_c(dat8, width, height, dat_stride, flt1, flt2, flt_stride, params, bit_depth, use_highbd); #else -- GitLab