Commit 81307a33, authored by Imdad Sardharwalla, committed by Debargha Mukherjee

Add a config flag/code for SKIP_SGR computation

SGR currently calculates two filters with different radii. This patch
adds the experiment SKIP_SGR in which a filter is skipped if the radius
of the filter is set to 0.

The SSE4.1 and AVX2 SIMD code has also been updated.

Change-Id: I77e879e3636bfbacab2b6fc2ab426f35000c8e92
parent 20be5450
......@@ -27,7 +27,10 @@
#include "aom_ports/mem.h"
const sgr_params_type sgr_params[SGRPROJ_PARAMS] = {
// r1, eps1, r2, eps2
// r1, eps1, r2, eps2
#if CONFIG_SKIP_SGR
// Setting r = 0 skips the filter
#endif // CONFIG_SKIP_SGR
{ 2, 12, 1, 4 }, { 2, 15, 1, 6 }, { 2, 18, 1, 8 }, { 2, 20, 1, 9 },
{ 2, 22, 1, 10 }, { 2, 25, 1, 11 }, { 2, 35, 1, 12 }, { 2, 45, 1, 13 },
{ 2, 55, 1, 14 }, { 2, 65, 1, 15 }, { 2, 75, 1, 16 }, { 2, 30, 1, 6 },
......@@ -1049,6 +1052,40 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
}
}
#if CONFIG_SKIP_SGR
// If params->r == 0 we skip the corresponding filter. At most one of the
// radii may be 0, as having both equal to 0 would be equivalent to
// skipping SGR entirely.
assert(!(params->r1 == 0 && params->r2 == 0));
#if CONFIG_FAST_SGR
if (params->r1 > 0) {
// r == 2 filter
assert(params->r1 == 2);
av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
flt1, flt_stride, bit_depth,
params->r1, params->e1);
}
if (params->r2 > 0) {
// r == 1 filter
assert(params->r2 == 1);
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt2, flt_stride, bit_depth, params->r2,
params->e2);
}
#else
if (params->r1 > 0)
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt1, flt_stride, bit_depth, params->r1,
params->e1);
if (params->r2 > 0)
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt2, flt_stride, bit_depth, params->r2,
params->e2);
#endif // CONFIG_FAST_SGR
#else // CONFIG_SKIP_SGR
#if CONFIG_FAST_SGR
// r == 2 filter
av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
......@@ -1066,6 +1103,7 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
flt_stride, bit_depth, params->r2,
params->e2);
#endif // CONFIG_FAST_SGR
#endif // CONFIG_SKIP_SGR
}
void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
......@@ -1077,8 +1115,15 @@ void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if CONFIG_SKIP_SGR
const sgr_params_type *params = &sgr_params[eps];
av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
params, bit_depth, highbd);
#else // CONFIG_SKIP_SGR
av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
&sgr_params[eps], bit_depth, highbd);
#endif // CONFIG_SKIP_SGR
decode_xq(xqd, xq);
for (int i = 0; i < height; ++i) {
for (int j = 0; j < width; ++j) {
......@@ -1088,9 +1133,17 @@ void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
const uint16_t pre_u = highbd ? *CONVERT_TO_SHORTPTR(dat8ij) : *dat8ij;
const int32_t u = (int32_t)pre_u << SGRPROJ_RST_BITS;
#if CONFIG_SKIP_SGR
int32_t v = u << SGRPROJ_PRJ_BITS;
// If params->r == 0 then we skipped the filtering in
// av1_selfguided_restoration_c, i.e. flt[k] == u
if (params->r1 > 0) v += xq[0] * (flt1[k] - u);
if (params->r2 > 0) v += xq[1] * (flt2[k] - u);
#else // CONFIG_SKIP_SGR
const int32_t f1 = flt1[k] - u;
const int32_t f2 = flt2[k] - u;
const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
#endif // CONFIG_SKIP_SGR
const int16_t w =
(int16_t)ROUND_POWER_OF_TWO(v, SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
......
......@@ -584,6 +584,48 @@ void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
buf_stride);
// Write to flt1 and flt2
#if CONFIG_SKIP_SGR
// If params->r == 0 we skip the corresponding filter. At most one of the
// radii may be 0, as having both equal to 0 would be equivalent to
// skipping SGR entirely.
assert(!(params->r1 == 0 && params->r2 == 0));
#if CONFIG_FAST_SGR
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
if (params->r1 > 0) {
// r == 2 filter
assert(params->r1 == 2);
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
width, height, highbd);
}
if (params->r2 > 0) {
// r == 1 filter
assert(params->r2 == 1);
calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
if (r == 0) continue;
int32_t *flt = i ? flt2 : flt1;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
calc_ab(A, B, C, D, width, height, buf_stride, e, bit_depth, r);
final_filter(flt, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#endif // CONFIG_FAST_SGR
#else // CONFIG_SKIP_SGR
#if CONFIG_FAST_SGR
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
......@@ -600,7 +642,7 @@ void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
#else
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
......@@ -612,7 +654,8 @@ void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
final_filter(flt, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#endif
#endif // CONFIG_FAST_SGR
#endif // CONFIG_SKIP_SGR
}
void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
......@@ -623,8 +666,14 @@ void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if CONFIG_SKIP_SGR
const sgr_params_type *params = &sgr_params[eps];
av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2,
width, params, bit_depth, highbd);
#else // CONFIG_SKIP_SGR
av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2,
width, &sgr_params[eps], bit_depth, highbd);
#endif // CONFIG_SKIP_SGR
int xq[2];
decode_xq(xqd, xq);
......@@ -655,6 +704,26 @@ void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
const __m256i u_0 = _mm256_slli_epi32(ep_0, SGRPROJ_RST_BITS);
const __m256i u_1 = _mm256_slli_epi32(ep_1, SGRPROJ_RST_BITS);
#if CONFIG_SKIP_SGR
__m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
__m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
if (params->r1 > 0) {
const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0));
const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1));
}
if (params->r2 > 0) {
const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0);
v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0));
const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1);
v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1));
}
#else // CONFIG_SKIP_SGR
const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
......@@ -669,6 +738,7 @@ void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
_mm256_add_epi32(_mm256_add_epi32(_mm256_mullo_epi32(xq0, f1_1),
_mm256_mullo_epi32(xq1, f2_1)),
_mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS));
#endif // CONFIG_SKIP_SGR
const __m256i rounding =
round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
......
......@@ -539,6 +539,48 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
buf_stride);
// Write to flt1 and flt2
#if CONFIG_SKIP_SGR
// If params->r == 0 we skip the corresponding filter. At most one of the
// radii may be 0, as having both equal to 0 would be equivalent to
// skipping SGR entirely.
assert(!(params->r1 == 0 && params->r2 == 0));
#if CONFIG_FAST_SGR
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
if (params->r1 > 0) {
// r == 2 filter
assert(params->r1 == 2);
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
width, height, highbd);
}
if (params->r2 > 0) {
// r == 1 filter
assert(params->r2 == 1);
calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
if (r == 0) continue;
int32_t *flt = i ? flt2 : flt1;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
calc_ab(A, B, C, D, width, height, buf_stride, e, bit_depth, r);
final_filter(flt, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#endif // CONFIG_FAST_SGR
#else // CONFIG_SKIP_SGR
#if CONFIG_FAST_SGR
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
......@@ -555,7 +597,7 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
#else
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
......@@ -567,7 +609,8 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
final_filter(flt, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#endif
#endif // CONFIG_FAST_SGR
#endif // CONFIG_SKIP_SGR
}
void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
......@@ -578,8 +621,14 @@ void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if CONFIG_SKIP_SGR
const sgr_params_type *params = &sgr_params[eps];
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
width, params, bit_depth, highbd);
#else // CONFIG_SKIP_SGR
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
width, &sgr_params[eps], bit_depth, highbd);
#endif // CONFIG_SKIP_SGR
int xq[2];
decode_xq(xqd, xq);
......@@ -605,6 +654,26 @@ void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
const __m128i u_0 = _mm_cvtepu16_epi32(u);
const __m128i u_1 = _mm_cvtepu16_epi32(_mm_srli_si128(u, 8));
#if CONFIG_SKIP_SGR
__m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
__m128i v_1 = _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
if (params->r1 > 0) {
const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0));
const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1));
}
if (params->r2 > 0) {
const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0);
v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0));
const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1);
v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1));
}
#else // CONFIG_SKIP_SGR
const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0);
const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
......@@ -616,6 +685,7 @@ void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
const __m128i v_1 = _mm_add_epi32(
_mm_add_epi32(_mm_mullo_epi32(xq0, f1_1), _mm_mullo_epi32(xq1, f2_1)),
_mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS));
#endif // CONFIG_SKIP_SGR
const __m128i rounding =
round_for_shift(SGRPROJ_PRJ_BITS + SGRPROJ_RST_BITS);
......
......@@ -148,6 +148,7 @@ set(CONFIG_SCALABILITY 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SEGMENT_GLOBALMV 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SEGMENT_PRED_LAST 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SHORT_FILTER 1 CACHE NUMBER "AV1 experiment flag.")
# SKIP_SGR experiment: when enabled, a self-guided restoration (SGR) filter
# whose radius is set to 0 is skipped. The cached value here is 0.
set(CONFIG_SKIP_SGR 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_SPATIAL_SEGMENTATION 1 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_TILE_INFO_FIRST 0 CACHE NUMBER "AV1 experiment flag.")
set(CONFIG_TIMING_INFO_IN_SEQ_HEADERS 0 CACHE NUMBER "AV1 experiment flag.")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment