Commit 7d3bd8da authored by Imdad Sardharwalla, committed by Debargha Mukherjee

Clean up SGR code and make consistent

Change-Id: I99c0cd287d154acc5063c92eb3ad4035bff8dad7
parent fdeb1169
......@@ -466,7 +466,7 @@ if (aom_config("CONFIG_LOOP_RESTORATION") eq "yes") {
add_proto qw/void apply_selfguided_restoration/, "const uint8_t *dat, int width, int height, int stride, int eps, const int *xqd, uint8_t *dst, int dst_stride, int32_t *tmpbuf, int bit_depth, int highbd";
specialize qw/apply_selfguided_restoration sse4_1 avx2/;
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt1, int32_t *flt2, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd";
add_proto qw/void av1_selfguided_restoration/, "const uint8_t *dgd, int width, int height, int stride, int32_t *flt0, int32_t *flt1, int flt_stride, const sgr_params_type *params, int bit_depth, int highbd";
specialize qw/av1_selfguided_restoration sse4_1 avx2/;
}
......
......@@ -747,11 +747,11 @@ static void boxsum(int32_t *src, int width, int height, int src_stride, int r,
#if CONFIG_SKIP_SGR
void decode_xq(const int *xqd, int *xq, const sgr_params_type *params) {
if (params->r1 == 0) {
if (params->r0 == 0) {
assert(xqd[0] == 0);
xq[0] = 0;
xq[1] = (1 << SGRPROJ_PRJ_BITS) - xqd[1];
} else if (params->r2 == 0) {
} else if (params->r1 == 0) {
assert(xqd[1] == 0);
xq[0] = xqd[0];
xq[1] = 0;
......@@ -1051,7 +1051,7 @@ static void av1_selfguided_restoration_internal(int32_t *dgd, int width,
}
void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt1, int32_t *flt2,
int dgd_stride, int32_t *flt0, int32_t *flt1,
int flt_stride, const sgr_params_type *params,
int bit_depth, int highbd) {
int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
......@@ -1078,45 +1078,45 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
// If params->r == 0 we skip the corresponding filter. We only allow one of
// the radii to be 0, as having both equal to 0 would be equivalent to
// skipping SGR entirely.
assert(!(params->r1 == 0 && params->r2 == 0));
assert(!(params->r0 == 0 && params->r1 == 0));
#if CONFIG_FAST_SGR
if (params->r1 > 0)
if (params->r0 > 0)
av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
flt1, flt_stride, bit_depth,
params->r1, params->e1);
if (params->r2 > 0)
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt2, flt_stride, bit_depth, params->r2,
params->e2);
#else
flt0, flt_stride, bit_depth,
params->r0, params->e0);
if (params->r1 > 0)
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt1, flt_stride, bit_depth, params->r1,
params->e1);
#else
if (params->r0 > 0)
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt0, flt_stride, bit_depth, params->r0,
params->e0);
if (params->r2 > 0)
if (params->r1 > 0)
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride,
flt2, flt_stride, bit_depth, params->r2,
params->e2);
flt1, flt_stride, bit_depth, params->r1,
params->e1);
#endif // CONFIG_FAST_SGR
#else // CONFIG_SKIP_SGR
#if CONFIG_FAST_SGR
// r == 2 filter
av1_selfguided_restoration_fast_internal(dgd32, width, height, dgd32_stride,
flt1, flt_stride, bit_depth,
params->r1, params->e1);
flt0, flt_stride, bit_depth,
params->r0, params->e0);
// r == 1 filter
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2,
flt_stride, bit_depth, params->r2,
params->e2);
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
flt_stride, bit_depth, params->r1,
params->e1);
#else
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt0,
flt_stride, bit_depth, params->r0,
params->e0);
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
flt_stride, bit_depth, params->r1,
params->e1);
av1_selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt2,
flt_stride, bit_depth, params->r2,
params->e2);
#endif // CONFIG_FAST_SGR
#endif // CONFIG_SKIP_SGR
}
......@@ -1126,18 +1126,18 @@ void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
uint8_t *dst8, int dst_stride,
int32_t *tmpbuf, int bit_depth,
int highbd) {
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
int32_t *flt0 = tmpbuf;
int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if CONFIG_SKIP_SGR
const sgr_params_type *params = &sgr_params[eps];
av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width,
params, bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq, params);
#else // CONFIG_SKIP_SGR
av1_selfguided_restoration_c(dat8, width, height, stride, flt1, flt2, width,
av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width,
&sgr_params[eps], bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq);
......@@ -1154,11 +1154,11 @@ void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
int32_t v = u << SGRPROJ_PRJ_BITS;
// If params->r == 0 then we skipped the filtering in
// av1_selfguided_restoration_c, i.e. flt[k] == u
if (params->r1 > 0) v += xq[0] * (flt1[k] - u);
if (params->r2 > 0) v += xq[1] * (flt2[k] - u);
if (params->r0 > 0) v += xq[0] * (flt0[k] - u);
if (params->r1 > 0) v += xq[1] * (flt1[k] - u);
#else // CONFIG_SKIP_SGR
const int32_t f1 = flt1[k] - u;
const int32_t f2 = flt2[k] - u;
const int32_t f1 = flt0[k] - u;
const int32_t f2 = flt1[k] - u;
const int32_t v = xq[0] * f1 + xq[1] * f2 + (u << SGRPROJ_PRJ_BITS);
#endif // CONFIG_SKIP_SGR
const int16_t w =
......
......@@ -176,10 +176,10 @@ extern "C" {
#endif
typedef struct {
int r0;
int e0;
int r1;
int e1;
int r2;
int e2;
} sgr_params_type;
typedef struct {
......
......@@ -526,8 +526,8 @@ static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A,
#endif
void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
int dgd_stride, int32_t *flt1,
int32_t *flt2, int flt_stride,
int dgd_stride, int32_t *flt0,
int32_t *flt1, int flt_stride,
const sgr_params_type *params,
int bit_depth, int highbd) {
// The ALIGN_POWER_OF_TWO macro here ensures that column 1 of Atl, Btl,
......@@ -583,36 +583,36 @@ void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
buf_stride);
// Write to flt1 and flt2
// Write to flt0 and flt1
#if CONFIG_SKIP_SGR
// If params->r == 0 we skip the corresponding filter. We only allow one of
// the radii to be 0, as having both equal to 0 would be equivalent to
// skipping SGR entirely.
assert(!(params->r1 == 0 && params->r2 == 0));
assert(!(params->r0 == 0 && params->r1 == 0));
#if CONFIG_FAST_SGR
assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
if (params->r1 > 0) {
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
if (params->r0 > 0) {
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth,
params->r0);
final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
width, height, highbd);
}
if (params->r2 > 0) {
calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
if (params->r1 > 0) {
calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
int r = i ? params->r1 : params->r0;
int e = i ? params->e1 : params->e0;
if (r == 0) continue;
int32_t *flt = i ? flt2 : flt1;
int32_t *flt = i ? flt1 : flt0;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
......@@ -623,26 +623,26 @@ void av1_selfguided_restoration_avx2(const uint8_t *dgd8, int width, int height,
#endif // CONFIG_FAST_SGR
#else // CONFIG_SKIP_SGR
#if CONFIG_FAST_SGR
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
// r == 2 filter
assert(params->r1 == 2);
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter_fast(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
assert(params->r0 == 2);
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth,
params->r0);
final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
// r == 1 filter
assert(params->r2 == 1);
calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
assert(params->r1 == 1);
calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
int32_t *flt = i ? flt2 : flt1;
int r = i ? params->r1 : params->r0;
int e = i ? params->e1 : params->e0;
int32_t *flt = i ? flt1 : flt0;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
......@@ -659,17 +659,17 @@ void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
const int *xqd, uint8_t *dst8,
int dst_stride, int32_t *tmpbuf,
int bit_depth, int highbd) {
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
int32_t *flt0 = tmpbuf;
int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if CONFIG_SKIP_SGR
const sgr_params_type *params = &sgr_params[eps];
av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2,
av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1,
width, params, bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq, params);
#else // CONFIG_SKIP_SGR
av1_selfguided_restoration_avx2(dat8, width, height, stride, flt1, flt2,
av1_selfguided_restoration_avx2(dat8, width, height, stride, flt0, flt1,
width, &sgr_params[eps], bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq);
......@@ -705,27 +705,27 @@ void apply_selfguided_restoration_avx2(const uint8_t *dat8, int width,
__m256i v_0 = _mm256_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
__m256i v_1 = _mm256_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
if (params->r1 > 0) {
const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
if (params->r0 > 0) {
const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0);
v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq0, f1_0));
const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1);
v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq0, f1_1));
}
if (params->r2 > 0) {
const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0);
if (params->r1 > 0) {
const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
v_0 = _mm256_add_epi32(v_0, _mm256_mullo_epi32(xq1, f2_0));
const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1);
const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
v_1 = _mm256_add_epi32(v_1, _mm256_mullo_epi32(xq1, f2_1));
}
#else // CONFIG_SKIP_SGR
const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
const __m256i f1_0 = _mm256_sub_epi32(yy_loadu_256(&flt0[k]), u_0);
const __m256i f1_1 = _mm256_sub_epi32(yy_loadu_256(&flt0[k + 8]), u_1);
const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt2[k]), u_0);
const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt2[k + 8]), u_1);
const __m256i f2_0 = _mm256_sub_epi32(yy_loadu_256(&flt1[k]), u_0);
const __m256i f2_1 = _mm256_sub_epi32(yy_loadu_256(&flt1[k + 8]), u_1);
const __m256i v_0 =
_mm256_add_epi32(_mm256_add_epi32(_mm256_mullo_epi32(xq0, f1_0),
......
......@@ -431,10 +431,10 @@ static __m128i cross_sum_fast_odd_row(const int32_t *buf) {
// The final filter for the FAST_SGR self-guided restoration. Computes a
// weighted average across A, B with "cross sums" (see cross_sum_...
// implementations above).
static void final_filter_fast2(int32_t *dst, int dst_stride, const int32_t *A,
const int32_t *B, int buf_stride,
const void *dgd8, int dgd_stride, int width,
int height, int highbd) {
static void final_filter_fast(int32_t *dst, int dst_stride, const int32_t *A,
const int32_t *B, int buf_stride,
const void *dgd8, int dgd_stride, int width,
int height, int highbd) {
const int nb0 = 5;
const int nb1 = 4;
......@@ -486,7 +486,7 @@ static void final_filter_fast2(int32_t *dst, int dst_stride, const int32_t *A,
void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
int height, int dgd_stride,
int32_t *flt1, int32_t *flt2,
int32_t *flt0, int32_t *flt1,
int flt_stride,
const sgr_params_type *params,
int bit_depth, int highbd) {
......@@ -538,36 +538,36 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
integral_images(dgd0, dgd_stride, width_ext, height_ext, Ctl, Dtl,
buf_stride);
// Write to flt1 and flt2
// Write to flt0 and flt1
#if CONFIG_SKIP_SGR
// If params->r == 0 we skip the corresponding filter. We only allow one of
// the radii to be 0, as having both equal to 0 would be equivalent to
// skipping SGR entirely.
assert(!(params->r1 == 0 && params->r2 == 0));
assert(!(params->r0 == 0 && params->r1 == 0));
#if CONFIG_FAST_SGR
assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r2 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
if (params->r1 > 0) {
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
width, height, highbd);
if (params->r0 > 0) {
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth,
params->r0);
final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
width, height, highbd);
}
if (params->r2 > 0) {
calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
if (params->r1 > 0) {
calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
}
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
int r = i ? params->r1 : params->r0;
int e = i ? params->e1 : params->e0;
if (r == 0) continue;
int32_t *flt = i ? flt2 : flt1;
int32_t *flt = i ? flt1 : flt0;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
......@@ -578,26 +578,26 @@ void av1_selfguided_restoration_sse4_1(const uint8_t *dgd8, int width,
#endif // CONFIG_FAST_SGR
#else // CONFIG_SKIP_SGR
#if CONFIG_FAST_SGR
assert(params->r1 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
assert(params->r0 < AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
// r == 2 filter
assert(params->r1 == 2);
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter_fast2(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride,
width, height, highbd);
assert(params->r0 == 2);
calc_ab_fast(A, B, C, D, width, height, buf_stride, params->e0, bit_depth,
params->r0);
final_filter_fast(flt0, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
// r == 1 filter
assert(params->r2 == 1);
calc_ab(A, B, C, D, width, height, buf_stride, params->e2, bit_depth,
params->r2);
final_filter(flt2, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
assert(params->r1 == 1);
calc_ab(A, B, C, D, width, height, buf_stride, params->e1, bit_depth,
params->r1);
final_filter(flt1, flt_stride, A, B, buf_stride, dgd8, dgd_stride, width,
height, highbd);
#else // CONFIG_FAST_SGR
for (int i = 0; i < 2; ++i) {
int r = i ? params->r2 : params->r1;
int e = i ? params->e2 : params->e1;
int32_t *flt = i ? flt2 : flt1;
int r = i ? params->r1 : params->r0;
int e = i ? params->e1 : params->e0;
int32_t *flt = i ? flt1 : flt0;
assert(r + 1 <= AOMMIN(SGRPROJ_BORDER_VERT, SGRPROJ_BORDER_HORZ));
......@@ -614,17 +614,17 @@ void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
const int *xqd, uint8_t *dst8,
int dst_stride, int32_t *tmpbuf,
int bit_depth, int highbd) {
int32_t *flt1 = tmpbuf;
int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX;
int32_t *flt0 = tmpbuf;
int32_t *flt1 = flt0 + RESTORATION_TILEPELS_MAX;
assert(width * height <= RESTORATION_TILEPELS_MAX);
#if CONFIG_SKIP_SGR
const sgr_params_type *params = &sgr_params[eps];
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1,
width, params, bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq, params);
#else // CONFIG_SKIP_SGR
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt1, flt2,
av1_selfguided_restoration_sse4_1(dat8, width, height, stride, flt0, flt1,
width, &sgr_params[eps], bit_depth, highbd);
int xq[2];
decode_xq(xqd, xq);
......@@ -655,26 +655,26 @@ void apply_selfguided_restoration_sse4_1(const uint8_t *dat8, int width,
__m128i v_0 = _mm_slli_epi32(u_0, SGRPROJ_PRJ_BITS);
__m128i v_1 = _mm_slli_epi32(u_1, SGRPROJ_PRJ_BITS);
if (params->r1 > 0) {
const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
if (params->r0 > 0) {
const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0);
v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq0, f1_0));
const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1);
v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq0, f1_1));
}
if (params->r2 > 0) {
const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0);
if (params->r1 > 0) {
const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
v_0 = _mm_add_epi32(v_0, _mm_mullo_epi32(xq1, f2_0));
const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1);
const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
v_1 = _mm_add_epi32(v_1, _mm_mullo_epi32(xq1, f2_1));
}
#else // CONFIG_SKIP_SGR
const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt2[k]), u_0);
const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt2[k + 4]), u_1);
const __m128i f1_0 = _mm_sub_epi32(xx_loadu_128(&flt0[k]), u_0);
const __m128i f2_0 = _mm_sub_epi32(xx_loadu_128(&flt1[k]), u_0);
const __m128i f1_1 = _mm_sub_epi32(xx_loadu_128(&flt0[k + 4]), u_1);
const __m128i f2_1 = _mm_sub_epi32(xx_loadu_128(&flt1[k + 4]), u_1);
const __m128i v_0 = _mm_add_epi32(
_mm_add_epi32(_mm_mullo_epi32(xq0, f1_0), _mm_mullo_epi32(xq1, f2_0)),
......
......@@ -917,14 +917,14 @@ static void read_sgrproj_filter(SgrprojInfo *sgrproj_info,
sgrproj_info->ep = aom_read_literal(rb, SGRPROJ_PARAMS_BITS, ACCT_STR);
const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
if (params->r1 == 0) {
if (params->r0 == 0) {
sgrproj_info->xqd[0] = 0;
sgrproj_info->xqd[1] =
aom_read_primitive_refsubexpfin(
rb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, ACCT_STR) +
SGRPROJ_PRJ_MIN1;
} else if (params->r2 == 0) {
} else if (params->r1 == 0) {
sgrproj_info->xqd[0] =
aom_read_primitive_refsubexpfin(
rb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
......
......@@ -2194,13 +2194,13 @@ static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info,
aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS);
const sgr_params_type *params = &sgr_params[sgrproj_info->ep];
if (params->r1 == 0) {
if (params->r0 == 0) {
assert(sgrproj_info->xqd[0] == 0);
aom_write_primitive_refsubexpfin(
wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K,
ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1,
sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1);
} else if (params->r2 == 0) {
} else if (params->r1 == 0) {
aom_write_primitive_refsubexpfin(
wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K,
ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0,
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment