Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
127b562a
Commit
127b562a
authored
Jan 25, 2018
by
Debargha Mukherjee
Browse files
Adjust last odd row weight in fast_sgr
Change-Id: I2348a7c6a3553bbbb0d061820a7c546a1a0367df
parent
6cd8e177
Changes
3
Hide whitespace changes
Inline
Side-by-side
av1/common/restoration.c
View file @
127b562a
...
...
@@ -916,12 +916,12 @@ static void av1_selfguided_restoration_fast_internal(
const
int
m
=
i
*
dst_stride
+
j
;
const
int
nb
=
6
;
const
int
buf_stride2
=
2
*
buf_stride
;
const
int32_t
a
=
A
[
k
]
*
1
6
+
(
A
[
k
-
1
]
+
A
[
k
+
1
])
*
1
4
+
A
[
k
-
buf_stride2
]
*
8
+
(
A
[
k
-
1
-
buf_stride2
]
+
A
[
k
+
1
-
buf_stride2
])
*
6
;
const
int32_t
b
=
B
[
k
]
*
1
6
+
(
B
[
k
-
1
]
+
B
[
k
+
1
])
*
1
4
+
B
[
k
-
buf_stride2
]
*
8
+
(
B
[
k
-
1
-
buf_stride2
]
+
B
[
k
+
1
-
buf_stride2
])
*
6
;
const
int32_t
a
=
A
[
k
]
*
1
8
+
(
A
[
k
-
1
]
+
A
[
k
+
1
])
*
1
6
+
A
[
k
-
buf_stride2
]
*
6
+
(
A
[
k
-
1
-
buf_stride2
]
+
A
[
k
+
1
-
buf_stride2
])
*
4
;
const
int32_t
b
=
B
[
k
]
*
1
8
+
(
B
[
k
-
1
]
+
B
[
k
+
1
])
*
1
6
+
B
[
k
-
buf_stride2
]
*
6
+
(
B
[
k
-
1
-
buf_stride2
]
+
B
[
k
+
1
-
buf_stride2
])
*
4
;
const
int32_t
v
=
a
*
dgd
[
l
]
+
b
;
dst
[
m
]
=
ROUND_POWER_OF_TWO
(
v
,
SGRPROJ_SGR_BITS
+
nb
-
SGRPROJ_RST_BITS
);
...
...
av1/common/x86/selfguided_avx2.c
View file @
127b562a
...
...
@@ -499,21 +499,21 @@ static __m256i cross_sum_fast_odd_not_last(const int32_t *buf, int stride) {
// xl x xr
//
// Pixels are weighted like this:
-//  6  8  6
+//  4  6  4
 //  0  0  0
-// 14 16 14
+// 16 18 16
//
// buf points to x
//
-// sixes = xtl + xtr
-// eights = xt
-// fourteens = xl + xr
-// sixteens = x
-// cross_sum = 6 * sixes + 8 * eights + 14 * fourteens + 16 * sixteens
-//           = 8 * (sixes + eights) + 16 * (sixteens + fourteens)
-//             - 2 * (sixes + fourteens)
-//           = (sixes + eights) << 3 + (sixteens + fourteens) << 4
-//             - (sixes + fourteens) << 1
+// fours = xtl + xtr
+// sixes = xt
+// sixteens = xl + xr
+// eighteens = x
+// cross_sum = 4 * fours + 6 * sixes + 16 * sixteens + 18 * eighteens
+//           = 4 * (fours + sixes) + 16 * (sixteens + eighteens)
+//             + 2 * (sixes + eighteens)
+//           = (fours + sixes) << 2 + (sixteens + eighteens) << 4
+//             + (sixes + eighteens) << 1
static
__m256i
cross_sum_fast_odd_last
(
const
int32_t
*
buf
,
int
stride
)
{
const
int
two_stride
=
2
*
stride
;
const
__m256i
xtl
=
yy_loadu_256
(
buf
-
1
-
two_stride
);
...
...
@@ -523,19 +523,19 @@ static __m256i cross_sum_fast_odd_last(const int32_t *buf, int stride) {
const
__m256i
x
=
yy_loadu_256
(
buf
);
const
__m256i
xr
=
yy_loadu_256
(
buf
+
1
);
const
__m256i
sixe
s
=
_mm256_add_epi32
(
xtl
,
xtr
);
const
__m256i
eight
s
=
xt
;
const
__m256i
four
teens
=
_mm256_add_epi32
(
xl
,
xr
);
const
__m256i
six
teens
=
x
;
const
__m256i
four
s
=
_mm256_add_epi32
(
xtl
,
xtr
);
const
__m256i
sixe
s
=
xt
;
const
__m256i
six
teens
=
_mm256_add_epi32
(
xl
,
xr
);
const
__m256i
eigh
teens
=
x
;
const
__m256i
sixe
s_plus_
eight
s
=
_mm256_add_epi32
(
sixes
,
eight
s
);
const
__m256i
sixteens_plus_
four
teens
=
_mm256_add_epi32
(
sixteens
,
four
teens
);
const
__m256i
sixes_plus_
four
teens
=
_mm256_add_epi32
(
sixes
,
four
teens
);
const
__m256i
four
s_plus_
sixe
s
=
_mm256_add_epi32
(
fours
,
sixe
s
);
const
__m256i
sixteens_plus_
eigh
teens
=
_mm256_add_epi32
(
sixteens
,
eigh
teens
);
const
__m256i
sixes_plus_
eigh
teens
=
_mm256_add_epi32
(
sixes
,
eigh
teens
);
return
_mm256_
sub
_epi32
(
_mm256_add_epi32
(
_mm256_slli_epi32
(
sixe
s_plus_
eight
s
,
3
),
_mm256_slli_epi32
(
sixteens_plus_
four
teens
,
4
)),
_mm256_slli_epi32
(
sixes_plus_
four
teens
,
1
));
return
_mm256_
add
_epi32
(
_mm256_add_epi32
(
_mm256_slli_epi32
(
four
s_plus_
sixe
s
,
2
),
_mm256_slli_epi32
(
sixteens_plus_
eigh
teens
,
4
)),
_mm256_slli_epi32
(
sixes_plus_
eigh
teens
,
1
));
}
// The final filter for selfguided restoration. Computes a weighted average
...
...
av1/common/x86/selfguided_sse4.c
View file @
127b562a
...
...
@@ -462,21 +462,21 @@ static __m128i cross_sum_fast_odd_not_last(const int32_t *buf, int stride) {
// xl x xr
//
// Pixels are weighted like this:
-//  6  8  6
+//  4  6  4
 //  0  0  0
-// 14 16 14
+// 16 18 16
//
// buf points to x
//
-// sixes = xtl + xtr
-// eights = xt
-// fourteens = xl + xr
-// sixteens = x
-// cross_sum = 6 * sixes + 8 * eights + 14 * fourteens + 16 * sixteens
-//           = 8 * (sixes + eights) + 16 * (sixteens + fourteens)
-//             - 2 * (sixes + fourteens)
-//           = (sixes + eights) << 3 + (sixteens + fourteens) << 4
-//             - (sixes + fourteens) << 1
+// fours = xtl + xtr
+// sixes = xt
+// sixteens = xl + xr
+// eighteens = x
+// cross_sum = 4 * fours + 6 * sixes + 16 * sixteens + 18 * eighteens
+//           = 4 * (fours + sixes) + 16 * (sixteens + eighteens)
+//             + 2 * (sixes + eighteens)
+//           = (fours + sixes) << 2 + (sixteens + eighteens) << 4
+//             + (sixes + eighteens) << 1
static
__m128i
cross_sum_fast_odd_last
(
const
int32_t
*
buf
,
int
stride
)
{
const
int
two_stride
=
2
*
stride
;
const
__m128i
xtl
=
xx_loadu_128
(
buf
-
1
-
two_stride
);
...
...
@@ -486,19 +486,19 @@ static __m128i cross_sum_fast_odd_last(const int32_t *buf, int stride) {
const
__m128i
x
=
xx_loadu_128
(
buf
);
const
__m128i
xr
=
xx_loadu_128
(
buf
+
1
);
const
__m128i
sixe
s
=
_mm_add_epi32
(
xtl
,
xtr
);
const
__m128i
eight
s
=
xt
;
const
__m128i
four
teens
=
_mm_add_epi32
(
xl
,
xr
);
const
__m128i
six
teens
=
x
;
const
__m128i
four
s
=
_mm_add_epi32
(
xtl
,
xtr
);
const
__m128i
sixe
s
=
xt
;
const
__m128i
six
teens
=
_mm_add_epi32
(
xl
,
xr
);
const
__m128i
eigh
teens
=
x
;
const
__m128i
sixe
s_plus_
eight
s
=
_mm_add_epi32
(
sixes
,
eight
s
);
const
__m128i
sixteens_plus_
four
teens
=
_mm_add_epi32
(
sixteens
,
four
teens
);
const
__m128i
sixes_plus_
four
teens
=
_mm_add_epi32
(
sixes
,
four
teens
);
const
__m128i
four
s_plus_
sixe
s
=
_mm_add_epi32
(
fours
,
sixe
s
);
const
__m128i
sixteens_plus_
eigh
teens
=
_mm_add_epi32
(
sixteens
,
eigh
teens
);
const
__m128i
sixes_plus_
eigh
teens
=
_mm_add_epi32
(
sixes
,
eigh
teens
);
return
_mm_
sub
_epi32
(
_mm_add_epi32
(
_mm_slli_epi32
(
sixe
s_plus_
eight
s
,
3
),
_mm_slli_epi32
(
sixteens_plus_
four
teens
,
4
)),
_mm_slli_epi32
(
sixes_plus_
four
teens
,
1
));
return
_mm_
add
_epi32
(
_mm_add_epi32
(
_mm_slli_epi32
(
four
s_plus_
sixe
s
,
2
),
_mm_slli_epi32
(
sixteens_plus_
eigh
teens
,
4
)),
_mm_slli_epi32
(
sixes_plus_
eigh
teens
,
1
));
}
// The final filter for selfguided restoration. Computes a weighted average
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment