Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
be26e03d
Unverified
Commit
be26e03d
authored
Apr 03, 2017
by
Jean-Marc Valin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Saving 3 instructions per line from od_filter_dering_direction_8x8()
Change-Id: I7ca73e03ed171b57a05dd1fd5957906e11b66728
parent
c07b23de
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
48 additions
and
48 deletions
+48
-48
av1/common/od_dering_simd.h
av1/common/od_dering_simd.h
+48
-48
No files found.
av1/common/od_dering_simd.h
View file @
be26e03d
...
...
@@ -288,7 +288,7 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
int
dir
)
{
int
i
;
v128
sum
;
v128
p
;
v128
p
0
,
p1
;
v128
cmp
;
v128
row
;
v128
res
;
...
...
@@ -302,53 +302,53 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
thresh
=
v128_dup_16
(
threshold
);
for
(
i
=
0
;
i
<
8
;
i
++
)
{
sum
=
v128_zero
();
row
=
v128_load_
un
aligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
]);
/*p = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off1
]),
row
);
/*
if (
abs(p) < thresh
) sum += taps[k]*p
*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_ad
d_16
(
p
,
v128_shl_n_16
(
p
,
1
)
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p =
in[i*OD_FILT_BSTRIDE - offset] - row
*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off1
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p1*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_add_16
(
p
,
v128_shl_n_16
(
p
,
1
)
);
p
=
v128_a
n
d
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off2
]),
row
);
/*
if (
abs(p) < thresh
) sum += taps[k]*p
*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_
shl_n_16
(
p
,
1
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p =
in[i*OD_FILT_BSTRIDE - offset] - row
*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off2
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p1*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_shl_n_16
(
p
,
1
);
p
=
v128_a
nd
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p =
in[i
*
OD_FILT_BSTRIDE + off
set] -
row
*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off3
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p =
in[i*OD_FILT_BSTRIDE - offset] - row
*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off3
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p1*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_a
n
d
(
p
,
cm
p
);
sum
=
v128_add_16
(
sum
,
p
);
row
=
v128_load_aligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
]);
/*p
0
= in[i*OD_FILT_BSTRIDE + offset] - row*/
p
0
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off1
]),
row
);
/*
p0 =
abs(p
0
) < thresh
? p0 : 0
*/
cmp
=
od_cmplt_abs_epi16
(
p
0
,
thresh
);
p
0
=
v128_a
n
d
(
p
0
,
cmp
);
/*p1 = in[i*OD_FILT_BSTRIDE - offset] - row*/
p1
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off1
]),
row
);
/*p
1
=
abs(p1) < thresh ? p1 : 0
*/
cm
p
=
od_cmplt_abs_epi16
(
p1
,
thresh
);
p1
=
v128_and
(
p1
,
cmp
);
/*sum += 3*(p0 + p1)*/
p
0
=
v128_add_16
(
p
0
,
p1
);
p
0
=
v128_ad
d_16
(
p
0
,
v128_shl_n_16
(
p0
,
1
)
);
sum
=
v128_add_16
(
sum
,
p
0
);
/*p
0
= in[i*OD_FILT_BSTRIDE + offset] - row*/
p
0
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off2
]),
row
);
/*
p0 =
abs(p
0
) < thresh
? p0 : 0
*/
cmp
=
od_cmplt_abs_epi16
(
p
0
,
thresh
);
p
0
=
v128_
and
(
p
0
,
cmp
);
/*p1 = in[i*OD_FILT_BSTRIDE - offset] - row*/
p1
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off2
]),
row
);
/*p
1
=
abs(p1) < thresh ? p1 : 0
*/
cm
p
=
od_cmplt_abs_epi16
(
p1
,
thresh
);
p1
=
v128_and
(
p1
,
cmp
);
/* sum += 2*(p0 + p1)*/
p
0
=
v128_shl_n_16
(
v128_add_16
(
p0
,
p1
)
,
1
);
sum
=
v128_a
dd_16
(
sum
,
p0
);
/*p0 = in[i*OD_FILT_BSTRIDE + offset] - row*/
p0
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off
3
]),
row
);
/*p0 = abs(p0) < thresh ? p0 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p0
,
thresh
);
p0
=
v128_and
(
p
0
,
cmp
);
/*p1 = in[i*OD_FILT_BSTRIDE - offset] - row*/
p1
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off3
]),
row
);
/*p
1
=
abs(p1) < thresh ? p1 : 0
*/
cm
p
=
od_cmplt_abs_epi16
(
p1
,
thresh
);
p1
=
v128_and
(
p1
,
cmp
);
/*sum += (p0 + p1)*/
p
0
=
v128_ad
d_16
(
p
0
,
p
1
);
sum
=
v128_add_16
(
sum
,
p
0
);
/*res = row + ((sum + 8) >> 4)*/
res
=
v128_add_16
(
sum
,
v128_dup_16
(
8
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment