Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
be26e03d
Unverified
Commit
be26e03d
authored
Apr 03, 2017
by
Jean-Marc Valin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Saving 3 instructions per line from od_filter_dering_direction_8x8()
Change-Id: I7ca73e03ed171b57a05dd1fd5957906e11b66728
parent
c07b23de
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
48 additions
and
48 deletions
+48
-48
av1/common/od_dering_simd.h
av1/common/od_dering_simd.h
+48
-48
No files found.
av1/common/od_dering_simd.h
View file @
be26e03d
...
...
@@ -288,7 +288,7 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
int
dir
)
{
int
i
;
v128
sum
;
v128
p
;
v128
p
0
,
p1
;
v128
cmp
;
v128
row
;
v128
res
;
...
...
@@ -302,53 +302,53 @@ int SIMD_FUNC(od_filter_dering_direction_8x8)(uint16_t *y, int ystride,
thresh
=
v128_dup_16
(
threshold
);
for
(
i
=
0
;
i
<
8
;
i
++
)
{
sum
=
v128_zero
();
row
=
v128_load_
un
aligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
]);
/*p = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off1
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_add_16
(
p
,
v128_shl_n_16
(
p
,
1
)
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p = in[i*OD_FILT_BSTRIDE - offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off1
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p1*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_add_16
(
p
,
v128_shl_n_16
(
p
,
1
)
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off2
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_shl_n_16
(
p
,
1
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p = in[i*OD_FILT_BSTRIDE - offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off2
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p1*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_shl_n_16
(
p
,
1
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off3
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
/*p = in[i*OD_FILT_BSTRIDE - offset] - row*/
p
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off3
]),
row
);
/*if (abs(p) < thresh) sum += taps[k]*p1*/
cmp
=
od_cmplt_abs_epi16
(
p
,
thresh
);
p
=
v128_and
(
p
,
cmp
);
sum
=
v128_add_16
(
sum
,
p
);
row
=
v128_load_aligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
]);
/*p0 = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
0
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off1
]),
row
);
/*p0 = abs(p0) < thresh ? p0 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p
0
,
thresh
);
p
0
=
v128_and
(
p0
,
cmp
);
/*p1 = in[i*OD_FILT_BSTRIDE - offset] - row*/
p1
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off1
]),
row
);
/*p1 = abs(p1) < thresh ? p1 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p1
,
thresh
);
p1
=
v128_and
(
p1
,
cmp
);
/*sum += 3*(p0 + p1)*/
p
0
=
v128_add_16
(
p0
,
p1
);
p
0
=
v128_add_16
(
p0
,
v128_shl_n_16
(
p0
,
1
)
);
sum
=
v128_add_16
(
sum
,
p
0
);
/*p0 = in[i*OD_FILT_BSTRIDE + offset] - row*/
p
0
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off2
]),
row
);
/*p0 = abs(p0) < thresh ? p0 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p
0
,
thresh
);
p
0
=
v128_and
(
p0
,
cmp
);
/*p1 = in[i*OD_FILT_BSTRIDE - offset] - row*/
p1
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off2
]),
row
);
/*p1 = abs(p1) < thresh ? p1 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p1
,
thresh
);
p1
=
v128_and
(
p1
,
cmp
);
/* sum += 2*(p0 + p1)*/
p
0
=
v128_shl_n_16
(
v128_add_16
(
p0
,
p1
)
,
1
);
sum
=
v128_add_16
(
sum
,
p0
);
/*p0 = in[i*OD_FILT_BSTRIDE + offset] - row*/
p0
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
+
off3
]),
row
);
/*p0 = abs(p0) < thresh ? p0 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p0
,
thresh
);
p0
=
v128_and
(
p0
,
cmp
);
/*p1 = in[i*OD_FILT_BSTRIDE - offset] - row*/
p1
=
v128_sub_16
(
v128_load_unaligned
(
&
in
[
i
*
OD_FILT_BSTRIDE
-
off3
]),
row
);
/*p1 = abs(p1) < thresh ? p1 : 0*/
cmp
=
od_cmplt_abs_epi16
(
p1
,
thresh
);
p1
=
v128_and
(
p1
,
cmp
);
/*sum += (p0 + p1)*/
p
0
=
v128_add_16
(
p0
,
p1
);
sum
=
v128_add_16
(
sum
,
p
0
);
/*res = row + ((sum + 8) >> 4)*/
res
=
v128_add_16
(
sum
,
v128_dup_16
(
8
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment