Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Opus
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Xiph.Org
Opus
Commits
4e018b22
Commit
4e018b22
authored
11 years ago
by
Jean-Marc Valin
Browse files
Options
Downloads
Patches
Plain Diff
SSE optimization of remove_doubling()
Should be trivial to adapt for Neon.
parent
39cbc458
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
celt/pitch.c
+15
-6
15 additions, 6 deletions
celt/pitch.c
celt/x86/pitch_sse.h
+30
-0
30 additions, 0 deletions
celt/x86/pitch_sse.h
with
45 additions
and
6 deletions
celt/pitch.c
+
15
−
6
View file @
4e018b22
...
...
@@ -394,6 +394,20 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
RESTORE_STACK
;
}
#ifndef OVERRIDE_DUAL_INNER_PROD
/* Returns the combined sum of two inner products that share the same left
   operand: for every i in [0, N) the products x[i]*y1[i] and x[i]*y2[i] are
   folded into a single accumulator through MAC16_16.

   x  : first operand, N readable samples.
   y1 : second operand of the first product, N readable samples.
   y2 : second operand of the second product, N readable samples.
   N  : number of samples to process; N <= 0 yields 0.

   None of the inputs is written to, so they are const-qualified, matching the
   signature of the OVERRIDE_DUAL_INNER_PROD (SSE) implementation. */
static opus_val32 dual_inner_prod(const opus_val16 *x, const opus_val16 *y1,
                                  const opus_val16 *y2, int N)
{
   int i;
   opus_val32 xy = 0;
   for (i = 0; i < N; i++)
   {
      /* Keep the y1-then-y2 accumulation order per sample. */
      xy = MAC16_16(xy, x[i], y1[i]);
      xy = MAC16_16(xy, x[i], y2[i]);
   }
   return xy;
}
#endif
/* Lookup table indexed by k in remove_doubling(): the selected entry scales T0
   in the expression T1b = (2*second_check[k]*T0 + k)/(2*k).
   NOTE(review): presumably this picks a second candidate period that is
   checked together with T1 -- confirm against the full function body. */
static
const
int
second_check
[
16
]
=
{
0
,
0
,
3
,
2
,
3
,
2
,
5
,
2
,
3
,
2
,
3
,
2
,
5
,
2
,
3
,
2
};
opus_val16
remove_doubling
(
opus_val16
*
x
,
int
maxperiod
,
int
minperiod
,
int
N
,
int
*
T0_
,
int
prev_period
,
opus_val16
prev_gain
)
...
...
@@ -470,12 +484,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
{
T1b
=
(
2
*
second_check
[
k
]
*
T0
+
k
)
/
(
2
*
k
);
}
xy
=
0
;
for
(
i
=
0
;
i
<
N
;
i
++
)
{
xy
=
MAC16_16
(
xy
,
x
[
i
],
x
[
i
-
T1
]);
xy
=
MAC16_16
(
xy
,
x
[
i
],
x
[
i
-
T1b
]);
}
xy
=
dual_inner_prod
(
x
,
&
x
[
-
T1
],
&
x
[
-
T1b
],
N
);
yy
=
yy_lookup
[
T1
]
+
yy_lookup
[
T1b
];
#ifdef FIXED_POINT
{
...
...
This diff is collapsed.
Click to expand it.
celt/x86/pitch_sse.h
+
30
−
0
View file @
4e018b22
...
...
@@ -71,4 +71,34 @@ static inline void xcorr_kernel(const opus_val16 *x, const opus_val16 *y, opus_v
_mm_storeu_ps
(
sum
,
_mm_add_ps
(
xsum1
,
xsum2
));
}
#define OVERRIDE_DUAL_INNER_PROD
/* SSE version of dual_inner_prod(): returns the sum over i in [0, N) of
   x[i]*y1[i] + x[i]*y2[i].

   The inputs are loaded with _mm_loadu_ps, so opus_val16 is treated as float
   here and no particular alignment is required. Samples are consumed four at
   a time with one vector accumulator per product; any samples left over
   (N not a multiple of 4) are handled by a scalar MAC16_16 loop. */
static inline opus_val32 dual_inner_prod(const opus_val16 *x, const opus_val16 *y1,
                                         const opus_val16 *y2, int N)
{
   int i;
   const int vec_limit = N - 3;
   __m128 acc_y1 = _mm_setzero_ps();
   __m128 acc_y2 = _mm_setzero_ps();
   __m128 total;
   opus_val32 xy;

   for (i = 0; i < vec_limit; i += 4)
   {
      const __m128 xv = _mm_loadu_ps(x + i);
      const __m128 prod1 = _mm_mul_ps(xv, _mm_loadu_ps(y1 + i));
      const __m128 prod2 = _mm_mul_ps(xv, _mm_loadu_ps(y2 + i));
      acc_y1 = _mm_add_ps(acc_y1, prod1);
      acc_y2 = _mm_add_ps(acc_y2, prod2);
   }

   /* Merge the two per-product accumulators lane by lane. */
   total = _mm_add_ps(acc_y1, acc_y2);

   /* Horizontal sum: fold the upper two lanes onto the lower two, then add
      lane 1 (broadcast by the 0x55 shuffle) into lane 0. */
   total = _mm_add_ps(total, _mm_movehl_ps(total, total));
   total = _mm_add_ss(total, _mm_shuffle_ps(total, total, 0x55));
   _mm_store_ss(&xy, total);

   /* Scalar tail for the remaining 0..3 samples. */
   while (i < N)
   {
      xy = MAC16_16(xy, x[i], y1[i]);
      xy = MAC16_16(xy, x[i], y2[i]);
      i++;
   }
   return xy;
}
#endif
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment