Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Mark Harris
Opus
Commits
57cd849c
Commit
57cd849c
authored
Dec 09, 2013
by
Jean-Marc Valin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Defining celt_inner_prod() and using it instead of explicit loops.
Also adds an SSE-optimized celt_inner_prod().
parent
ff072009
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
59 additions
and
36 deletions
+59
-36
celt/celt_encoder.c
celt/celt_encoder.c
+5
-9
celt/pitch.c
celt/pitch.c
+10
-12
celt/pitch.h
celt/pitch.h
+12
-0
celt/vq.c
celt/vq.c
+6
-15
celt/x86/pitch_sse.h
celt/x86/pitch_sse.h
+26
-0
No files found.
celt/celt_encoder.c
View file @
57cd849c
...
...
@@ -576,7 +576,7 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient,
*
tf_sum
=
0
;
for
(
i
=
0
;
i
<
len
;
i
++
)
{
int
j
,
k
,
N
;
int
k
,
N
;
int
narrow
;
opus_val32
L1
,
best_L1
;
int
best_level
=
0
;
...
...
@@ -768,10 +768,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
/* Compute inter-channel correlation for low frequencies */
for
(
i
=
0
;
i
<
8
;
i
++
)
{
int
j
;
opus_val32
partial
=
0
;
for
(
j
=
m
->
eBands
[
i
]
<<
LM
;
j
<
m
->
eBands
[
i
+
1
]
<<
LM
;
j
++
)
partial
=
MAC16_16
(
partial
,
X
[
j
],
X
[
N0
+
j
]);
opus_val32
partial
;
partial
=
celt_inner_prod
(
&
X
[
m
->
eBands
[
i
]
<<
LM
],
&
X
[
N0
+
(
m
->
eBands
[
i
]
<<
LM
)],
(
m
->
eBands
[
i
+
1
]
-
m
->
eBands
[
i
])
<<
LM
);
sum
=
ADD16
(
sum
,
EXTRACT16
(
SHR32
(
partial
,
18
)));
}
sum
=
MULT16_16_Q15
(
QCONST16
(
1
.
f
/
8
,
15
),
sum
);
...
...
@@ -779,10 +777,8 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
minXC
=
sum
;
for
(
i
=
8
;
i
<
intensity
;
i
++
)
{
int
j
;
opus_val32
partial
=
0
;
for
(
j
=
m
->
eBands
[
i
]
<<
LM
;
j
<
m
->
eBands
[
i
+
1
]
<<
LM
;
j
++
)
partial
=
MAC16_16
(
partial
,
X
[
j
],
X
[
N0
+
j
]);
opus_val32
partial
;
partial
=
celt_inner_prod
(
&
X
[
m
->
eBands
[
i
]
<<
LM
],
&
X
[
N0
+
(
m
->
eBands
[
i
]
<<
LM
)],
(
m
->
eBands
[
i
+
1
]
-
m
->
eBands
[
i
])
<<
LM
);
minXC
=
MIN16
(
minXC
,
ABS16
(
EXTRACT16
(
SHR32
(
partial
,
18
))));
}
minXC
=
MIN16
(
QCONST16
(
1
.
f
,
10
),
ABS16
(
minXC
));
...
...
celt/pitch.c
View file @
57cd849c
...
...
@@ -252,7 +252,7 @@ void
#endif
celt_pitch_xcorr_c
(
const
opus_val16
*
_x
,
const
opus_val16
*
_y
,
opus_val32
*
xcorr
,
int
len
,
int
max_pitch
)
{
int
i
,
j
;
int
i
;
/*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned.
Since it's hard to put asserts in assembly, put them here.*/
...
...
@@ -279,9 +279,8 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
/* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
for
(;
i
<
max_pitch
;
i
++
)
{
opus_val32
sum
=
0
;
for
(
j
=
0
;
j
<
len
;
j
++
)
sum
=
MAC16_16
(
sum
,
_x
[
j
],
_y
[
i
+
j
]);
opus_val32
sum
;
sum
=
celt_inner_prod
(
_x
,
_y
+
i
,
len
);
xcorr
[
i
]
=
sum
;
#ifdef FIXED_POINT
maxcorr
=
MAX32
(
maxcorr
,
sum
);
...
...
@@ -361,12 +360,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
#endif
for
(
i
=
0
;
i
<
max_pitch
>>
1
;
i
++
)
{
opus_val32
sum
=
0
;
opus_val32
sum
;
xcorr
[
i
]
=
0
;
if
(
abs
(
i
-
2
*
best_pitch
[
0
])
>
2
&&
abs
(
i
-
2
*
best_pitch
[
1
])
>
2
)
continue
;
#ifdef FIXED_POINT
sum
=
0
;
for
(
j
=
0
;
j
<
len
>>
1
;
j
++
)
sum
+=
SHR32
(
MULT16_16
(
x_lp
[
j
],
y
[
i
+
j
]),
shift
);
#else
sum
=
celt_inner_prod
(
x_lp
,
y
+
i
,
len
>>
1
);
#endif
xcorr
[
i
]
=
MAX32
(
-
1
,
sum
);
#ifdef FIXED_POINT
maxcorr
=
MAX32
(
maxcorr
,
sum
);
...
...
@@ -513,13 +517,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
pg
=
SHR32
(
frac_div32
(
best_xy
,
best_yy
+
1
),
16
);
for
(
k
=
0
;
k
<
3
;
k
++
)
{
int
T1
=
T
+
k
-
1
;
xy
=
0
;
for
(
i
=
0
;
i
<
N
;
i
++
)
xy
=
MAC16_16
(
xy
,
x
[
i
],
x
[
i
-
T1
]);
xcorr
[
k
]
=
xy
;
}
xcorr
[
k
]
=
celt_inner_prod
(
x
,
x
-
(
T
+
k
-
1
),
N
);
if
((
xcorr
[
2
]
-
xcorr
[
0
])
>
MULT16_32_Q15
(
QCONST16
(.
7
f
,
15
),
xcorr
[
1
]
-
xcorr
[
0
]))
offset
=
1
;
else
if
((
xcorr
[
0
]
-
xcorr
[
2
])
>
MULT16_32_Q15
(
QCONST16
(.
7
f
,
15
),
xcorr
[
1
]
-
xcorr
[
2
]))
...
...
celt/pitch.h
View file @
57cd849c
...
...
@@ -141,6 +141,18 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y
}
#endif
#ifndef OVERRIDE_CELT_INNER_PROD
static
OPUS_INLINE
opus_val32
celt_inner_prod
(
const
opus_val16
*
x
,
const
opus_val16
*
y
,
int
N
)
{
int
i
;
opus_val32
xy
=
0
;
for
(
i
=
0
;
i
<
N
;
i
++
)
xy
=
MAC16_16
(
xy
,
x
[
i
],
y
[
i
]);
return
xy
;
}
#endif
#ifdef FIXED_POINT
opus_val32
#else
...
...
celt/vq.c
View file @
57cd849c
...
...
@@ -37,6 +37,7 @@
#include "os_support.h"
#include "bands.h"
#include "rate.h"
#include "pitch.h"
static
void
exp_rotation1
(
celt_norm
*
X
,
int
len
,
int
stride
,
opus_val16
c
,
opus_val16
s
)
{
...
...
@@ -350,15 +351,11 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
#ifdef FIXED_POINT
int
k
;
#endif
opus_val32
E
=
EPSILON
;
opus_val32
E
;
opus_val16
g
;
opus_val32
t
;
celt_norm
*
xptr
=
X
;
for
(
i
=
0
;
i
<
N
;
i
++
)
{
E
=
MAC16_16
(
E
,
*
xptr
,
*
xptr
);
xptr
++
;
}
celt_norm
*
xptr
;
E
=
EPSILON
+
celt_inner_prod
(
X
,
X
,
N
);
#ifdef FIXED_POINT
k
=
celt_ilog2
(
E
)
>>
1
;
#endif
...
...
@@ -393,14 +390,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
Eside
=
MAC16_16
(
Eside
,
s
,
s
);
}
}
else
{
for
(
i
=
0
;
i
<
N
;
i
++
)
{
celt_norm
m
,
s
;
m
=
X
[
i
];
s
=
Y
[
i
];
Emid
=
MAC16_16
(
Emid
,
m
,
m
);
Eside
=
MAC16_16
(
Eside
,
s
,
s
);
}
Emid
+=
celt_inner_prod
(
X
,
X
,
N
);
Eside
+=
celt_inner_prod
(
Y
,
Y
,
N
);
}
mid
=
celt_sqrt
(
Emid
);
side
=
celt_sqrt
(
Eside
);
...
...
celt/x86/pitch_sse.h
View file @
57cd849c
...
...
@@ -101,6 +101,32 @@ static OPUS_INLINE void dual_inner_prod(const opus_val16 *x, const opus_val16 *y
}
}
#define OVERRIDE_CELT_INNER_PROD
static
OPUS_INLINE
opus_val32
celt_inner_prod
(
const
opus_val16
*
x
,
const
opus_val16
*
y
,
int
N
)
{
int
i
;
float
xy
;
__m128
sum
;
sum
=
_mm_setzero_ps
();
/* FIXME: We should probably go 8-way and use 2 sums. */
for
(
i
=
0
;
i
<
N
-
3
;
i
+=
4
)
{
__m128
xi
=
_mm_loadu_ps
(
x
+
i
);
__m128
yi
=
_mm_loadu_ps
(
y
+
i
);
sum
=
_mm_add_ps
(
sum
,
_mm_mul_ps
(
xi
,
yi
));
}
/* Horizontal sum */
sum
=
_mm_add_ps
(
sum
,
_mm_movehl_ps
(
sum
,
sum
));
sum
=
_mm_add_ss
(
sum
,
_mm_shuffle_ps
(
sum
,
sum
,
0x55
));
_mm_store_ss
(
&
xy
,
sum
);
for
(;
i
<
N
;
i
++
)
{
xy
=
MAC16_16
(
xy
,
x
[
i
],
y
[
i
]);
}
return
xy
;
}
#define OVERRIDE_COMB_FILTER_CONST
static
OPUS_INLINE
void
comb_filter_const
(
opus_val32
*
y
,
opus_val32
*
x
,
int
T
,
int
N
,
opus_val16
g10
,
opus_val16
g11
,
opus_val16
g12
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment