Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Opus
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Alexander Traud
Opus
Commits
ed317c94
Commit
ed317c94
authored
16 years ago
by
Jean-Marc Valin
Browse files
Options
Downloads
Patches
Plain Diff
optimisation: another bunch of simplifications to the "simple case" of the
alg_quant() search.
parent
6ea8baed
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
libcelt/cwrs.c
+1
-0
1 addition, 0 deletions
libcelt/cwrs.c
libcelt/vq.c
+37
-29
37 additions, 29 deletions
libcelt/vq.c
with
38 additions
and
29 deletions
libcelt/cwrs.c
+
1
−
0
View file @
ed317c94
...
...
@@ -42,6 +42,7 @@
#include
"config.h"
#endif
#include
"os_support.h"
#include
<stdlib.h>
#include
<string.h>
#include
"cwrs.h"
...
...
This diff is collapsed.
Click to expand it.
libcelt/vq.c
+
37
−
29
View file @
ed317c94
...
...
@@ -137,61 +137,67 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
while
(
pulsesLeft
>
0
)
{
int
pulsesAtOnce
=
1
;
int
sign
;
celt_word32_t
Rxy
,
Ryy
,
Ryp
;
celt_word32_t
g
;
celt_word32_t
best_num
;
celt_word16_t
best_den
;
int
best_id
;
celt_word16_t
magnitude
;
#ifdef FIXED_POINT
int
rshift
;
#endif
/* Decide on how many pulses to find at once */
pulsesAtOnce
=
(
pulsesLeft
*
N_1
)
>>
9
;
/* pulsesLeft/N */
if
(
pulsesAtOnce
<
1
)
pulsesAtOnce
=
1
;
#ifdef FIXED_POINT
rshift
=
yshift
+
1
+
celt_ilog2
(
K
-
pulsesLeft
+
pulsesAtOnce
);
#endif
magnitude
=
SHL16
(
pulsesAtOnce
,
yshift
);
/* This should ensure that anything we can process will have a better score */
best_num
=
-
SHR32
(
VERY_LARGE32
,
4
);
best_den
=
0
;
best_id
=
0
;
/* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */
yy
=
ADD32
(
yy
,
MULT16_16
(
magnitude
,
magnitude
));
/* Choose between fast and accurate strategy depending on where we are in the search */
if
(
pulsesLeft
>
1
)
{
/* OPT: This loop is very CPU-intensive */
/* This should ensure that anything we can process will have a better score */
celt_word32_t
best_num
=
-
VERY_LARGE16
;
celt_word16_t
best_den
=
0
;
j
=
0
;
do
{
celt_word32_t
num
;
celt_word16_t
den
;
celt_word16_t
Rxy
,
Ryy
;
/* Select sign based on X[j] alone */
sign
=
signx
[
j
];
s
=
SHL16
(
sign
*
pulsesAtOnce
,
yshift
);
s
=
signx
[
j
]
*
magnitude
;
/* Temporary sums of the new pulse(s) */
Rxy
=
xy
+
MULT16_16
(
s
,
X
[
j
]);
Ryy
=
yy
+
2
*
MULT16_16
(
s
,
y
[
j
])
+
MULT16_16
(
s
,
s
);
Rxy
=
SHR32
(
xy
+
MULT16_16
(
s
,
X
[
j
]),
rshift
);
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy
=
SHR32
(
yy
+
MULT16_16
(
s
,
y
[
j
]),
rshift
);
/* Approximate score: we maximise Rxy/sqrt(Ryy) */
num
=
MULT16_16
(
ROUND16
(
Rxy
,
14
),
ABS16
(
ROUND16
(
Rxy
,
14
)));
den
=
ROUND16
(
Ryy
,
14
);
/* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that Rxy is positive because the sign is pre-computed) */
Rxy
=
MULT16_16_Q15
(
Rxy
,
Rxy
);
/* The idea is to check for num/den >= best_num/best_den, but that way
we can do it without any division */
/* OPT: Make sure to use a conditional move here */
if
(
MULT16_32_Q15
(
best_den
,
num
)
>
MULT16_32_Q15
(
den
,
best_num
))
/* OPT: Make sure to use conditional moves here */
if
(
MULT16_16
(
best_den
,
Rxy
)
>
MULT16_16
(
Ryy
,
best_num
))
{
best_den
=
den
;
best_num
=
num
;
best_den
=
Ryy
;
best_num
=
Rxy
;
best_id
=
j
;
}
}
while
(
++
j
<
N
);
/* Promises we loop at least once */
}
else
{
celt_word32_t
g
;
celt_word32_t
best_num
=
-
VERY_LARGE32
;
for
(
j
=
0
;
j
<
N
;
j
++
)
{
celt_word32_t
Rxy
,
Ryy
,
Ryp
;
celt_word32_t
num
;
/* Select sign based on X[j] alone */
sign
=
signx
[
j
];
s
=
SHL16
(
sign
*
pulsesAtOnce
,
yshift
);
s
=
signx
[
j
]
*
magnitude
;
/* Temporary sums of the new pulse(s) */
Rxy
=
xy
+
MULT16_16
(
s
,
X
[
j
]);
Ryy
=
yy
+
2
*
MULT16_16
(
s
,
y
[
j
])
+
MULT16_16
(
s
,
s
);
Ryp
=
yp
+
MULT16_16
(
s
,
P
[
j
]);
/* We're multiplying y[j] by two so we don't have to do it here */
Ryy
=
yy
+
MULT16_16
(
s
,
y
[
j
]);
Ryp
=
yp
+
MULT16_16
(
s
,
P
[
j
]);
/* Compute the gain such that ||p + g*y|| = 1 */
g
=
MULT16_32_Q15
(
...
...
@@ -218,11 +224,13 @@ void alg_quant(celt_norm_t *X, celt_mask_t *W, int N, int K, const celt_norm_t *
/* Updating the sums of the new pulse(s) */
xy
=
xy
+
MULT16_16
(
s
,
X
[
j
]);
yy
=
yy
+
2
*
MULT16_16
(
s
,
y
[
j
])
+
MULT16_16
(
s
,
s
);
/* We're multiplying y[j] by two so we don't have to do it here */
yy
=
yy
+
MULT16_16
(
s
,
y
[
j
]);
yp
=
yp
+
MULT16_16
(
s
,
P
[
j
]);
/* Only now that we've made the final choice, update y/iy */
y
[
j
]
+=
s
;
/* Multiplying y[j] by 2 so we don't have to do it everywhere else */
y
[
j
]
+=
2
*
s
;
iy
[
j
]
+=
is
;
pulsesLeft
-=
pulsesAtOnce
;
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment