Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
A
aom-rav1e
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Xiph.Org
aom-rav1e
Commits
8dd39051
Commit
8dd39051
authored
11 years ago
by
Johann Koenig
Committed by
Gerrit Code Review
11 years ago
Browse files
Options
Downloads
Plain Diff
Merge "Added optimized vp9_idct32x32_34_add_dspr2"
parents
c88f1ec8
d5a52edc
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+66
-3
66 additions, 3 deletions
vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
vp9/common/vp9_rtcd_defs.sh
+1
-1
1 addition, 1 deletion
vp9/common/vp9_rtcd_defs.sh
with
67 additions
and
4 deletions
vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
+
66
−
3
View file @
8dd39051
...
...
@@ -19,7 +19,8 @@
#include
"vp9/common/mips/dspr2/vp9_common_dspr2.h"
#if HAVE_DSPR2
static
void
idct32_1d_rows_dspr2
(
const
int16_t
*
input
,
int16_t
*
output
)
{
static
void
idct32_1d_rows_dspr2
(
const
int16_t
*
input
,
int16_t
*
output
,
uint32_t
no_rows
)
{
int16_t
step1_0
,
step1_1
,
step1_2
,
step1_3
,
step1_4
,
step1_5
,
step1_6
;
int16_t
step1_7
,
step1_8
,
step1_9
,
step1_10
,
step1_11
,
step1_12
,
step1_13
;
int16_t
step1_14
,
step1_15
,
step1_16
,
step1_17
,
step1_18
,
step1_19
,
step1_20
;
...
...
@@ -42,7 +43,7 @@ static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output) {
const
int
const_2_power_13
=
8192
;
const
int32_t
*
input_int
;
for
(
i
=
32
;
i
--
;
)
{
for
(
i
=
no_rows
;
i
--
;
)
{
input_int
=
(
const
int32_t
*
)
input
;
if
(
!
(
input_int
[
0
]
|
input_int
[
1
]
|
input_int
[
2
]
|
input_int
[
3
]
|
...
...
@@ -881,12 +882,74 @@ void vp9_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest,
);
// Rows
idct32_1d_rows_dspr2
(
input
,
outptr
);
idct32_1d_rows_dspr2
(
input
,
outptr
,
32
);
// Columns
vp9_idct32_1d_cols_add_blk_dspr2
(
out
,
dest
,
dest_stride
);
}
/*
 * 32x32 inverse transform + add, reduced variant.
 *
 * Runs the row pass on only the first 8 input rows, clears the remaining
 * 24 coefficients of every 32-entry line of the intermediate buffer, then
 * runs the full column pass which adds the result into dest.
 *
 * NOTE(review): presumably intended for blocks whose non-zero coefficients
 * all lie in the upper-left 8x8 corner, and presumably the row pass fills
 * entries 0..7 of each 32-entry line of `out` -- confirm against
 * idct32_1d_rows_dspr2 and the callers.
 *
 * input  - coefficient block handed to the row pass
 * dest   - destination pixels handed to the column pass
 * stride - stride of dest, forwarded unchanged to the column pass
 */
void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
                                int stride) {
  DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
  uint32_t i;
  uint32_t pos = 45;

  /* bit position for extract from acc */
  __asm__ __volatile__ (
    "wrdsp    %[pos],    1    \n\t"
    :
    : [pos] "r" (pos)
  );

  // Rows: only the first 8 rows are transformed.
  idct32_1d_rows_dspr2(input, out, 8);

  // Clear entries 8..31 of each of the 32 lines of out:
  // 12 word stores = 48 bytes = 24 int16_t values per line.
  for (i = 0; i < 32; ++i) {
    int16_t *line_tail = out + 8 + i * 32;

    // The "memory" clobber tells the compiler that these stores modify
    // out; without it the writes are invisible to the optimizer.
    __asm__ __volatile__ (
      "sw     $zero,      0(%[line_tail])     \n\t"
      "sw     $zero,      4(%[line_tail])     \n\t"
      "sw     $zero,      8(%[line_tail])     \n\t"
      "sw     $zero,     12(%[line_tail])     \n\t"
      "sw     $zero,     16(%[line_tail])     \n\t"
      "sw     $zero,     20(%[line_tail])     \n\t"
      "sw     $zero,     24(%[line_tail])     \n\t"
      "sw     $zero,     28(%[line_tail])     \n\t"
      "sw     $zero,     32(%[line_tail])     \n\t"
      "sw     $zero,     36(%[line_tail])     \n\t"
      "sw     $zero,     40(%[line_tail])     \n\t"
      "sw     $zero,     44(%[line_tail])     \n\t"
      :
      : [line_tail] "r" (line_tail)
      : "memory"
    );
  }

  // Columns
  vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride);
}
void
vp9_idct32x32_1_add_dspr2
(
const
int16_t
*
input
,
uint8_t
*
dest
,
int
stride
)
{
int
r
,
out
;
...
...
This diff is collapsed.
Click to expand it.
vp9/common/vp9_rtcd_defs.sh
+
1
−
1
View file @
8dd39051
...
...
@@ -296,7 +296,7 @@ prototype void vp9_idct32x32_1024_add "const int16_t *input, uint8_t *dest, int
specialize vp9_idct32x32_1024_add sse2 neon dspr2
prototype void vp9_idct32x32_34_add
"const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_34_add sse2
specialize vp9_idct32x32_34_add sse2
dspr2
prototype void vp9_idct32x32_1_add
"const int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct32x32_1_add sse2 neon dspr2
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment