Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
aom-rav1e
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xiph.Org
aom-rav1e
Commits
bba68342
Commit
bba68342
authored
Aug 16, 2013
by
Johann
Committed by
Gerrit Code Review
Aug 16, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge "vp9: neon: use aligned stores in convolve functions"
parents
79f4c1b9
4fa93bce
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
22 deletions
+23
-22
vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
+12
-12
vp9/common/arm/neon/vp9_convolve8_neon.asm
vp9/common/arm/neon/vp9_convolve8_neon.asm
+8
-8
vp9/common/arm/neon/vp9_convolve_neon.c
vp9/common/arm/neon/vp9_convolve_neon.c
+3
-2
No files found.
vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
View file @
bba68342
...
...
@@ -159,10 +159,10 @@ loop_horiz
; average the new value and the dst value
vrhadd.u8
q1
,
q1
,
q3
vst1.u32
{
d2
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
],
r4
vst1.u32
{
d2
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
@32
],
r4
vmov
q8
,
q9
vmov
d20
,
d23
...
...
@@ -234,10 +234,10 @@ loop_vert
vmovl.u8
q12
,
d24
vmovl.u8
q13
,
d26
vld1.u32
{
d6
[
0
]
}
,
[
r5
],
r3
vld1.u32
{
d6
[
1
]
}
,
[
r8
],
r3
vld1.u32
{
d7
[
0
]
}
,
[
r5
],
r3
vld1.u32
{
d7
[
1
]
}
,
[
r8
],
r3
vld1.u32
{
d6
[
0
]
}
,
[
r5
@32
],
r3
vld1.u32
{
d6
[
1
]
}
,
[
r8
@32
],
r3
vld1.u32
{
d7
[
0
]
}
,
[
r5
@32
],
r3
vld1.u32
{
d7
[
1
]
}
,
[
r8
@32
],
r3
pld
[
r7
]
pld
[
r4
]
...
...
@@ -276,10 +276,10 @@ loop_vert
sub
r5
,
r5
,
r3
,
lsl
#
1
; reset for store
sub
r8
,
r8
,
r3
,
lsl
#
1
vst1.u32
{
d2
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d2
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
@32
],
r3
vmov
q8
,
q10
vmov
d18
,
d22
...
...
vp9/common/arm/neon/vp9_convolve8_neon.asm
View file @
bba68342
...
...
@@ -148,10 +148,10 @@ loop_horiz
vtrn.32
d2
,
d3
vtrn.8
d2
,
d3
vst1.u32
{
d2
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
],
r4
vst1.u32
{
d2
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
@32
],
r4
vmov
q8
,
q9
vmov
d20
,
d23
...
...
@@ -254,10 +254,10 @@ loop_vert
vqmovn.u16
d2
,
q1
vqmovn.u16
d3
,
q2
vst1.u32
{
d2
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d2
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
@32
],
r3
vmov
q8
,
q10
vmov
d18
,
d22
...
...
vp9/common/arm/neon/vp9_convolve_neon.c
View file @
bba68342
...
...
@@ -10,6 +10,7 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx_ports/mem.h"
void
vp9_convolve8_neon
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
...
...
@@ -19,7 +20,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
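/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
* maximum buffer size to 64 * (64 + 7 + 1) = 64 * 72, where the 7 covers the
* extra lines needed by the 8-tap vertical filter and the + 1 makes the line
* count (72) divisible by 4.
*/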
uint8_t
temp
[
64
*
72
]
;
DECLARE_ALIGNED_ARRAY
(
8
,
uint8_t
,
temp
,
64
*
72
)
;
// Account for the vertical phase needing 3 lines prior and 4 lines post
int
intermediate_height
=
h
+
7
;
...
...
@@ -53,7 +54,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
uint8_t
temp
[
64
*
72
]
;
DECLARE_ALIGNED_ARRAY
(
8
,
uint8_t
,
temp
,
64
*
72
)
;
int
intermediate_height
=
h
+
7
;
if
(
x_step_q4
!=
16
||
y_step_q4
!=
16
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment