Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
bba68342
Commit
bba68342
authored
Aug 16, 2013
by
Johann
Committed by
Gerrit Code Review
Aug 16, 2013
Browse files
Merge "vp9: neon: use aligned stores in convolve functions"
parents
79f4c1b9
4fa93bce
Changes
3
Hide whitespace changes
Inline
Side-by-side
vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
View file @
bba68342
...
...
@@ -159,10 +159,10 @@ loop_horiz
; average the new value and the dst value
vrhadd.u8
q1
,
q1
,
q3
vst1.u32
{
d2
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
],
r4
vst1.u32
{
d2
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
@32
],
r4
vmov
q8
,
q9
vmov
d20
,
d23
...
...
@@ -234,10 +234,10 @@ loop_vert
vmovl.u8
q12
,
d24
vmovl.u8
q13
,
d26
vld1.u32
{
d6
[
0
]
}
,
[
r5
],
r3
vld1.u32
{
d6
[
1
]
}
,
[
r8
],
r3
vld1.u32
{
d7
[
0
]
}
,
[
r5
],
r3
vld1.u32
{
d7
[
1
]
}
,
[
r8
],
r3
vld1.u32
{
d6
[
0
]
}
,
[
r5
@32
],
r3
vld1.u32
{
d6
[
1
]
}
,
[
r8
@32
],
r3
vld1.u32
{
d7
[
0
]
}
,
[
r5
@32
],
r3
vld1.u32
{
d7
[
1
]
}
,
[
r8
@32
],
r3
pld
[
r7
]
pld
[
r4
]
...
...
@@ -276,10 +276,10 @@ loop_vert
sub
r5
,
r5
,
r3
,
lsl
#
1
; reset for store
sub
r8
,
r8
,
r3
,
lsl
#
1
vst1.u32
{
d2
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d2
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
@32
],
r3
vmov
q8
,
q10
vmov
d18
,
d22
...
...
vp9/common/arm/neon/vp9_convolve8_neon.asm
View file @
bba68342
...
...
@@ -148,10 +148,10 @@ loop_horiz
vtrn.32
d2
,
d3
vtrn.8
d2
,
d3
vst1.u32
{
d2
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
],
r4
vst1.u32
{
d2
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r2
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r2
@32
],
r4
vmov
q8
,
q9
vmov
d20
,
d23
...
...
@@ -254,10 +254,10 @@ loop_vert
vqmovn.u16
d2
,
q1
vqmovn.u16
d3
,
q2
vst1.u32
{
d2
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
],
r3
vst1.u32
{
d2
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d2
[
1
]
}
,
[
r8
@32
],
r3
vst1.u32
{
d3
[
0
]
}
,
[
r5
@32
],
r3
vst1.u32
{
d3
[
1
]
}
,
[
r8
@32
],
r3
vmov
q8
,
q10
vmov
d18
,
d22
...
...
vp9/common/arm/neon/vp9_convolve_neon.c
View file @
bba68342
...
...
@@ -10,6 +10,7 @@
#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
#include "vpx_ports/mem.h"
void
vp9_convolve8_neon
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
...
...
@@ -19,7 +20,7 @@ void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
* maximum buffer size to 64 * (64 + 7) (+ 1 row so the row count, 72, is divisible by 4).
*/
uint8_t
temp
[
64
*
72
]
;
DECLARE_ALIGNED_ARRAY
(
8
,
uint8_t
,
temp
,
64
*
72
)
;
// Account for the vertical phase needing 3 lines prior and 4 lines post
int
intermediate_height
=
h
+
7
;
...
...
@@ -53,7 +54,7 @@ void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
)
{
uint8_t
temp
[
64
*
72
]
;
DECLARE_ALIGNED_ARRAY
(
8
,
uint8_t
,
temp
,
64
*
72
)
;
int
intermediate_height
=
h
+
7
;
if
(
x_step_q4
!=
16
||
y_step_q4
!=
16
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment