Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Yushin Cho
aom-rav1e
Commits
78629508
Commit
78629508
authored
Aug 14, 2015
by
James Zern
Committed by
Gerrit Code Review
Aug 14, 2015
Browse files
Merge "VPX: removed step checks from neon convolve code"
parents
94ba3939
fa472129
Changes
5
Hide whitespace changes
Inline
Side-by-side
vpx_dsp/arm/vpx_convolve8_avg_neon.c
View file @
78629508
...
...
@@ -9,23 +9,13 @@
*/
#include
<arm_neon.h>
#include
<assert.h>
#include
"./vpx_config.h"
#include
"./vpx_dsp_rtcd.h"
#include
"vpx/vpx_integer.h"
#include
"vpx_ports/mem.h"
void
vpx_convolve8_avg_horiz_c
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
);
void
vpx_convolve8_avg_vert_c
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
);
static
INLINE
int32x4_t
MULTIPLY_BY_Q0
(
int16x4_t
dsrc0
,
int16x4_t
dsrc1
,
...
...
@@ -82,12 +72,7 @@ void vpx_convolve8_avg_horiz_neon(
uint16x4x2_t
d0x2u16
,
d1x2u16
;
uint32x4x2_t
q0x2u32
;
if
(
x_step_q4
!=
16
)
{
vpx_convolve8_avg_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
return
;
}
assert
(
x_step_q4
==
16
);
q0s16
=
vld1q_s16
(
filter_x
);
...
...
@@ -271,12 +256,7 @@ void vpx_convolve8_avg_vert_neon(
uint16x8_t
q1u16
,
q2u16
,
q8u16
,
q9u16
,
q10u16
,
q11u16
,
q12u16
,
q13u16
;
int32x4_t
q1s32
,
q2s32
,
q14s32
,
q15s32
;
if
(
y_step_q4
!=
16
)
{
vpx_convolve8_avg_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
return
;
}
assert
(
y_step_q4
==
16
);
src
-=
src_stride
*
3
;
q0s16
=
vld1q_s16
(
filter_y
);
...
...
vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
View file @
78629508
...
...
@@ -19,8 +19,6 @@
EXPORT
|
vpx_convolve8_avg_horiz_neon
|
EXPORT
|
vpx_convolve8_avg_vert_neon
|
IMPORT
|
vpx_convolve8_avg_horiz_c
|
IMPORT
|
vpx_convolve8_avg_vert_c
|
ARM
REQUIRE8
PRESERVE8
...
...
@@ -52,10 +50,6 @@
; sp[]int h
|
vpx_convolve8_avg_horiz_neon
|
PROC
ldr
r12
,
[
sp
,
#
4
]
; x_step_q4
cmp
r12
,
#
16
bne
vpx_convolve8_avg_horiz_c
push
{
r4
-
r10
,
lr
}
sub
r0
,
r0
,
#
3
; adjust for taps
...
...
@@ -184,10 +178,6 @@ vpx_convolve8_avg_loop_horiz
ENDP
|
vpx_convolve8_avg_vert_neon
|
PROC
ldr
r12
,
[
sp
,
#
12
]
cmp
r12
,
#
16
bne
vpx_convolve8_avg_vert_c
push
{
r4
-
r8
,
lr
}
; adjust for taps
...
...
vpx_dsp/arm/vpx_convolve8_neon.c
View file @
78629508
...
...
@@ -9,23 +9,13 @@
*/
#include
<arm_neon.h>
#include
<assert.h>
#include
"./vpx_config.h"
#include
"./vpx_dsp_rtcd.h"
#include
"vpx/vpx_integer.h"
#include
"vpx_ports/mem.h"
void
vpx_convolve8_horiz_c
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
);
void
vpx_convolve8_vert_c
(
const
uint8_t
*
src
,
ptrdiff_t
src_stride
,
uint8_t
*
dst
,
ptrdiff_t
dst_stride
,
const
int16_t
*
filter_x
,
int
x_step_q4
,
const
int16_t
*
filter_y
,
int
y_step_q4
,
int
w
,
int
h
);
static
INLINE
int32x4_t
MULTIPLY_BY_Q0
(
int16x4_t
dsrc0
,
int16x4_t
dsrc1
,
...
...
@@ -82,12 +72,7 @@ void vpx_convolve8_horiz_neon(
uint16x4x2_t
d0x2u16
,
d1x2u16
;
uint32x4x2_t
q0x2u32
;
if
(
x_step_q4
!=
16
)
{
vpx_convolve8_horiz_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
return
;
}
assert
(
x_step_q4
==
16
);
q0s16
=
vld1q_s16
(
filter_x
);
...
...
@@ -255,12 +240,7 @@ void vpx_convolve8_vert_neon(
uint16x8_t
q1u16
,
q2u16
,
q8u16
,
q9u16
,
q10u16
,
q11u16
,
q12u16
,
q13u16
;
int32x4_t
q1s32
,
q2s32
,
q14s32
,
q15s32
;
if
(
y_step_q4
!=
16
)
{
vpx_convolve8_vert_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
return
;
}
assert
(
y_step_q4
==
16
);
src
-=
src_stride
*
3
;
q0s16
=
vld1q_s16
(
filter_y
);
...
...
vpx_dsp/arm/vpx_convolve8_neon_asm.asm
View file @
78629508
...
...
@@ -19,8 +19,6 @@
EXPORT
|
vpx_convolve8_horiz_neon
|
EXPORT
|
vpx_convolve8_vert_neon
|
IMPORT
|
vpx_convolve8_horiz_c
|
IMPORT
|
vpx_convolve8_vert_c
|
ARM
REQUIRE8
PRESERVE8
...
...
@@ -52,10 +50,6 @@
; sp[]int h
|
vpx_convolve8_horiz_neon
|
PROC
ldr
r12
,
[
sp
,
#
4
]
; x_step_q4
cmp
r12
,
#
16
bne
vpx_convolve8_horiz_c
push
{
r4
-
r10
,
lr
}
sub
r0
,
r0
,
#
3
; adjust for taps
...
...
@@ -173,10 +167,6 @@ vpx_convolve8_loop_horiz
ENDP
|
vpx_convolve8_vert_neon
|
PROC
ldr
r12
,
[
sp
,
#
12
]
cmp
r12
,
#
16
bne
vpx_convolve8_vert_c
push
{
r4
-
r8
,
lr
}
; adjust for taps
...
...
vpx_dsp/arm/vpx_convolve_neon.c
View file @
78629508
...
...
@@ -8,6 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include
<assert.h>
#include
"./vpx_dsp_rtcd.h"
#include
"vpx_dsp/vpx_dsp_common.h"
#include
"vpx_ports/mem.h"
...
...
@@ -25,14 +27,8 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
// Account for the vertical phase needing 3 lines prior and 4 lines post
int
intermediate_height
=
h
+
7
;
if
(
x_step_q4
!=
16
||
y_step_q4
!=
16
)
{
vpx_convolve8_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
return
;
}
assert
(
y_step_q4
==
16
);
assert
(
x_step_q4
==
16
);
/* Filter starting 3 lines back. The neon implementation will ignore the
* given height and filter a multiple of 4 lines. Since this goes in to
...
...
@@ -59,14 +55,8 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
DECLARE_ALIGNED
(
8
,
uint8_t
,
temp
[
64
*
72
]);
int
intermediate_height
=
h
+
7
;
if
(
x_step_q4
!=
16
||
y_step_q4
!=
16
)
{
vpx_convolve8_avg_c
(
src
,
src_stride
,
dst
,
dst_stride
,
filter_x
,
x_step_q4
,
filter_y
,
y_step_q4
,
w
,
h
);
return
;
}
assert
(
y_step_q4
==
16
);
assert
(
x_step_q4
==
16
);
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment