Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
e97b8492
Commit
e97b8492
authored
May 27, 2015
by
James Zern
Browse files
vp9_reconintra_neon: add DC 8x8 predictors
~90% faster over 20M pixels.
Change-Id: Iab791510cc57c8332c2f9a5da0ed50702e5f5763
parent
bbea7c95
Changes
4
Hide whitespace changes
Inline
Side-by-side
test/test_intra_pred_speed.cc
View file @
e97b8492
...
...
@@ -248,9 +248,11 @@ INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
#endif // HAVE_DSPR2
#if HAVE_NEON
INTRA_PRED_TEST
(
NEON
,
TestIntraPred8
,
NULL
,
NULL
,
NULL
,
NULL
,
vp9_v_predictor_8x8_neon
,
vp9_h_predictor_8x8_neon
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
vp9_tm_predictor_8x8_neon
)
INTRA_PRED_TEST
(
NEON
,
TestIntraPred8
,
vp9_dc_predictor_8x8_neon
,
vp9_dc_left_predictor_8x8_neon
,
vp9_dc_top_predictor_8x8_neon
,
vp9_dc_128_predictor_8x8_neon
,
vp9_v_predictor_8x8_neon
,
vp9_h_predictor_8x8_neon
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
vp9_tm_predictor_8x8_neon
)
#endif // HAVE_NEON
...
...
vp9/common/arm/neon/vp9_reconintra_neon.c
View file @
e97b8492
...
...
@@ -8,9 +8,85 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include
<stddef.h>
#include
<arm_neon.h>
#include
"./vp9_rtcd.h"
#include
"./vpx_config.h"
#include
"vpx/vpx_integer.h"
//------------------------------------------------------------------------------
// DC 8x8

// Loads the 8 bytes at 'ref' and returns a vector in which every 16-bit lane
// holds their sum.
static INLINE uint16x8_t dc_8x8_border_sum(const uint8_t *ref) {
  const uint8x8_t border = vld1_u8(ref);
  // Cascading pairwise additions: 8 bytes -> 4 pair sums -> 2 distinct partial
  // sums -> the full sum replicated in every lane.
  const uint16x4_t p0 = vpaddl_u8(border);
  const uint16x4_t p1 = vpadd_u16(p0, p0);
  const uint16x4_t p2 = vpadd_u16(p1, p1);
  return vcombine_u16(p2, p2);
}

// Fills the 8x8 block at 'dst' (rows are 'stride' bytes apart) with a single
// DC value chosen by the two flags:
//   do_above && do_left : rounded average of the 8 'above' and 8 'left' bytes
//   do_above only       : rounded average of the 8 'above' bytes
//   do_left only        : rounded average of the 8 'left' bytes
//   neither             : the constant 0x80
// A border pointer is only dereferenced when its flag is nonzero, so NULL may
// be passed for a border that is not used.
// 'do_above' and 'do_left' facilitate branch removal when inlined.
static INLINE void dc_8x8(uint8_t *dst, ptrdiff_t stride, const uint8_t *above,
                          const uint8_t *left, int do_above, int do_left) {
  uint8x8_t dc0;
  int i;

  if (do_above && do_left) {
    const uint16x8_t sum =
        vaddq_u16(dc_8x8_border_sum(left), dc_8x8_border_sum(above));
    dc0 = vrshrn_n_u16(sum, 4);  // rounding divide by the 16 samples
  } else if (do_above) {
    dc0 = vrshrn_n_u16(dc_8x8_border_sum(above), 3);  // rounding divide by 8
  } else if (do_left) {
    dc0 = vrshrn_n_u16(dc_8x8_border_sum(left), 3);  // rounding divide by 8
  } else {
    dc0 = vdup_n_u8(0x80);
  }

  {
    const uint8x8_t dc = vdup_lane_u8(dc0, 0);
    // Store each row through the byte pointer: 'dst + i * stride' is only
    // known to be byte-aligned, so avoid casting it to a wider element type.
    for (i = 0; i < 8; ++i) {
      vst1_u8(dst + i * stride, dc);
    }
  }
}
// 8x8 DC predictor using both borders: every pixel of the block is set to the
// rounded average of the 8 'above' and 8 'left' bytes.
void vp9_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
                               const uint8_t *above, const uint8_t *left) {
  dc_8x8(dst, stride, above, left, /*do_above=*/1, /*do_left=*/1);
}
// 8x8 DC predictor using only the left border: every pixel of the block is set
// to the rounded average of the 8 'left' bytes. 'above' is never read.
void vp9_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
                                    const uint8_t *above,
                                    const uint8_t *left) {
  dc_8x8(dst, stride, NULL, left, /*do_above=*/0, /*do_left=*/1);
  (void)above;  // unused; kept for the common predictor signature
}
// 8x8 DC predictor using only the top border: every pixel of the block is set
// to the rounded average of the 8 'above' bytes. 'left' is never read.
void vp9_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
                                   const uint8_t *above, const uint8_t *left) {
  dc_8x8(dst, stride, above, NULL, /*do_above=*/1, /*do_left=*/0);
  (void)left;  // unused; kept for the common predictor signature
}
// 8x8 DC predictor using neither border: every pixel of the block is set to
// the constant 0x80 (128). Neither 'above' nor 'left' is read.
void vp9_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride,
                                   const uint8_t *above, const uint8_t *left) {
  dc_8x8(dst, stride, NULL, NULL, /*do_above=*/0, /*do_left=*/0);
  (void)above;  // unused; kept for the common predictor signature
  (void)left;   // unused; kept for the common predictor signature
}
#if !HAVE_NEON_ASM
void
vp9_v_predictor_4x4_neon
(
uint8_t
*
dst
,
ptrdiff_t
y_stride
,
const
uint8_t
*
above
,
const
uint8_t
*
left
)
{
int
i
;
...
...
@@ -423,3 +499,4 @@ void vp9_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride,
}
}
}
#endif // !HAVE_NEON_ASM
vp9/common/vp9_rtcd_defs.pl
View file @
e97b8492
...
...
@@ -123,16 +123,16 @@ add_proto qw/void vp9_tm_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, cons
specialize
qw/vp9_tm_predictor_8x8 neon dspr2/
,
"
$sse2_x86inc
";
add_proto
qw/void vp9_dc_predictor_8x8/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_predictor_8x8 dspr2/
,
"
$sse_x86inc
";
specialize
qw/vp9_dc_predictor_8x8 dspr2
neon
/
,
"
$sse_x86inc
";
add_proto
qw/void vp9_dc_top_predictor_8x8/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_top_predictor_8x8/
,
"
$sse_x86inc
";
specialize
qw/vp9_dc_top_predictor_8x8
neon
/
,
"
$sse_x86inc
";
add_proto
qw/void vp9_dc_left_predictor_8x8/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_left_predictor_8x8/
,
"
$sse_x86inc
";
specialize
qw/vp9_dc_left_predictor_8x8
neon
/
,
"
$sse_x86inc
";
add_proto
qw/void vp9_dc_128_predictor_8x8/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_128_predictor_8x8/
,
"
$sse_x86inc
";
specialize
qw/vp9_dc_128_predictor_8x8
neon
/
,
"
$sse_x86inc
";
add_proto
qw/void vp9_d207_predictor_16x16/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_d207_predictor_16x16/
,
"
$ssse3_x86inc
";
...
...
vp9/vp9_common.mk
View file @
e97b8492
...
...
@@ -199,8 +199,9 @@ VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_4_neon.c
# TODO(johannkoenig): re-enable when chromium build is fixed
# # https://code.google.com/p/chromium/issues/detail?id=443839
#VP9_COMMON_SRCS-yes += common/arm/neon/vp9_loopfilter_8_neon.c
VP9_COMMON_SRCS-yes
+=
common/arm/neon/vp9_reconintra_neon.c
endif
# HAVE_NEON
endif
# HAVE_NEON_ASM
VP9_COMMON_SRCS-$(HAVE_NEON)
+=
common/arm/neon/vp9_reconintra_neon.c
$(eval
$(call
rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment