Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
e77f859d
Commit
e77f859d
authored
Jun 18, 2015
by
James Zern
Committed by
Gerrit Code Review
Jun 18, 2015
Browse files
Merge "vp9_reconintra_neon: add DC 32x32 predictors"
parents
d1398e9f
79fb3a01
Changes
3
Hide whitespace changes
Inline
Side-by-side
test/test_intra_pred_speed.cc
View file @
e77f859d
...
...
@@ -362,9 +362,12 @@ INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
#endif // HAVE_SSSE3
#if HAVE_NEON
INTRA_PRED_TEST
(
NEON
,
TestIntraPred32
,
NULL
,
NULL
,
NULL
,
NULL
,
vp9_v_predictor_32x32_neon
,
vp9_h_predictor_32x32_neon
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
vp9_tm_predictor_32x32_neon
)
INTRA_PRED_TEST
(
NEON
,
TestIntraPred32
,
vp9_dc_predictor_32x32_neon
,
vp9_dc_left_predictor_32x32_neon
,
vp9_dc_top_predictor_32x32_neon
,
vp9_dc_128_predictor_32x32_neon
,
vp9_v_predictor_32x32_neon
,
vp9_h_predictor_32x32_neon
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
NULL
,
vp9_tm_predictor_32x32_neon
)
#endif // HAVE_NEON
#if HAVE_MSA
...
...
vp9/common/arm/neon/vp9_reconintra_neon.c
View file @
e77f859d
...
...
@@ -161,6 +161,89 @@ void vp9_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride,
dc_16x16
(
dst
,
stride
,
NULL
,
NULL
,
0
,
0
);
}
//------------------------------------------------------------------------------
// DC 32x32
// 'do_above' and 'do_left' facilitate branch removal when inlined.
static
INLINE
void
dc_32x32
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
const
uint8_t
*
above
,
const
uint8_t
*
left
,
int
do_above
,
int
do_left
)
{
uint16x8_t
sum_top
;
uint16x8_t
sum_left
;
uint8x8_t
dc0
;
if
(
do_above
)
{
const
uint8x16_t
A0
=
vld1q_u8
(
above
);
// top row
const
uint8x16_t
A1
=
vld1q_u8
(
above
+
16
);
const
uint16x8_t
p0
=
vpaddlq_u8
(
A0
);
// cascading summation of the top
const
uint16x8_t
p1
=
vpaddlq_u8
(
A1
);
const
uint16x8_t
p2
=
vaddq_u16
(
p0
,
p1
);
const
uint16x4_t
p3
=
vadd_u16
(
vget_low_u16
(
p2
),
vget_high_u16
(
p2
));
const
uint16x4_t
p4
=
vpadd_u16
(
p3
,
p3
);
const
uint16x4_t
p5
=
vpadd_u16
(
p4
,
p4
);
sum_top
=
vcombine_u16
(
p5
,
p5
);
}
if
(
do_left
)
{
const
uint8x16_t
L0
=
vld1q_u8
(
left
);
// left row
const
uint8x16_t
L1
=
vld1q_u8
(
left
+
16
);
const
uint16x8_t
p0
=
vpaddlq_u8
(
L0
);
// cascading summation of the left
const
uint16x8_t
p1
=
vpaddlq_u8
(
L1
);
const
uint16x8_t
p2
=
vaddq_u16
(
p0
,
p1
);
const
uint16x4_t
p3
=
vadd_u16
(
vget_low_u16
(
p2
),
vget_high_u16
(
p2
));
const
uint16x4_t
p4
=
vpadd_u16
(
p3
,
p3
);
const
uint16x4_t
p5
=
vpadd_u16
(
p4
,
p4
);
sum_left
=
vcombine_u16
(
p5
,
p5
);
}
if
(
do_above
&&
do_left
)
{
const
uint16x8_t
sum
=
vaddq_u16
(
sum_left
,
sum_top
);
dc0
=
vrshrn_n_u16
(
sum
,
6
);
}
else
if
(
do_above
)
{
dc0
=
vrshrn_n_u16
(
sum_top
,
5
);
}
else
if
(
do_left
)
{
dc0
=
vrshrn_n_u16
(
sum_left
,
5
);
}
else
{
dc0
=
vdup_n_u8
(
0x80
);
}
{
const
uint8x16_t
dc
=
vdupq_lane_u8
(
dc0
,
0
);
int
i
;
for
(
i
=
0
;
i
<
32
;
++
i
)
{
vst1q_u8
(
dst
+
i
*
stride
,
dc
);
vst1q_u8
(
dst
+
i
*
stride
+
16
,
dc
);
}
}
}
void
vp9_dc_predictor_32x32_neon
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
const
uint8_t
*
above
,
const
uint8_t
*
left
)
{
dc_32x32
(
dst
,
stride
,
above
,
left
,
1
,
1
);
}
void
vp9_dc_left_predictor_32x32_neon
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
const
uint8_t
*
above
,
const
uint8_t
*
left
)
{
(
void
)
above
;
dc_32x32
(
dst
,
stride
,
NULL
,
left
,
0
,
1
);
}
void
vp9_dc_top_predictor_32x32_neon
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
const
uint8_t
*
above
,
const
uint8_t
*
left
)
{
(
void
)
left
;
dc_32x32
(
dst
,
stride
,
above
,
NULL
,
1
,
0
);
}
void
vp9_dc_128_predictor_32x32_neon
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
const
uint8_t
*
above
,
const
uint8_t
*
left
)
{
(
void
)
above
;
(
void
)
left
;
dc_32x32
(
dst
,
stride
,
NULL
,
NULL
,
0
,
0
);
}
#if !HAVE_NEON_ASM
void
vp9_v_predictor_4x4_neon
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
e77f859d
...
...
@@ -201,16 +201,16 @@ add_proto qw/void vp9_tm_predictor_32x32/, "uint8_t *dst, ptrdiff_t y_stride, co
specialize
qw/vp9_tm_predictor_32x32 neon msa/
,
"
$sse2_x86_64
";
add_proto
qw/void vp9_dc_predictor_32x32/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_predictor_32x32 msa/
,
"
$sse2_x86inc
";
specialize
qw/vp9_dc_predictor_32x32 msa
neon
/
,
"
$sse2_x86inc
";
add_proto
qw/void vp9_dc_top_predictor_32x32/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_top_predictor_32x32 msa/
,
"
$sse2_x86inc
";
specialize
qw/vp9_dc_top_predictor_32x32 msa
neon
/
,
"
$sse2_x86inc
";
add_proto
qw/void vp9_dc_left_predictor_32x32/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_left_predictor_32x32 msa/
,
"
$sse2_x86inc
";
specialize
qw/vp9_dc_left_predictor_32x32 msa
neon
/
,
"
$sse2_x86inc
";
add_proto
qw/void vp9_dc_128_predictor_32x32/
,
"
uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left
";
specialize
qw/vp9_dc_128_predictor_32x32 msa/
,
"
$sse2_x86inc
";
specialize
qw/vp9_dc_128_predictor_32x32 msa
neon
/
,
"
$sse2_x86inc
";
#
# Loopfilter
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment