Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
Xiph.Org
aom-rav1e
Commits
5327fcf8
Commit
5327fcf8
authored
Jul 08, 2015
by
Frank Galligan
Committed by
Gerrit Code Review
Jul 08, 2015
Browse files
Options
Browse Files
Download
Plain Diff
Merge "Add vp9_int_pro_row_neon."
parents
ac7f403c
944ad6ca
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
59 additions
and
1 deletion
+59
-1
test/vp9_avg_test.cc
test/vp9_avg_test.cc
+5
-0
vp9/common/vp9_rtcd_defs.pl
vp9/common/vp9_rtcd_defs.pl
+1
-1
vp9/encoder/arm/neon/vp9_avg_neon.c
vp9/encoder/arm/neon/vp9_avg_neon.c
+53
-0
No files found.
test/vp9_avg_test.cc
View file @
5327fcf8
...
...
@@ -286,6 +286,11 @@ INSTANTIATE_TEST_CASE_P(
make_tuple
(
16
,
16
,
5
,
8
,
&
vp9_avg_8x8_neon
),
make_tuple
(
32
,
32
,
15
,
8
,
&
vp9_avg_8x8_neon
)));
// Registers the NEON instantiation of IntProRowTest. Each tuple carries an
// integer (16, 32 or 64) followed by vp9_int_pro_row_neon and
// vp9_int_pro_row_c.
// NOTE(review): presumably the integer is the block height and the C function
// is the reference the NEON output is compared against — confirm in the
// IntProRowTest fixture, which is not visible here.
INSTANTIATE_TEST_CASE_P
(
NEON
,
IntProRowTest
,
::
testing
::
Values
(
make_tuple
(
16
,
&
vp9_int_pro_row_neon
,
&
vp9_int_pro_row_c
),
make_tuple
(
32
,
&
vp9_int_pro_row_neon
,
&
vp9_int_pro_row_c
),
make_tuple
(
64
,
&
vp9_int_pro_row_neon
,
&
vp9_int_pro_row_c
)));
#endif
#if HAVE_MSA
...
...
vp9/common/vp9_rtcd_defs.pl
View file @
5327fcf8
...
...
@@ -821,7 +821,7 @@ add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
# Registration entries for vp9_satd, vp9_int_pro_row and vp9_int_pro_col.
# NOTE(review): add_proto appears to declare the C prototype and specialize to
# list the optimized variants available for it — confirm against the rtcd
# generator, which is not visible here.
specialize
qw/vp9_satd sse2/
;
add_proto
qw/void vp9_int_pro_row/
,
"
int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height
";
# NOTE(review): two specialize statements for vp9_int_pro_row follow (sse2
# only, then sse2 plus neon). This looks like a rendered diff showing the old
# and the new line — confirm that only the second is meant to remain.
specialize
qw/vp9_int_pro_row sse2/
;
specialize
qw/vp9_int_pro_row sse2
neon
/
;
add_proto
qw/int16_t vp9_int_pro_col/
,
"
uint8_t const *ref, const int width
";
specialize
qw/vp9_int_pro_col sse2/
;
...
...
vp9/encoder/arm/neon/vp9_avg_neon.c
View file @
5327fcf8
...
...
@@ -47,3 +47,56 @@ unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
return
(
horizontal_add_u16x8
(
v_sum
)
+
32
)
>>
6
;
}
/* Column-wise projection of a 16-pixel-wide strip.
 *
 * Sums the 16 bytes of each row into sixteen 16-bit column accumulators,
 * walking the rows in groups of eight, then scales every column total down
 * by a right shift of (height >> 5) + 3 bits and writes the 16 results to
 * hbuf.
 *
 *   hbuf        receives the 16 scaled column sums.
 *   ref         first byte of the top row; 16 bytes are loaded from each row.
 *   ref_stride  distance in bytes between the starts of consecutive rows.
 *   height      row count; rows are consumed eight at a time, so a value that
 *               is not a multiple of 8 is effectively rounded up.
 */
void vp9_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref,
                          const int ref_stride, const int height) {
  /* vshlq_u16 shifts right when the per-lane count is negative, so the
   * normalisation amount is broadcast with its sign flipped. */
  const int16x8_t neg_shift = vdupq_n_s16(-((height >> 5) + 3));
  uint16x8_t acc_lo = vdupq_n_u16(0); /* running sums for columns 0..7  */
  uint16x8_t acc_hi = vdupq_n_u16(0); /* running sums for columns 8..15 */
  int group;

  for (group = 0; group < height; group += 8) {
    int row;
    for (row = 0; row < 8; ++row) {
      /* One 16-byte row: widen each half to 16 bits and accumulate. */
      const uint8x16_t pixels = vld1q_u8(ref);
      acc_lo = vaddw_u8(acc_lo, vget_low_u8(pixels));
      acc_hi = vaddw_u8(acc_hi, vget_high_u8(pixels));
      ref += ref_stride;
    }
  }

  /* Scale the totals and store them as two consecutive 8-lane halves. */
  vst1q_s16(hbuf, vreinterpretq_s16_u16(vshlq_u16(acc_lo, neg_shift)));
  vst1q_s16(hbuf + 8, vreinterpretq_s16_u16(vshlq_u16(acc_hi, neg_shift)));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment