Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Xiph.Org
aom-rav1e
Commits
9d6a53b8
Commit
9d6a53b8
authored
Sep 28, 2016
by
Steinar Midtskogen
Committed by
Yaowu Xu
Oct 11, 2016
Browse files
Improve v128 and v64 8 bit shifts for x86
Change-Id: I25dc61bab46895d425ce49f89fceb164bee36906
parent
e66fc87c
Changes
2
Hide whitespace changes
Inline
Side-by-side
aom_dsp/simd/v128_intrinsics_x86.h
View file @
9d6a53b8
...
...
@@ -420,26 +420,19 @@ SIMD_INLINE v128 v128_cmplt_s16(v128 a, v128 b) {
SIMD_INLINE
v128
v128_cmpeq_16
(
v128
a
,
v128
b
)
{
return
_mm_cmpeq_epi16
(
a
,
b
);
}
SIMD_INLINE
v128
v128_shl_8
(
v128
a
,
unsigned
int
c
)
{
__m128i
x
=
_mm_cvtsi32_si128
(
c
);
return
_mm_packus_epi16
(
_mm_srli_epi16
(
_mm_sll_epi16
(
_mm_unpacklo_epi8
(
_mm_setzero_si128
(),
a
),
x
),
8
),
_mm_srli_epi16
(
_mm_sll_epi16
(
_mm_unpackhi_epi8
(
_mm_setzero_si128
(),
a
),
x
),
8
));
return
_mm_and_si128
(
_mm_set1_epi8
((
uint8_t
)(
0xff
<<
c
)),
_mm_sll_epi16
(
a
,
_mm_cvtsi32_si128
(
c
)));
}
SIMD_INLINE
v128
v128_shr_u8
(
v128
a
,
unsigned
int
c
)
{
__m128i
x
=
_mm_cvtsi32_si128
(
c
+
8
);
return
_mm_packus_epi16
(
_mm_srl_epi16
(
_mm_unpacklo_epi8
(
_mm_setzero_si128
(),
a
),
x
),
_mm_srl_epi16
(
_mm_unpackhi_epi8
(
_mm_setzero_si128
(),
a
),
x
));
return
_mm_and_si128
(
_mm_set1_epi8
(
0xff
>>
c
),
_mm_srl_epi16
(
a
,
_mm_cvtsi32_si128
(
c
)));
}
SIMD_INLINE
v128
v128_shr_s8
(
v128
a
,
unsigned
int
c
)
{
__m128i
x
=
_mm_cvtsi32_si128
(
c
+
8
);
return
_mm_packs_epi16
(
_mm_sra_epi16
(
_mm_unpacklo_epi8
(
_mm_setzero_si128
(),
a
),
x
),
_mm_sra_epi16
(
_mm_unpackhi_epi8
(
_mm_setzero_si128
(),
a
),
x
));
return
_mm_packs_epi16
(
_mm_sra_epi16
(
_mm_unpacklo_epi8
(
a
,
a
),
x
),
_mm_sra_epi16
(
_mm_unpackhi_epi8
(
a
,
a
),
x
));
}
SIMD_INLINE
v128
v128_shl_16
(
v128
a
,
unsigned
int
c
)
{
...
...
@@ -470,20 +463,13 @@ SIMD_INLINE v128 v128_shr_s32(v128 a, unsigned int c) {
to enforce that. */
#define v128_shl_n_byte(a, c) _mm_slli_si128(a, c)
#define v128_shr_n_byte(a, c) _mm_srli_si128(a, c)
#define v128_shl_n_8(a, c) \
_mm_packus_epi16( \
_mm_srli_epi16( \
_mm_slli_epi16(_mm_unpacklo_epi8(_mm_setzero_si128(), a), c), 8), \
_mm_srli_epi16( \
_mm_slli_epi16(_mm_unpackhi_epi8(_mm_setzero_si128(), a), c), 8))
#define v128_shr_n_u8(a, c) \
_mm_packus_epi16( \
_mm_srli_epi16(_mm_unpacklo_epi8(_mm_setzero_si128(), a), c + 8), \
_mm_srli_epi16(_mm_unpackhi_epi8(_mm_setzero_si128(), a), c + 8))
#define v128_shr_n_s8(a, c) \
_mm_packs_epi16( \
_mm_srai_epi16(_mm_unpacklo_epi8(_mm_setzero_si128(), a), c + 8), \
_mm_srai_epi16(_mm_unpackhi_epi8(_mm_setzero_si128(), a), c + 8))
#define v128_shl_n_8(a, c) \
_mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
#define v128_shr_n_u8(a, c) \
_mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
#define v128_shr_n_s8(a, c) \
_mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), \
_mm_srai_epi16(_mm_unpackhi_epi8(a, a), (c) + 8))
#define v128_shl_n_16(a, c) _mm_slli_epi16(a, c)
#define v128_shr_n_u16(a, c) _mm_srli_epi16(a, c)
#define v128_shr_n_s16(a, c) _mm_srai_epi16(a, c)
...
...
aom_dsp/simd/v64_intrinsics_x86.h
View file @
9d6a53b8
...
...
@@ -389,25 +389,18 @@ SIMD_INLINE v64 v64_cmplt_s16(v64 a, v64 b) { return _mm_cmplt_epi16(a, b); }
SIMD_INLINE
v64
v64_cmpeq_16
(
v64
a
,
v64
b
)
{
return
_mm_cmpeq_epi16
(
a
,
b
);
}
SIMD_INLINE
v64
v64_shl_8
(
v64
a
,
unsigned
int
c
)
{
return
_mm_packus_epi16
(
_mm_srli_epi16
(
_mm_sll_epi16
(
_mm_unpacklo_epi8
(
_mm_setzero_si128
(),
a
),
_mm_cvtsi32_si128
(
c
)),
8
),
_mm_setzero_si128
());
return
_mm_and_si128
(
_mm_set1_epi8
((
uint8_t
)(
0xff
<<
c
)),
_mm_sll_epi16
(
a
,
_mm_cvtsi32_si128
(
c
)));
}
SIMD_INLINE
v64
v64_shr_u8
(
v64
a
,
unsigned
int
c
)
{
__m128i
cp8
=
_mm_cvtsi32_si128
(
c
+
8
);
return
_mm_packus_epi16
(
_mm_srl_epi16
(
_mm_unpacklo_epi8
(
_mm_setzero_si128
(),
a
),
cp8
),
_mm_setzero_si128
());
return
_mm_and_si128
(
_mm_set1_epi8
(
0xff
>>
c
),
_mm_srl_epi16
(
a
,
_mm_cvtsi32_si128
(
c
)));
}
SIMD_INLINE
v64
v64_shr_s8
(
v64
a
,
unsigned
int
c
)
{
__m128i
cp8
=
_mm_cvtsi32_si128
(
c
+
8
);
return
_mm_packs_epi16
(
_mm_sra_epi16
(
_mm_unpacklo_epi8
(
_mm_setzero_si128
(),
a
),
cp8
),
_mm_setzero_si128
());
_mm_sra_epi16
(
_mm_unpacklo_epi8
(
a
,
a
),
_mm_cvtsi32_si128
(
c
+
8
)),
a
);
}
SIMD_INLINE
v64
v64_shl_16
(
v64
a
,
unsigned
int
c
)
{
...
...
@@ -438,19 +431,12 @@ SIMD_INLINE v64 v64_shr_s32(v64 a, unsigned int c) {
to enforce that. */
#define v64_shl_n_byte(a, c) _mm_slli_si128(a, c)
#define v64_shr_n_byte(a, c) _mm_srli_si128(_mm_unpacklo_epi64(a, a), c + 8)
#define v64_shl_n_8(a, c) \
_mm_packus_epi16( \
_mm_srli_epi16( \
_mm_sll_epi16(_mm_unpacklo_epi8(_mm_setzero_si128(), a), c), 8), \
_mm_setzero_si128())
#define v64_shr_n_u8(a, c) \
_mm_packus_epi16( \
_mm_srl_epi16(_mm_unpacklo_epi8(_mm_setzero_si128(), a), (c) + 8), \
_mm_setzero_si128())
#define v64_shr_n_s8(a, c) \
_mm_packs_epi16( \
_mm_sra_epi16(_mm_unpacklo_epi8(_mm_setzero_si128(), a), (c) + 8), \
_mm_setzero_si128())
#define v64_shl_n_8(a, c) \
_mm_and_si128(_mm_set1_epi8((uint8_t)(0xff << (c))), _mm_slli_epi16(a, c))
#define v64_shr_n_u8(a, c) \
_mm_and_si128(_mm_set1_epi8(0xff >> (c)), _mm_srli_epi16(a, c))
#define v64_shr_n_s8(a, c) \
_mm_packs_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), (c) + 8), a)
#define v64_shl_n_16(a, c) _mm_slli_epi16(a, c)
#define v64_shr_n_u16(a, c) _mm_srli_epi16(a, c)
#define v64_shr_n_s16(a, c) _mm_srai_epi16(a, c)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment