Xiph.Org / aom-rav1e / Commits / 6033fb85

Commit 6033fb85 authored Apr 02, 2017 by Steinar Midtskogen

Add v64_abs_s8, v128_abs_s8 and v256_abs_s8

Change-Id: I529509e4e997ba123799a3a581d20624d75cf582
parent 569c7b91
Changes: 14 files
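For context: each new *_abs_s8 intrinsic computes the lane-wise absolute value of signed 8-bit elements, at 64-, 128- and 256-bit vector widths. The sketch below is illustrative only and is not part of the commit; abs_s8_lane() and check_abs_s8() are hypothetical helpers that model the per-lane semantics, including the two's-complement wrap for INT8_MIN that matches _mm_abs_epi8 and vabs_s8.

/* Illustrative sketch only: models the per-lane behaviour of the new
 * abs_s8 operations.  abs_s8_lane() and check_abs_s8() are hypothetical
 * helpers, not part of this commit. */
#include <stdint.h>
#include <stdio.h>

static int8_t abs_s8_lane(int8_t x) {
  /* No saturation: abs(INT8_MIN) wraps back to INT8_MIN, as with
   * _mm_abs_epi8 (x86) and vabs_s8 (NEON). */
  return (int8_t)(x > 0 ? x : -x);
}

static void check_abs_s8(void) {
  const int8_t in[8] = { 0, 1, -1, 42, -42, 127, -127, -128 };
  for (int i = 0; i < 8; i++)
    printf("abs_s8(%4d) = %4d\n", in[i], abs_s8_lane(in[i]));
}

int main(void) {
  check_abs_s8(); /* the -128 lane prints -128 */
  return 0;
}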
aom_dsp/simd/v128_intrinsics.h
...
...
@@ -97,6 +97,7 @@ SIMD_INLINE v128 v128_ssub_s16(v128 a, v128 b) { return c_v128_ssub_s16(a, b); }
 SIMD_INLINE v128 v128_ssub_u16(v128 a, v128 b) { return c_v128_ssub_u16(a, b); }
 SIMD_INLINE v128 v128_sub_32(v128 a, v128 b) { return c_v128_sub_32(a, b); }
 SIMD_INLINE v128 v128_abs_s16(v128 a) { return c_v128_abs_s16(a); }
+SIMD_INLINE v128 v128_abs_s8(v128 a) { return c_v128_abs_s8(a); }
 SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) { return c_v128_mul_s16(a, b); }
 SIMD_INLINE v128 v128_mullo_s16(v128 a, v128 b) {
...
...
aom_dsp/simd/v128_intrinsics_arm.h
...
...
@@ -208,6 +208,10 @@ SIMD_INLINE v128 v128_abs_s16(v128 x) {
   return vreinterpretq_s64_s16(vabsq_s16(vreinterpretq_s16_s64(x)));
 }
 
+SIMD_INLINE v128 v128_abs_s8(v128 x) {
+  return vreinterpretq_s64_s8(vabsq_s8(vreinterpretq_s8_s64(x)));
+}
+
 SIMD_INLINE v128 v128_mul_s16(v64 a, v64 b) {
   return vreinterpretq_s64_s32(
       vmull_s16(vreinterpret_s16_s64(a), vreinterpret_s16_s64(b)));
...
...
aom_dsp/simd/v128_intrinsics_c.h
...
...
@@ -244,6 +244,10 @@ SIMD_INLINE c_v128 c_v128_abs_s16(c_v128 a) {
   return c_v128_from_v64(c_v64_abs_s16(a.v64[1]), c_v64_abs_s16(a.v64[0]));
 }
 
+SIMD_INLINE c_v128 c_v128_abs_s8(c_v128 a) {
+  return c_v128_from_v64(c_v64_abs_s8(a.v64[1]), c_v64_abs_s8(a.v64[0]));
+}
+
 SIMD_INLINE c_v128 c_v128_mul_s16(c_v64 a, c_v64 b) {
   c_v64 lo_bits = c_v64_mullo_s16(a, b);
   c_v64 hi_bits = c_v64_mulhi_s16(a, b);
...
...
aom_dsp/simd/v128_intrinsics_x86.h
...
...
@@ -120,6 +120,16 @@ SIMD_INLINE v128 v128_abs_s16(v128 a) {
 #endif
 }
 
+SIMD_INLINE v128 v128_abs_s8(v128 a) {
+#if defined(__SSSE3__)
+  return _mm_abs_epi8(a);
+#else
+  v128 t = _mm_sub_epi8(_mm_setzero_si128(), a);
+  v128 mask = _mm_cmplt_epi8(t, a);
+  return _mm_or_si128(_mm_andnot_si128(mask, t), _mm_and_si128(mask, a));
+#endif
+}
+
 SIMD_INLINE v128 v128_ziplo_8(v128 a, v128 b) { return _mm_unpacklo_epi8(b, a); }
...
...
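The pre-SSSE3 path above synthesises the byte-wise absolute value from SSE2 operations: it negates a (0 - a), does a signed byte compare of the negation against the original, and uses the compare mask to select the larger of the two per lane. A scalar model of that select for a single lane, illustrative only (abs_s8_sse2_model() is a hypothetical helper, not part of the commit):

/* Scalar model of the SSE2 fallback used when _mm_abs_epi8 (SSSE3) is
 * unavailable.  Illustrative only; not part of this commit. */
#include <stdint.h>

static int8_t abs_s8_sse2_model(int8_t a) {
  int8_t t = (int8_t)(0 - a);           /* _mm_sub_epi8(_mm_setzero_si128(), a) */
  uint8_t mask = (t < a) ? 0xff : 0x00; /* _mm_cmplt_epi8(t, a)                 */
  /* _mm_or_si128(_mm_andnot_si128(mask, t), _mm_and_si128(mask, a)) */
  return (int8_t)((~mask & (uint8_t)t) | (mask & (uint8_t)a));
}

For a = INT8_MIN the negation wraps back to INT8_MIN, so the fallback agrees with the SSSE3 instruction.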
aom_dsp/simd/v256_intrinsics.h
...
...
@@ -99,6 +99,7 @@ SIMD_INLINE v256 v256_ssub_s16(v256 a, v256 b) { return c_v256_ssub_s16(a, b); }
 SIMD_INLINE v256 v256_ssub_u16(v256 a, v256 b) { return c_v256_ssub_u16(a, b); }
 SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) { return c_v256_sub_32(a, b); }
 SIMD_INLINE v256 v256_abs_s16(v256 a) { return c_v256_abs_s16(a); }
+SIMD_INLINE v256 v256_abs_s8(v256 a) { return c_v256_abs_s8(a); }
 SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) { return c_v256_mul_s16(a, b); }
 SIMD_INLINE v256 v256_mullo_s16(v256 a, v256 b) {
...
...
aom_dsp/simd/v256_intrinsics_c.h
...
...
@@ -253,6 +253,10 @@ SIMD_INLINE c_v256 c_v256_abs_s16(c_v256 a) {
   return c_v256_from_v128(c_v128_abs_s16(a.v128[1]), c_v128_abs_s16(a.v128[0]));
 }
 
+SIMD_INLINE c_v256 c_v256_abs_s8(c_v256 a) {
+  return c_v256_from_v128(c_v128_abs_s8(a.v128[1]), c_v128_abs_s8(a.v128[0]));
+}
+
 SIMD_INLINE c_v256 c_v256_mul_s16(c_v128 a, c_v128 b) {
   c_v128 lo_bits = c_v128_mullo_s16(a, b);
   c_v128 hi_bits = c_v128_mulhi_s16(a, b);
...
...
aom_dsp/simd/v256_intrinsics_v128.h
...
...
@@ -211,6 +211,10 @@ SIMD_INLINE v256 v256_abs_s16(v256 a) {
   return v256_from_v128(v128_abs_s16(a.hi), v128_abs_s16(a.lo));
 }
 
+SIMD_INLINE v256 v256_abs_s8(v256 a) {
+  return v256_from_v128(v128_abs_s8(a.hi), v128_abs_s8(a.lo));
+}
+
 SIMD_INLINE v256 v256_mul_s16(v128 a, v128 b) {
   v128 lo_bits = v128_mullo_s16(a, b);
   v128 hi_bits = v128_mulhi_s16(a, b);
...
...
aom_dsp/simd/v256_intrinsics_x86.h
...
...
@@ -118,6 +118,8 @@ SIMD_INLINE v256 v256_sub_32(v256 a, v256 b) { return _mm256_sub_epi32(a, b); }
 SIMD_INLINE v256 v256_abs_s16(v256 a) { return _mm256_abs_epi16(a); }
+SIMD_INLINE v256 v256_abs_s8(v256 a) { return _mm256_abs_epi8(a); }
 
 // AVX doesn't have the direct intrinsics to zip/unzip 8, 16, 32 bit
 // lanes of lower or upper halves of a 256bit vector because the
 // unpack/pack intrinsics operate on the 256 bit input vector as 2
...
...
aom_dsp/simd/v64_intrinsics.h
...
...
@@ -81,6 +81,7 @@ SIMD_INLINE v64 v64_ssub_s16(v64 a, v64 b) { return c_v64_ssub_s16(a, b); }
 SIMD_INLINE v64 v64_ssub_u16(v64 a, v64 b) { return c_v64_ssub_u16(a, b); }
 SIMD_INLINE v64 v64_sub_32(v64 a, v64 b) { return c_v64_sub_32(a, b); }
 SIMD_INLINE v64 v64_abs_s16(v64 a) { return c_v64_abs_s16(a); }
+SIMD_INLINE v64 v64_abs_s8(v64 a) { return c_v64_abs_s8(a); }
 SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return c_v64_ziplo_8(a, b); }
 SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) { return c_v64_ziphi_8(a, b); }
...
...
aom_dsp/simd/v64_intrinsics_arm.h
...
...
@@ -242,6 +242,10 @@ SIMD_INLINE v64 v64_abs_s16(v64 x) {
   return vreinterpret_s64_s16(vabs_s16(vreinterpret_s16_s64(x)));
 }
 
+SIMD_INLINE v64 v64_abs_s8(v64 x) {
+  return vreinterpret_s64_s8(vabs_s8(vreinterpret_s8_s64(x)));
+}
+
 SIMD_INLINE v64 v64_mullo_s16(v64 x, v64 y) {
   return vreinterpret_s64_s16(
       vmul_s16(vreinterpret_s16_s64(x), vreinterpret_s16_s64(y)));
...
...
aom_dsp/simd/v64_intrinsics_c.h
...
...
@@ -264,6 +264,13 @@ SIMD_INLINE c_v64 c_v64_abs_s16(c_v64 a) {
   return t;
 }
 
+SIMD_INLINE c_v64 c_v64_abs_s8(c_v64 a) {
+  c_v64 t;
+  int c;
+  for (c = 0; c < 8; c++)
+    t.u8[c] = (int8_t)a.u8[c] > 0 ? a.u8[c] : -a.u8[c];
+  return t;
+}
 
 SIMD_INLINE c_v64 _c_v64_zip_8(c_v64 a, c_v64 b, int mode) {
   c_v64 t;
   if (mode) {
...
...
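The plain-C version above is the reference implementation the SIMD variants are compared against: it walks the eight u8 lanes of the c_v64 container, and the c_v128/c_v256 references shown earlier simply apply it to their v64[]/v128[] halves. The container definitions live elsewhere in these headers; a simplified stand-in (an assumption for illustration, not the project's exact definition) looks roughly like this:

/* Simplified stand-in for the c_v64 container (assumption; the real union in
 * v64_intrinsics_c.h exposes more lane views, e.g. signed variants). */
#include <stdint.h>

typedef union {
  uint8_t u8[8];   /* eight 8-bit lanes, as indexed by c_v64_abs_s8() */
  uint16_t u16[4]; /* four 16-bit lanes                               */
  uint32_t u32[2]; /* two 32-bit lanes                                */
  uint64_t u64;    /* the whole 64-bit vector                         */
} c_v64_model;

Storing the negated value through the unsigned u8 view gives the same two's-complement wrap for -128 as the x86 and NEON paths.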
aom_dsp/simd/v64_intrinsics_x86.h
...
...
@@ -138,6 +138,16 @@ SIMD_INLINE v64 v64_abs_s16(v64 a) {
 #endif
 }
 
+SIMD_INLINE v64 v64_abs_s8(v64 a) {
+#if defined(__SSSE3__)
+  return _mm_abs_epi8(a);
+#else
+  v64 t = _mm_sub_epi8(_mm_setzero_si128(), a);
+  v64 mask = _mm_cmplt_epi8(t, a);
+  return _mm_or_si128(_mm_andnot_si128(mask, t), _mm_and_si128(mask, a));
+#endif
+}
+
 SIMD_INLINE v64 v64_ziplo_8(v64 a, v64 b) { return _mm_unpacklo_epi8(b, a); }
 
 SIMD_INLINE v64 v64_ziphi_8(v64 a, v64 b) {
...
...
test/simd_cmp_impl.h
...
...
@@ -325,6 +325,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(imm_v64_align<5>),
                       MAP(imm_v64_align<6>),
                       MAP(imm_v64_align<7>),
+                      MAP(v64_abs_s8),
                       MAP(v64_abs_s16),
                       MAP(v64_unpacklo_u8_s16),
                       MAP(v64_unpackhi_u8_s16),
...
...
@@ -509,6 +510,7 @@ const mapping m[] = { MAP(v64_sad_u8),
                       MAP(imm_v128_align<13>),
                       MAP(imm_v128_align<14>),
                       MAP(imm_v128_align<15>),
+                      MAP(v128_abs_s8),
                       MAP(v128_abs_s16),
                       MAP(v128_padd_s16),
                       MAP(v128_unpacklo_u16_s32),
...
...
test/simd_impl.h
...
...
@@ -264,7 +264,8 @@ INSTANTIATE(
     SIMD_TUPLE(imm_v64_align<5>, 0U, 0U),
     SIMD_TUPLE(imm_v64_align<6>, 0U, 0U),
     SIMD_TUPLE(imm_v64_align<7>, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s16, 0U, 0U),
+INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v64_abs_s16, 0U, 0U),
             SIMD_TUPLE(v64_unpacklo_u8_s16, 0U, 0U),
             SIMD_TUPLE(v64_unpackhi_u8_s16, 0U, 0U),
             SIMD_TUPLE(v64_unpacklo_u16_s32, 0U, 0U),
...
...
@@ -312,10 +313,10 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64), SIMD_TUPLE(v64_abs_s16, 0U, 0U),
             SIMD_TUPLE(imm_v64_shl_n_16<6>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shl_n_16<8>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shl_n_16<10>, 0U, 0U),
-            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U),
-            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U));
+            SIMD_TUPLE(imm_v64_shl_n_16<12>, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V64_V64_Part2),
+            SIMD_TUPLE(imm_v64_shl_n_16<14>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shr_n_u16<1>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shr_n_u16<2>, 0U, 0U),
             SIMD_TUPLE(imm_v64_shr_n_u16<4>, 0U, 0U),
...
...
@@ -437,8 +438,8 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128V128_Part2),
             SIMD_TUPLE(imm_v128_align<14>, 0U, 0U),
             SIMD_TUPLE(imm_v128_align<15>, 0U, 0U));
 
-INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s16, 0U, 0U),
-            SIMD_TUPLE(v128_padd_s16, 0U, 0U),
+INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s8, 0U, 0U),
+            SIMD_TUPLE(v128_abs_s16, 0U, 0U),
+            SIMD_TUPLE(v128_padd_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_u8_s16, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_u16_s32, 0U, 0U),
             SIMD_TUPLE(v128_unpacklo_s16_s32, 0U, 0U),
...
...
@@ -484,10 +485,10 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128), SIMD_TUPLE(v128_abs_s16, 0U, 0U),
             SIMD_TUPLE(imm_v128_shl_n_8<7>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<1>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<2>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U));
+            SIMD_TUPLE(imm_v128_shr_n_u8<3>, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
+            SIMD_TUPLE(imm_v128_shr_n_u8<4>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<5>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<6>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u8<7>, 0U, 0U),
...
...
@@ -533,10 +534,10 @@ INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part2),
             SIMD_TUPLE(imm_v128_shr_n_u32<1>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<4>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<8>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U),
-            SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U));
+            SIMD_TUPLE(imm_v128_shr_n_u32<12>, 0U, 0U));
 
 INSTANTIATE(ARCH, ARCH_POSTFIX(V128_V128_Part3),
+            SIMD_TUPLE(imm_v128_shr_n_u32<16>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<20>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<24>, 0U, 0U),
             SIMD_TUPLE(imm_v128_shr_n_u32<28>, 0U, 0U),
...
...