Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Opus
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Mark Harris
Opus
Commits
1632152b
Commit
1632152b
authored
Oct 27, 2015
by
Radu Velea
Committed by
Timothy B. Terriberry
Nov 05, 2015
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Adding AVX config switches
Signed-off-by:
Timothy B. Terriberry
<
tterribe@xiph.org
>
parent
bb0e1e0d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
64 additions
and
3 deletions
+64
-3
celt/cpu_support.h
celt/cpu_support.h
+3
-2
celt/x86/x86_celt_map.c
celt/x86/x86_celt_map.c
+7
-0
celt/x86/x86cpu.c
celt/x86/x86cpu.c
+10
-0
configure.ac
configure.ac
+37
-1
silk/x86/x86_silk_map.c
silk/x86/x86_silk_map.c
+7
-0
No files found.
celt/cpu_support.h
View file @
1632152b
...
...
@@ -48,13 +48,14 @@
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1))
#include "x86/x86cpu.h"
/* We currently support
4
x86 variants:
/* We currently support
5
x86 variants:
* arch[0] -> non-sse
* arch[1] -> sse
* arch[2] -> sse2
* arch[3] -> sse4.1
* arch[4] -> avx
*/
#define OPUS_ARCHMASK
3
#define OPUS_ARCHMASK
7
int
opus_select_arch
(
void
);
#else
...
...
celt/x86/x86_celt_map.c
View file @
1632152b
...
...
@@ -53,6 +53,7 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
celt_fir_c
,
celt_fir_c
,
MAY_HAVE_SSE4_1
(
celt_fir
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
celt_fir
)
/* avx */
};
void
(
*
const
XCORR_KERNEL_IMPL
[
OPUS_ARCHMASK
+
1
])(
...
...
@@ -65,6 +66,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
xcorr_kernel_c
,
xcorr_kernel_c
,
MAY_HAVE_SSE4_1
(
xcorr_kernel
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
xcorr_kernel
)
/* avx */
};
#endif
...
...
@@ -81,6 +83,7 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
celt_inner_prod_c
,
MAY_HAVE_SSE2
(
celt_inner_prod
),
MAY_HAVE_SSE4_1
(
celt_inner_prod
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
celt_inner_prod
)
/* avx */
};
#endif
...
...
@@ -99,6 +102,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE
(
xcorr_kernel
),
MAY_HAVE_SSE
(
xcorr_kernel
),
MAY_HAVE_SSE
(
xcorr_kernel
),
MAY_HAVE_SSE
(
xcorr_kernel
)
};
opus_val32
(
*
const
CELT_INNER_PROD_IMPL
[
OPUS_ARCHMASK
+
1
])(
...
...
@@ -110,6 +114,7 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE
(
celt_inner_prod
),
MAY_HAVE_SSE
(
celt_inner_prod
),
MAY_HAVE_SSE
(
celt_inner_prod
),
MAY_HAVE_SSE
(
celt_inner_prod
)
};
void
(
*
const
DUAL_INNER_PROD_IMPL
[
OPUS_ARCHMASK
+
1
])(
...
...
@@ -124,6 +129,7 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE
(
dual_inner_prod
),
MAY_HAVE_SSE
(
dual_inner_prod
),
MAY_HAVE_SSE
(
dual_inner_prod
),
MAY_HAVE_SSE
(
dual_inner_prod
)
};
void
(
*
const
COMB_FILTER_CONST_IMPL
[
OPUS_ARCHMASK
+
1
])(
...
...
@@ -139,6 +145,7 @@ void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
MAY_HAVE_SSE
(
comb_filter_const
),
MAY_HAVE_SSE
(
comb_filter_const
),
MAY_HAVE_SSE
(
comb_filter_const
),
MAY_HAVE_SSE
(
comb_filter_const
)
};
...
...
celt/x86/x86cpu.c
View file @
1632152b
...
...
@@ -91,6 +91,8 @@ typedef struct CPU_Feature{
int
HW_SSE
;
int
HW_SSE2
;
int
HW_SSE41
;
/* SIMD: 256-bit */
int
HW_AVX
;
}
CPU_Feature
;
static
void
opus_cpu_feature_check
(
CPU_Feature
*
cpu_feature
)
...
...
@@ -106,11 +108,13 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
cpu_feature
->
HW_SSE
=
(
info
[
3
]
&
(
1
<<
25
))
!=
0
;
cpu_feature
->
HW_SSE2
=
(
info
[
3
]
&
(
1
<<
26
))
!=
0
;
cpu_feature
->
HW_SSE41
=
(
info
[
2
]
&
(
1
<<
19
))
!=
0
;
cpu_feature
->
HW_AVX
=
(
info
[
2
]
&
(
1
<<
28
))
!=
0
;
}
else
{
cpu_feature
->
HW_SSE
=
0
;
cpu_feature
->
HW_SSE2
=
0
;
cpu_feature
->
HW_SSE41
=
0
;
cpu_feature
->
HW_AVX
=
0
;
}
}
...
...
@@ -140,6 +144,12 @@ int opus_select_arch(void)
}
arch
++
;
if
(
!
cpu_feature
.
HW_AVX
)
{
return
arch
;
}
arch
++
;
return
arch
;
}
...
...
configure.ac
View file @
1632152b
...
...
@@ -351,10 +351,12 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
AM_CONDITIONAL([HAVE_SSE], [false])
AM_CONDITIONAL([HAVE_SSE2], [false])
AM_CONDITIONAL([HAVE_SSE4_1], [false])
AM_CONDITIONAL([HAVE_AVX], [false])
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
...
...
@@ -371,11 +373,13 @@ AS_CASE([$host],
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
AC_DEFUN([OPUS_PATH_NE10],
...
...
@@ -566,7 +570,24 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
AC_SUBST([OPUS_X86_SSE4_1_CFLAGS])
]
)
OPUS_CHECK_INTRINSICS(
[AVX],
[$X86_AVX_CFLAGS],
[OPUS_X86_MAY_HAVE_AVX],
[OPUS_X86_PRESUME_AVX],
[[#include <immintrin.h>
]],
[[
static __m256 mtest;
mtest = _mm256_setzero_ps();
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
[
OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
AC_SUBST([OPUS_X86_AVX_CFLAGS])
]
)
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
AS_IF([test x"$OPUS_X86_MAY_HAVE_SSE" = x"1"],
[
...
...
@@ -606,6 +627,19 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
])
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
[
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
intrinsics_support="$intrinsics_support AVX"
AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
[AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
[rtcd_support="$rtcd_support AVX"])
],
[
AC_MSG_WARN([Compiler does not support AVX intrinsics])
])
AS_IF([test x"$intrinsics_support" = x""],
[intrinsics_support=no],
[intrinsics_support="x86$intrinsics_support"]
...
...
@@ -672,6 +706,8 @@ AM_CONDITIONAL([HAVE_SSE2],
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
AM_CONDITIONAL([HAVE_SSE4_1],
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
AM_CONDITIONAL([HAVE_AVX],
[test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
AS_IF([test x"$enable_rtcd" = x"yes"],[
AS_IF([test x"$rtcd_support" != x"no"],[
...
...
silk/x86/x86_silk_map.c
View file @
1632152b
...
...
@@ -50,6 +50,7 @@ opus_int64 (*const SILK_INNER_PROD16_ALIGNED_64_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_inner_prod16_aligned_64_c
,
silk_inner_prod16_aligned_64_c
,
MAY_HAVE_SSE4_1
(
silk_inner_prod16_aligned_64
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_inner_prod16_aligned_64
)
/* avx */
};
#endif
...
...
@@ -62,6 +63,7 @@ opus_int (*const SILK_VAD_GETSA_Q8_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_VAD_GetSA_Q8_c
,
silk_VAD_GetSA_Q8_c
,
MAY_HAVE_SSE4_1
(
silk_VAD_GetSA_Q8
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_VAD_GetSA_Q8
)
/* avx */
};
void
(
*
const
SILK_NSQ_IMPL
[
OPUS_ARCHMASK
+
1
]
)(
...
...
@@ -85,6 +87,7 @@ void (*const SILK_NSQ_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_NSQ_c
,
silk_NSQ_c
,
MAY_HAVE_SSE4_1
(
silk_NSQ
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_NSQ
)
/* avx */
};
void
(
*
const
SILK_VQ_WMAT_EC_IMPL
[
OPUS_ARCHMASK
+
1
]
)(
...
...
@@ -104,6 +107,7 @@ void (*const SILK_VQ_WMAT_EC_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_VQ_WMat_EC_c
,
silk_VQ_WMat_EC_c
,
MAY_HAVE_SSE4_1
(
silk_VQ_WMat_EC
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_VQ_WMat_EC
)
/* avx */
};
void
(
*
const
SILK_NSQ_DEL_DEC_IMPL
[
OPUS_ARCHMASK
+
1
]
)(
...
...
@@ -127,6 +131,7 @@ void (*const SILK_NSQ_DEL_DEC_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_NSQ_del_dec_c
,
silk_NSQ_del_dec_c
,
MAY_HAVE_SSE4_1
(
silk_NSQ_del_dec
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_NSQ_del_dec
)
/* avx */
};
#if defined(FIXED_POINT)
...
...
@@ -144,6 +149,7 @@ void (*const SILK_WARPED_LPC_ANALYSIS_FILTER_FIX_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_warped_LPC_analysis_filter_FIX_c
,
silk_warped_LPC_analysis_filter_FIX_c
,
MAY_HAVE_SSE4_1
(
silk_warped_LPC_analysis_filter_FIX
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_warped_LPC_analysis_filter_FIX
)
/* avx */
};
void
(
*
const
SILK_BURG_MODIFIED_IMPL
[
OPUS_ARCHMASK
+
1
]
)(
...
...
@@ -161,6 +167,7 @@ void (*const SILK_BURG_MODIFIED_IMPL[ OPUS_ARCHMASK + 1 ] )(
silk_burg_modified_c
,
silk_burg_modified_c
,
MAY_HAVE_SSE4_1
(
silk_burg_modified
),
/* sse4.1 */
MAY_HAVE_SSE4_1
(
silk_burg_modified
)
/* avx */
};
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment