diff --git a/autogen.sh b/autogen.sh index 13c55c41748849fe8e977c048a4b26e0790b4a78..c9542665e4eafb04a2b1c2abce65afb72c7caa6a 100755 --- a/autogen.sh +++ b/autogen.sh @@ -135,6 +135,7 @@ fi echo "Generating configuration files for $package, please wait...." +ACLOCAL_FLAGS="-I m4" echo " $ACLOCAL $ACLOCAL_FLAGS" $ACLOCAL $ACLOCAL_FLAGS || exit 1 echo " autoheader" diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h index 33e62c6b3d993eaa48516098d3bea5f56e4474f5..76941302cb01c00a87345f3aaa837659390af91c 100644 --- a/celt/_kiss_fft_guts.h +++ b/celt/_kiss_fft_guts.h @@ -94,6 +94,179 @@ do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \ }while(0) +#if defined(ARMv4_ASM) + +#undef C_MUL +#define C_MUL(m,a,b) \ + do{ \ + int br__; \ + int bi__; \ + int tt__; \ + __asm__ __volatile__( \ + "#C_MUL\n\t" \ + "ldm %[ap], {r0,r1}\n\t" \ + "ldrsh %[br], [%[bp], #0]\n\t" \ + "ldrsh %[bi], [%[bp], #2]\n\t" \ + "smull %[tt], %[mi], r1, %[br]\n\t" \ + "smlal %[tt], %[mi], r0, %[bi]\n\t" \ + "rsb %[bi], %[bi], #0\n\t" \ + "smull r0, %[mr], r0, %[br]\n\t" \ + "mov %[tt], %[tt], lsr #15\n\t" \ + "smlal r0, %[mr], r1, %[bi]\n\t" \ + "orr %[mi], %[tt], %[mi], lsl #17\n\t" \ + "mov r0, r0, lsr #15\n\t" \ + "orr %[mr], r0, %[mr], lsl #17\n\t" \ + : [mr]"=r"((m).r), [mi]"=r"((m).i), \ + [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ + : [ap]"r"(&(a)), [bp]"r"(&(b)) \ + : "r0", "r1" \ + ); \ + } \ + while(0) + +#undef C_MUL4 +#define C_MUL4(m,a,b) \ + do{ \ + int br__; \ + int bi__; \ + int tt__; \ + __asm__ __volatile__( \ + "#C_MUL4\n\t" \ + "ldm %[ap], {r0,r1}\n\t" \ + "ldrsh %[br], [%[bp], #0]\n\t" \ + "ldrsh %[bi], [%[bp], #2]\n\t" \ + "smull %[tt], %[mi], r1, %[br]\n\t" \ + "smlal %[tt], %[mi], r0, %[bi]\n\t" \ + "rsb %[bi], %[bi], #0\n\t" \ + "smull r0, %[mr], r0, %[br]\n\t" \ + "mov %[tt], %[tt], lsr #17\n\t" \ + "smlal r0, %[mr], r1, %[bi]\n\t" \ + "orr %[mi], %[tt], %[mi], lsl #15\n\t" \ + "mov r0, r0, lsr #17\n\t" \ + "orr %[mr], r0, %[mr], lsl #15\n\t" \ + : [mr]"=r"((m).r), [mi]"=r"((m).i), \ + [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ + : [ap]"r"(&(a)), [bp]"r"(&(b)) \ + : "r0", "r1" \ + ); \ + } \ + while(0) + +#undef C_MULC +#define C_MULC(m,a,b) \ + do{ \ + int br__; \ + int bi__; \ + int tt__; \ + __asm__ __volatile__( \ + "#C_MULC\n\t" \ + "ldm %[ap], {r0,r1}\n\t" \ + "ldrsh %[br], [%[bp], #0]\n\t" \ + "ldrsh %[bi], [%[bp], #2]\n\t" \ + "smull %[tt], %[mr], r0, %[br]\n\t" \ + "smlal %[tt], %[mr], r1, %[bi]\n\t" \ + "rsb %[bi], %[bi], #0\n\t" \ + "smull r1, %[mi], r1, %[br]\n\t" \ + "mov %[tt], %[tt], lsr #15\n\t" \ + "smlal r1, %[mi], r0, %[bi]\n\t" \ + "orr %[mr], %[tt], %[mr], lsl #17\n\t" \ + "mov r1, r1, lsr #15\n\t" \ + "orr %[mi], r1, %[mi], lsl #17\n\t" \ + : [mr]"=r"((m).r), [mi]"=r"((m).i), \ + [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \ + : [ap]"r"(&(a)), [bp]"r"(&(b)) \ + : "r0", "r1" \ + ); \ + } \ + while(0) + +#endif /* ARMv4_ASM */ + +#if defined(ARMv5E_ASM) + +#if defined(__thumb__)||defined(__thumb2__) +#define LDRD_CONS "Q" +#else +#define LDRD_CONS "Uq" +#endif + +#undef C_MUL +#define C_MUL(m,a,b) \ + do{ \ + int mr1__; \ + int mr2__; \ + int mi__; \ + long long aval__; \ + int bval__; \ + __asm__( \ + "#C_MUL\n\t" \ + "ldrd %[aval], %H[aval], %[ap]\n\t" \ + "ldr %[bval], %[bp]\n\t" \ + "smulwb %[mi], %H[aval], %[bval]\n\t" \ + "smulwb %[mr1], %[aval], %[bval]\n\t" \ + "smulwt %[mr2], %H[aval], %[bval]\n\t" \ + "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ + : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ + [aval]"=&r"(aval__), [bval]"=r"(bval__) \ + : [ap]LDRD_CONS(a), [bp]"m"(b) \ + ); \ + (m).r = SHL32(SUB32(mr1__, mr2__), 1); \ + (m).i = SHL32(mi__, 1); \ + } \ + while(0) + +#undef C_MUL4 +#define C_MUL4(m,a,b) \ + do{ \ + int mr1__; \ + int mr2__; \ + int mi__; \ + long long aval__; \ + int bval__; \ + __asm__( \ + "#C_MUL4\n\t" \ + "ldrd %[aval], %H[aval], %[ap]\n\t" \ + "ldr %[bval], %[bp]\n\t" \ + "smulwb %[mi], %H[aval], %[bval]\n\t" \ + "smulwb %[mr1], %[aval], %[bval]\n\t" \ + "smulwt %[mr2], %H[aval], %[bval]\n\t" \ + "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \ + : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \ + [aval]"=&r"(aval__), [bval]"=r"(bval__) \ + : [ap]LDRD_CONS(a), [bp]"m"(b) \ + ); \ + (m).r = SHR32(SUB32(mr1__, mr2__), 1); \ + (m).i = SHR32(mi__, 1); \ + } \ + while(0) + +#undef C_MULC +#define C_MULC(m,a,b) \ + do{ \ + int mr__; \ + int mi1__; \ + int mi2__; \ + long long aval__; \ + int bval__; \ + __asm__( \ + "#C_MULC\n\t" \ + "ldrd %[aval], %H[aval], %[ap]\n\t" \ + "ldr %[bval], %[bp]\n\t" \ + "smulwb %[mr], %[aval], %[bval]\n\t" \ + "smulwb %[mi1], %H[aval], %[bval]\n\t" \ + "smulwt %[mi2], %[aval], %[bval]\n\t" \ + "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \ + : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \ + [aval]"=&r"(aval__), [bval]"=r"(bval__) \ + : [ap]LDRD_CONS(a), [bp]"m"(b) \ + ); \ + (m).r = SHL32(mr__, 1); \ + (m).i = SHL32(SUB32(mi1__, mi2__), 1); \ + } \ + while(0) + +#endif /* ARMv5E_ASM */ + #else /* not FIXED_POINT*/ # define S_MUL(a,b) ( (a)*(b) ) diff --git a/celt/arch.h b/celt/arch.h index 03cda40f6948965f3666df20072e84477ae6de7b..529d252637e3a3e69f6d8a28c4f3ac80c5302d69 100644 --- a/celt/arch.h +++ b/celt/arch.h @@ -112,10 +112,10 @@ typedef opus_val32 celt_ener; #include "fixed_generic.h" -#ifdef ARM5E_ASM -#include "fixed_arm5e.h" -#elif defined (ARM4_ASM) -#include "fixed_arm4.h" +#ifdef ARMv5E_ASM +#include "fixed_armv5e.h" +#elif defined (ARMv4_ASM) +#include "fixed_armv4.h" #elif defined (BFIN_ASM) #include "fixed_bfin.h" #elif defined (TI_C5X_ASM) diff --git a/celt/fixed_armv4.h b/celt/fixed_armv4.h new file mode 100644 index 0000000000000000000000000000000000000000..d38880f1c31e0cf1b01be43cf11dc1843818724c --- /dev/null +++ b/celt/fixed_armv4.h @@ -0,0 +1,71 @@ +/* Copyright (C) 2013 Xiph.Org Foundation and contributors */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_ARMv4_H +#define FIXED_ARMv4_H + +/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#undef MULT16_32_Q16 +static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#MULT16_32_Q16\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "r"(b),"r"(a<<16) + ); + return rd_hi; +} +#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b)) + + +/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ +#undef MULT16_32_Q15 +static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#MULT16_32_Q15\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "%r"(b), "r"(a<<16) + ); + /*We intentionally don't OR in the high bit of rd_lo for speed.*/ + return rd_hi<<1; +} +#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b)) + + +/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. + b must fit in 31 bits. + Result fits in 32 bits. */ +#undef MAC16_32_Q15 +#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b)) + +#endif diff --git a/celt/fixed_armv5e.h b/celt/fixed_armv5e.h new file mode 100644 index 0000000000000000000000000000000000000000..6b96150abab32d1b2e680ec36ff061d525658e3d --- /dev/null +++ b/celt/fixed_armv5e.h @@ -0,0 +1,127 @@ +/* Copyright (C) 2007-2009 Xiph.Org Foundation + Copyright (C) 2003-2008 Jean-Marc Valin + Copyright (C) 2007-2008 CSIRO + Copyright (C) 2013 Parrot */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef FIXED_ARMv5E_H +#define FIXED_ARMv5E_H + +#include "fixed_armv4.h" + +/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ +#undef MULT16_32_Q16 +static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b) +{ + int res; + __asm__( + "#MULT16_32_Q16\n\t" + "smulwb %0, %1, %2\n\t" + : "=r"(res) + : "r"(b),"r"(a) + ); + return res; +} +#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b)) + + +/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ +#undef MULT16_32_Q15 +static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b) +{ +#if 0 + unsigned rd_lo; + int rd_hi; + __asm__( + "#MULT16_32_Q15\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "%r"(b), "r"(a<<16) + ); + return (rd_lo>>31)|(rd_hi<<1); +#else + int res; + __asm__( + "#MULT16_32_Q15\n\t" + "smulwb %0, %1, %2\n\t" + : "=r"(res) + : "%r"(b), "r"(a) + ); + return res<<1; +#endif +} +#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b)) + + +/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add. + b must fit in 31 bits. + Result fits in 32 bits. */ +#undef MAC16_32_Q15 +static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a, + opus_val32 b) +{ + int res; + __asm__( + "#MAC16_32_Q15\n\t" + "smlawb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(b<<1), "r"(a), "r"(c) + ); + return res; +} +#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b)) + +/** 16x16 multiply-add where the result fits in 32 bits */ +#undef MAC16_16 +static inline opus_val32 MAC16_16(opus_val32 c, opus_val16 a, opus_val16 b) +{ + int res; + __asm__( + "#MAC16_16\n\t" + "smlabb %0, %1, %2, %3;\n" + : "=r"(res) + : "r"(a), "r"(b), "r"(c) + ); + return res; +} +#define MAC16_16(c, a, b) (MAC16_16(c, a, b)) + +/** 16x16 multiplication where the result fits in 32 bits */ +#undef MULT16_16 +static inline opus_val32 MULT16_16(opus_val16 a, opus_val16 b) +{ + int res; + __asm__( + "#MULT16_16\n\t" + "smulbb %0, %1, %2;\n" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define MULT16_16(a, b) (MULT16_16(a, b)) + +#endif diff --git a/configure.ac b/configure.ac index 060bb5237d698a84f7958f0c1429902a0b22392f..1ccdca8071df22d1789435519f6e174201e49f54 100644 --- a/configure.ac +++ b/configure.ac @@ -18,7 +18,6 @@ AC_CONFIG_SRCDIR(src/opus_encoder.c) dnl enable silent rules on automake 1.11 and later m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) - # For libtool. dnl Please update these for releases. OPUS_LT_CURRENT=4 @@ -155,6 +154,36 @@ if test "x${float_approx}" = "xyes"; then AC_DEFINE([FLOAT_APPROX], , [Float approximations]) fi +cpu_arm=no +AC_ARG_ENABLE(asm, + AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]), + [ ac_enable_asm=$enableval ], [ ac_enable_asm=yes] ) +if test "x${ac_enable_asm}" = xyes ; then + asm_optimization="no asm for your platform, please send patches" + case $host_cpu in + arm*) + cpu_arm=yes + AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"], + [asm_optimization="disabled"]) + if test "x${asm_optimization}" = "xARM" ; then + AC_DEFINE([ARMv4_ASM], [], [Use generic ARMv4 asm optimizations]) + AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0]) + if test "x${ARMv5E_ASM}" = "x1" ; then + AC_DEFINE(ARMv5E_ASM, 1, [Use ARMv5E asm optimizations]) + asm_optimization="${asm_optimization} (EDSP)" + fi + AS_ASM_ARM_MEDIA([ARMv6_ASM=1],[ARMv6_ASM=0]) + if test "x${ARMv6_ASM}" = "x1" ; then + AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations]) + asm_optimization="${asm_optimization} (Media)" + fi + fi + ;; + esac +else + asm_optimization="disabled" +fi + ac_enable_assertions="no" AC_ARG_ENABLE(assertions, [ --enable-assertions enable additional software error checking], [if test "$enableval" = yes; then @@ -281,6 +310,7 @@ AC_MSG_RESULT([ Floating point support: ........ ${ac_enable_float} Fast float approximations: ..... ${float_approx} Fixed point debugging: ......... ${ac_enable_fixed_debug} + Assembly optimization: ......... ${asm_optimization} Custom modes: .................. ${ac_enable_custom_modes} Assertion checking: ............ ${ac_enable_assertions} Fuzzing: ....................... ${ac_enable_fuzzing} diff --git a/m4/as-gcc-inline-assembly.m4 b/m4/as-gcc-inline-assembly.m4 new file mode 100644 index 0000000000000000000000000000000000000000..4437a9d0454889067559763e0a42513b2885b438 --- /dev/null +++ b/m4/as-gcc-inline-assembly.m4 @@ -0,0 +1,106 @@ +dnl as-gcc-inline-assembly.m4 0.1.0 + +dnl autostars m4 macro for detection of gcc inline assembly + +dnl David Schleef <ds@schleef.org> + +dnl $Id$ + +dnl AS_COMPILER_FLAG(ACTION-IF-ACCEPTED, [ACTION-IF-NOT-ACCEPTED]) +dnl Tries to compile with the given CFLAGS. +dnl Runs ACTION-IF-ACCEPTED if the compiler can compile with the flags, +dnl and ACTION-IF-NOT-ACCEPTED otherwise. + +AC_DEFUN([AS_GCC_INLINE_ASSEMBLY], +[ + AC_MSG_CHECKING([if compiler supports gcc-style inline assembly]) + + AC_TRY_COMPILE([], [ +#ifdef __GNUC_MINOR__ +#if (__GNUC__ * 1000 + __GNUC_MINOR__) < 3004 +#error GCC before 3.4 has critical bugs compiling inline assembly +#endif +#endif +__asm__ (""::) ], [flag_ok=yes], [flag_ok=no]) + + if test "X$flag_ok" = Xyes ; then + $1 + true + else + $2 + true + fi + AC_MSG_RESULT([$flag_ok]) +]) + +AC_DEFUN([AC_TRY_ASSEMBLE], +[ac_c_ext=$ac_ext + ac_ext=${ac_s_ext-s} + cat > conftest.$ac_ext <<EOF + .file "configure" +[$1] +EOF +if AC_TRY_EVAL(ac_compile); then + ac_ext=$ac_c_ext + ifelse([$2], , :, [ $2 + rm -rf conftest*]) +else + echo "configure: failed program was:" >&AC_FD_CC + cat conftest.$ac_ext >&AC_FD_CC + ac_ext=$ac_c_ext +ifelse([$3], , , [ rm -rf conftest* + $3 +])dnl +fi +rm -rf conftest*]) + + +AC_DEFUN([AS_ASM_ARM_NEON], +[ + AC_MSG_CHECKING([if assembler supports NEON instructions on ARM]) + + AC_TRY_ASSEMBLE([vorr d0,d0,d0], [flag_ok=yes], [flag_ok=no]) + + if test "X$flag_ok" = Xyes ; then + $1 + true + else + $2 + true + fi + AC_MSG_RESULT([$flag_ok]) +]) + + +AC_DEFUN([AS_ASM_ARM_MEDIA], +[ + AC_MSG_CHECKING([if assembler supports ARMv6 media instructions on ARM]) + + AC_TRY_ASSEMBLE([shadd8 r3,r3,r3], [flag_ok=yes], [flag_ok=no]) + + if test "X$flag_ok" = Xyes ; then + $1 + true + else + $2 + true + fi + AC_MSG_RESULT([$flag_ok]) +]) + + +AC_DEFUN([AS_ASM_ARM_EDSP], +[ + AC_MSG_CHECKING([if assembler supports EDSP instructions on ARM]) + + AC_TRY_ASSEMBLE([qadd r3,r3,r3], [flag_ok=yes], [flag_ok=no]) + + if test "X$flag_ok" = Xyes ; then + $1 + true + else + $2 + true + fi + AC_MSG_RESULT([$flag_ok]) +]) diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h index cf1ab3627780dc0708f15ba927bcd0b156eaf082..cf9f30c066ea8f08722cb9db519c54cf8127dc27 100644 --- a/silk/SigProc_FIX.h +++ b/silk/SigProc_FIX.h @@ -576,6 +576,14 @@ static inline opus_int64 silk_max_64(opus_int64 a, opus_int64 b) #include "MacroCount.h" #include "MacroDebug.h" +#ifdef ARMv4_ASM +#include "SigProc_FIX_armv4.h" +#endif + +#ifdef ARMv5E_ASM +#include "SigProc_FIX_armv5e.h" +#endif + #ifdef __cplusplus } #endif diff --git a/silk/SigProc_FIX_armv4.h b/silk/SigProc_FIX_armv4.h new file mode 100644 index 0000000000000000000000000000000000000000..ea3720207b191b1fc7fe9899d4ba896fdeda0b4c --- /dev/null +++ b/silk/SigProc_FIX_armv4.h @@ -0,0 +1,47 @@ +/*********************************************************************** +Copyright (C) 2013 Xiph.Org Foundation and contributors +Copyright (c) 2013 Parrot +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_SIGPROC_FIX_ARMv4_H +#define SILK_SIGPROC_FIX_ARMv4_H + +#undef silk_MLA +static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + opus_int32 res; + __asm__( + "#silk_MLA\n\t" + "mla %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c)) + +#endif diff --git a/silk/SigProc_FIX_armv5e.h b/silk/SigProc_FIX_armv5e.h new file mode 100644 index 0000000000000000000000000000000000000000..804e2bc55287d40719745d0c9d41c683c85b80cc --- /dev/null +++ b/silk/SigProc_FIX_armv5e.h @@ -0,0 +1,61 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Copyright (c) 2013 Parrot +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_SIGPROC_FIX_ARMv5E_H +#define SILK_SIGPROC_FIX_ARMv5E_H + +#undef silk_SMULTT +static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b) +{ + opus_int32 res; + __asm__( + "#silk_SMULTT\n\t" + "smultt %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b)) + +#undef silk_SMLATT +static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + opus_int32 res; + __asm__( + "#silk_SMLATT\n\t" + "smlatt %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c)) + +#endif diff --git a/silk/macros.h b/silk/macros.h index 31344cf4ba6d1f7643513fca31a8c07a1758f23a..6ac7005749bccae7570b1948dfe2f82584fba749 100644 --- a/silk/macros.h +++ b/silk/macros.h @@ -134,5 +134,13 @@ static inline opus_int32 silk_CLZ32(opus_int32 in32) (*((Matrix_base_adr) + ((row)+(M)*(column)))) #endif +#ifdef ARMv4_ASM +#include "macros_armv4.h" +#endif + +#ifdef ARMv5E_ASM +#include "macros_armv5e.h" +#endif + #endif /* SILK_MACROS_H */ diff --git a/silk/macros_armv4.h b/silk/macros_armv4.h new file mode 100644 index 0000000000000000000000000000000000000000..e5dfe69c7fb45ce9e8d8123295003a57df8db6ea --- /dev/null +++ b/silk/macros_armv4.h @@ -0,0 +1,103 @@ +/*********************************************************************** +Copyright (C) 2013 Xiph.Org Foundation and contributors. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_MACROS_ARMv4_H +#define SILK_MACROS_ARMv4_H + +/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ +#undef silk_SMULWB +static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWB\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "%r"(a), "r"(b<<16) + ); + return rd_hi; +} +#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b)) + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#undef silk_SMLAWB +#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c)) + +/* (a32 * (b32 >> 16)) >> 16 */ +#undef silk_SMULWT +static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWT\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "%r"(a), "r"(b&~0xFFFF) + ); + return rd_hi; +} +#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b)) + +/* a32 + (b32 * (c32 >> 16)) >> 16 */ +#undef silk_SMLAWT +#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c)) + +/* (a32 * b32) >> 16 */ +#undef silk_SMULWW +static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWW\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "%r"(a), "r"(b) + ); + return (rd_lo>>16)|(rd_hi<<16); +} +#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b)) + +#undef silk_SMLAWW +static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + unsigned rd_lo; + int rd_hi; + __asm__( + "#silk_SMULWW\n\t" + "smull %0, %1, %2, %3\n\t" + : "=r"(rd_lo), "=r"(rd_hi) + : "%r"(b), "r"(c) + ); + return a+((rd_lo>>16)|(rd_hi<<16)); +} +#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c)) + +#endif /* SILK_MACROS_ARMv4_H */ diff --git a/silk/macros_armv5e.h b/silk/macros_armv5e.h new file mode 100644 index 0000000000000000000000000000000000000000..a86586b33cfa2c8b135a4ed38f8d4233296fd95f --- /dev/null +++ b/silk/macros_armv5e.h @@ -0,0 +1,213 @@ +/*********************************************************************** +Copyright (c) 2006-2011, Skype Limited. All rights reserved. +Copyright (c) 2013 Parrot +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +- Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +- Neither the name of Internet Society, IETF or IETF Trust, nor the +names of specific contributors, may be used to endorse or promote +products derived from this software without specific prior written +permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +***********************************************************************/ + +#ifndef SILK_MACROS_ARMv5E_H +#define SILK_MACROS_ARMv5E_H + +/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */ +#undef silk_SMULWB +static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b) +{ + int res; + __asm__( + "#silk_SMULWB\n\t" + "smulwb %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b)) + +/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */ +#undef silk_SMLAWB +static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b, + opus_int16 c) +{ + int res; + __asm__( + "#silk_SMLAWB\n\t" + "smlawb %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c)) + +/* (a32 * (b32 >> 16)) >> 16 */ +#undef silk_SMULWT +static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SMULWT\n\t" + "smulwt %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b)) + +/* a32 + (b32 * (c32 >> 16)) >> 16 */ +#undef silk_SMLAWT +static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + int res; + __asm__( + "#silk_SMLAWT\n\t" + "smlawt %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c)) + +/* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */ +#undef silk_SMULBB +static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SMULBB\n\t" + "smulbb %0, %1, %2\n\t" + : "=r"(res) + : "%r"(a), "r"(b) + ); + return res; +} +#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b)) + +/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */ +#undef silk_SMLABB +static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + int res; + __asm__( + "#silk_SMLABB\n\t" + "smlabb %0, %1, %2, %3\n\t" + : "=r"(res) + : "%r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c)) + +/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */ +#undef silk_SMULBT +static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SMULBT\n\t" + "smulbt %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b)) + +/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */ +#undef silk_SMLABT +static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b, + opus_int32 c) +{ + int res; + __asm__( + "#silk_SMLABT\n\t" + "smlabt %0, %1, %2, %3\n\t" + : "=r"(res) + : "r"(b), "r"(c), "r"(a) + ); + return res; +} +#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c)) + +/* add/subtract with output saturated */ +#undef silk_ADD_SAT32 +static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_ADD_SAT32\n\t" + "qadd %0, %1, %2\n\t" + : "=r"(res) + : "%r"(a), "r"(b) + ); + return res; +} +#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b)) + +#undef silk_SUB_SAT32 +static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b) +{ + int res; + __asm__( + "#silk_SUB_SAT32\n\t" + "qsub %0, %1, %2\n\t" + : "=r"(res) + : "r"(a), "r"(b) + ); + return res; +} +#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b)) + +#undef silk_CLZ16 +static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16) +{ + int res; + __asm__( + "#silk_CLZ16\n\t" + "clz %0, %1;\n" + : "=r"(res) + : "r"(in16<<16|0x8000) + ); + return res; +} +#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16)) + +#undef silk_CLZ32 +static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32) +{ + int res; + __asm__( + "#silk_CLZ32\n\t" + "clz %0, %1\n\t" + : "=&r"(res) + : "r"(in32) + ); + return res; +} +#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32)) + +#endif /* SILK_MACROS_ARMv5E_H */