Skip to content
Snippets Groups Projects
Verified Commit cfdaf365 authored by Linfeng Zhang's avatar Linfeng Zhang Committed by Jean-Marc Valin
Browse files

Optimize silk_NSQ_del_dec() for ARM NEON


The optimization is bit exact with C function.

This optimization speeds up SILK encoder on NEON as following.

Fixed-point:
Complexity 0-5:  0%
Complexity 6-7:  6%
Complexity 8-9: 10%
Complexity  10:  8%

Got similar results on floating-point.

Signed-off-by: default avatarJean-Marc Valin <jmvalin@jmvalin.ca>
parent 68afa490
No related branches found
No related tags found
No related merge requests found
......@@ -580,7 +580,9 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* Make sure to store the result as the seed for the next call (also in between */
/* frames), otherwise result won't be random at all. When only using some of the */
/* bits, take the most significant bits by right-shifting. */
#define silk_RAND(seed) (silk_MLA_ovflw(907633515, (seed), 196314165))
#define RAND_MULTIPLIER 196314165
#define RAND_INCREMENT 907633515
#define silk_RAND(seed) (silk_MLA_ovflw((RAND_INCREMENT), (seed), (RAND_MULTIPLIER)))
/* Add some multiplication functions that can be easily mapped to ARM. */
......
/***********************************************************************
Copyright (c) 2017 Google Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef SILK_NSQ_DEL_DEC_ARM_H
#define SILK_NSQ_DEL_DEC_ARM_H
#include "celt/arm/armcpu.h"
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void silk_NSQ_del_dec_neon(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#endif
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
PRESUME_NEON(silk_NSQ_del_dec)( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#endif
#if !defined(OVERRIDE_silk_NSQ_del_dec)
/*Is run-time CPU detection enabled on this platform?*/
#if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, silk_nsq_state *NSQ,
SideInfoIndices *psIndices, const opus_int16 x16[], opus_int8 pulses[],
const opus_int16 PredCoef_Q12[2 * MAX_LPC_ORDER],
const opus_int16 LTPCoef_Q14[LTP_ORDER * MAX_NB_SUBFR],
const opus_int16 AR_Q13[MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
const opus_int HarmShapeGain_Q14[MAX_NB_SUBFR],
const opus_int Tilt_Q14[MAX_NB_SUBFR],
const opus_int32 LF_shp_Q14[MAX_NB_SUBFR],
const opus_int32 Gains_Q16[MAX_NB_SUBFR],
const opus_int pitchL[MAX_NB_SUBFR], const opus_int Lambda_Q10,
const opus_int LTP_scale_Q14);
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch)&OPUS_ARCHMASK])( \
psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, \
AR_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, \
Lambda_Q10, LTP_scale_Q14))
#elif defined(OPUS_ARM_PRESUME_NEON_INTR)
#define OVERRIDE_silk_NSQ_del_dec (1)
#define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14, arch) \
((void)(arch), \
silk_NSQ_del_dec_neon(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, \
LTPCoef_Q14, AR_Q13, HarmShapeGain_Q14, Tilt_Q14, \
LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, \
LTP_scale_Q14))
#endif
#endif
#endif /* end SILK_NSQ_DEL_DEC_ARM_H */
This diff is collapsed.
......@@ -28,6 +28,7 @@ POSSIBILITY OF SUCH DAMAGE.
# include "config.h"
#endif
#include "main_FIX.h"
#include "NSQ.h"
#if defined(OPUS_HAVE_RTCD)
......@@ -35,6 +36,29 @@ POSSIBILITY OF SUCH DAMAGE.
# if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && \
!defined(OPUS_ARM_PRESUME_NEON_INTR))
void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
SideInfoIndices *psIndices, /* I/O Quantization Indices */
const opus_int16 x16[], /* I Input */
opus_int8 pulses[], /* O Quantized pulse signal */
const opus_int16 PredCoef_Q12[ 2 * MAX_LPC_ORDER ], /* I Short term prediction coefs */
const opus_int16 LTPCoef_Q14[ LTP_ORDER * MAX_NB_SUBFR ], /* I Long term prediction coefs */
const opus_int16 AR_Q13[ MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER ], /* I Noise shaping coefs */
const opus_int HarmShapeGain_Q14[ MAX_NB_SUBFR ], /* I Long term shaping coefs */
const opus_int Tilt_Q14[ MAX_NB_SUBFR ], /* I Spectral tilt */
const opus_int32 LF_shp_Q14[ MAX_NB_SUBFR ], /* I Low frequency shaping coefs */
const opus_int32 Gains_Q16[ MAX_NB_SUBFR ], /* I Quantization step sizes */
const opus_int pitchL[ MAX_NB_SUBFR ], /* I Pitch lags */
const opus_int Lambda_Q10, /* I Rate/distortion tradeoff */
const opus_int LTP_scale_Q14 /* I LTP state scaling */
) = {
silk_NSQ_del_dec_c, /* ARMv4 */
silk_NSQ_del_dec_c, /* EDSP */
silk_NSQ_del_dec_c, /* Media */
MAY_HAVE_NEON(silk_NSQ_del_dec), /* Neon */
};
/*There is no table for silk_noise_shape_quantizer_short_prediction because the
NEON version takes different parameters than the C version.
Instead RTCD is done via if statements at the call sites.
......
......@@ -42,6 +42,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include "x86/main_sse.h"
#endif
#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/NSQ_del_dec_arm.h"
#endif
/* Convert Left/Right stereo signal to adaptive Mid/Side representation */
void silk_stereo_LR_to_MS(
stereo_enc_state *state, /* I/O State */
......
......@@ -27,6 +27,7 @@ silk/arm/macros_armv5e.h \
silk/arm/macros_arm64.h \
silk/arm/SigProc_FIX_armv4.h \
silk/arm/SigProc_FIX_armv5e.h \
silk/arm/NSQ_del_dec_arm.h \
silk/arm/NSQ_neon.h \
silk/fixed/main_FIX.h \
silk/fixed/structs_FIX.h \
......
......@@ -85,6 +85,7 @@ silk/x86/VQ_WMat_EC_sse.c
SILK_SOURCES_ARM_NEON_INTR = \
silk/arm/arm_silk_map.c \
silk/arm/NSQ_del_dec_neon_intr.c \
silk/arm/NSQ_neon.c
SILK_SOURCES_FIXED = \
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment