Commit f48abe83 authored by Viswanath Puttagunta's avatar Viswanath Puttagunta Committed by Jean-Marc Valin

armv7(float): Optimize encode usecase using NE10 library

Optimize opus encode (float only) usecase using ARM NE10
library. Mainly effects opus_fft and ctl_mdct_forward
and related functions.

This optimization can be used for ARM CPUs that have NEON
VFP unit. This patch only enables optimizations for ARMv7.

Official ARM NE10 library page available at
http://projectne10.github.io/Ne10/

To enable this optimization, use
--enable-intrinsics --with-NE10=<install_prefix>
or
--enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir>

Compile time checks made during configure process to make sure
optimization option available only when compiler supports NEON
instrinsics.

Runtime checks made to make sure optimized functions only called
on appropriate hardware.
Signed-off-by: Timothy B. Terriberry's avatarTimothy B. Terriberry <tterribe@xiph.org>
parent 0fe51435
......@@ -10,7 +10,7 @@ lib_LTLIBRARIES = libopus.la
DIST_SUBDIRS = doc
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
-I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
-I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
include celt_sources.mk
include silk_sources.mk
......@@ -51,6 +51,10 @@ if OPUS_ARM_NEON_INTR
CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
endif
if HAVE_ARM_NE10
CELT_SOURCES += $(CELT_SOURCES_ARM_NE10)
endif
if OPUS_ARM_EXTERNAL_ASM
noinst_LTLIBRARIES = libarmasm.la
libarmasm_la_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S)
......@@ -69,7 +73,7 @@ include opus_headers.mk
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
libopus_la_LIBADD = $(LIBM)
libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
libopus_la_LIBADD += libarmasm.la
endif
......@@ -85,32 +89,35 @@ TESTS = celt/tests/test_unit_types celt/tests/test_unit_mathops celt/tests/test_
opus_demo_SOURCES = src/opus_demo.c
opus_demo_LDADD = libopus.la $(LIBM)
opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
repacketizer_demo_SOURCES = src/repacketizer_demo.c
repacketizer_demo_LDADD = libopus.la $(LIBM)
repacketizer_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
opus_compare_SOURCES = src/opus_compare.c
opus_compare_LDADD = $(LIBM)
tests_test_opus_api_SOURCES = tests/test_opus_api.c tests/test_opus_common.h
tests_test_opus_api_LDADD = libopus.la $(LIBM)
tests_test_opus_api_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_encode_SOURCES = tests/test_opus_encode.c tests/test_opus_common.h
tests_test_opus_encode_LDADD = libopus.la $(LIBM)
tests_test_opus_encode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_decode_SOURCES = tests/test_opus_decode.c tests/test_opus_common.h
tests_test_opus_decode_LDADD = libopus.la $(LIBM)
tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
tests_test_opus_padding_LDADD = libopus.la $(LIBM)
tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
celt_tests_test_unit_dft_LDADD = $(LIBM)
celt_tests_test_unit_dft_LDADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_dft_LDADD += libarmasm.la
endif
celt_tests_test_unit_entropy_SOURCES = celt/tests/test_unit_entropy.c
celt_tests_test_unit_entropy_LDADD = $(LIBM)
......@@ -119,16 +126,19 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
celt_tests_test_unit_laplace_LDADD = $(LIBM)
celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
celt_tests_test_unit_mathops_LDADD = $(LIBM)
celt_tests_test_unit_mathops_LDADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_mathops_LDADD += libarmasm.la
endif
celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
celt_tests_test_unit_mdct_LDADD = $(LIBM)
celt_tests_test_unit_mdct_LDADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_mdct_LDADD += libarmasm.la
endif
celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
celt_tests_test_unit_rotation_LDADD = $(LIBM)
celt_tests_test_unit_rotation_LDADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_rotation_LDADD += libarmasm.la
endif
......@@ -286,5 +296,6 @@ endif
if OPUS_ARM_NEON_INTR
CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo)
$(CELT_ARM_NEON_INTR_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CFLAGS)
$(CELT_ARM_NEON_INTR_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += \
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
endif
......@@ -30,6 +30,8 @@
#endif
#include "pitch.h"
#include "kiss_fft.h"
#include "mdct.h"
#if defined(OPUS_HAVE_RTCD)
......@@ -42,7 +44,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
};
# else /* !FIXED_POINT */
# if defined(OPUS_ARM_NEON_INTR)
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
......@@ -50,7 +52,46 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
# endif
# endif
#endif
# if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES)
int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
opus_fft_free_arm_float_neon /* Neon with NE10 */
};
# endif /* CUSTOM_MODES */
void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout) = {
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
opus_fft_float_neon /* Neon with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift,
int stride, int arch) = {
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
clt_mdct_forward_float_neon /* Neon with NE10 */
};
# endif /* HAVE_ARM_NE10 */
# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
# endif /* FIXED_POINT */
#endif /* OPUS_HAVE_RTCD */
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_fft.c
@brief ARM Neon optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include <NE10_init.h>
#include <NE10_dsp.h>
#include "os_support.h"
#include "kiss_fft.h"
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
# if defined(CUSTOM_MODES)
/* nfft lengths in NE10 that support scaled fft */
#define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
480, 240, 120, 60
};
int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
{
int i;
size_t memneeded = sizeof(struct arch_fft_state);
st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
if (!st->arch_fft)
return -1;
for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
if(st->nfft == ne10_fft_scaled_support[i])
break;
}
if (i == NE10_FFTSCALED_SUPPORT_MAX) {
/* This nfft length (scaled fft) is not supported in NE10 */
st->arch_fft->is_supported = 0;
st->arch_fft->priv = NULL;
}
else {
st->arch_fft->is_supported = 1;
st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
if (st->arch_fft->priv == NULL) {
return -1;
}
}
return 0;
}
void opus_fft_free_arm_float_neon(kiss_fft_state *st)
{
ne10_fft_cfg_float32_t cfg;
if (!st->arch_fft)
return;
cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv;
if (cfg)
ne10_fft_destroy_c2c_float32(cfg);
opus_free(st->arch_fft);
}
# endif
void opus_fft_float_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
ne10_fft_state_float32_t state;
ne10_fft_cfg_float32_t cfg = &state;
VARDECL(ne10_fft_cpx_float32_t, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_fft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
state.is_forward_scaled = 1;
ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
(ne10_fft_cpx_float32_t *)fin,
cfg, 0);
}
RESTORE_STACK;
}
#endif /* !defined(FIXED_POINT) */
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file celt_ne10_mdct.c
@brief ARM Neon optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#endif
#include "kiss_fft.h"
#include "_kiss_fft_guts.h"
#include "mdct.h"
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
void clt_mdct_forward_float_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window,
int overlap, int shift, int stride, int arch)
{
int i;
int N, N2, N4;
VARDECL(kiss_fft_scalar, f);
VARDECL(kiss_fft_cpx, f2);
const kiss_fft_state *st = l->kfft[shift];
const kiss_twiddle_scalar *trig;
SAVE_STACK;
N = l->n;
trig = l->trig;
for (i=0;i<shift;i++)
{
N >>= 1;
trig += N;
}
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
ALLOC(f2, N4, kiss_fft_cpx);
/* Consider the input to be composed of four blocks: [a, b, c, d] */
/* Window, shuffle, fold */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
for(i=0;i<((overlap+3)>>2);i++)
{
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
*yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
*yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
wp1 = window;
wp2 = window+overlap-1;
for(;i<N4-((overlap+3)>>2);i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = *xp2;
*yp++ = *xp1;
xp1+=2;
xp2-=2;
}
for(;i<N4;i++)
{
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
*yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
*yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
xp1+=2;
xp2-=2;
wp1+=2;
wp2-=2;
}
}
/* Pre-rotation */
{
kiss_fft_scalar * OPUS_RESTRICT yp = f;
const kiss_twiddle_scalar *t = &trig[0];
for(i=0;i<N4;i++)
{
kiss_fft_cpx yc;
kiss_twiddle_scalar t0, t1;
kiss_fft_scalar re, im, yr, yi;
t0 = t[i];
t1 = t[N4+i];
re = *yp++;
im = *yp++;
yr = S_MUL(re,t0) - S_MUL(im,t1);
yi = S_MUL(im,t0) + S_MUL(re,t1);
yc.r = yr;
yc.i = yi;
f2[i] = yc;
}
}
opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &trig[0];
/* Temp pointers to make it really clear to the compiler what we're doing */
for(i=0;i<N4;i++)
{
kiss_fft_scalar yr, yi;
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
*yp1 = yr;
*yp2 = yi;
fp++;
yp1 += 2*stride;
yp2 -= 2*stride;
}
}
RESTORE_STACK;
}
#endif /* !defined(FIXED_POINT) */
......@@ -29,9 +29,15 @@
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <arm_neon.h>
#include "../pitch.h"
#if !defined(FIXED_POINT)
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------
......@@ -243,3 +249,4 @@ void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
(const float32_t *)_y+i, (float32_t *)xcorr+i, len);
}
}
#endif
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file fft_arm.h
@brief ARM Neon Intrinsic optimizations for fft using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(FFT_ARM_H)
#define FFT_ARM_H
#include "config.h"
#include "kiss_fft.h"
#if !defined(FIXED_POINT)
#if defined(HAVE_ARM_NE10)
int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
void opus_fft_free_arm_float_neon(kiss_fft_state *st);
void opus_fft_float_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_FFT (1)
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arm_float_neon(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arm_float_neon(_st))
#define opus_fft(_st, _fin, _fout, arch) \
((void)(arch), opus_fft_float_neon(_st, _fin, _fout))
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif /* FIXED_POINT */
#endif
/* Copyright (c) 2015 Xiph.Org Foundation
Written by Viswanath Puttagunta */
/**
@file arm_mdct.h
@brief ARM Neon Intrinsic optimizations for mdct using NE10 library
*/
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#if !defined(MDCT_ARM_H)
#define MDCT_ARM_H
#include "config.h"
#include "mdct.h"
#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
void clt_mdct_forward_float_neon(const mdct_lookup *l, kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap,
int shift, int stride, int arch);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_MDCT (1)
#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
clt_mdct_forward_float_neon(_l, _in, _out, _window, _int, _shift, _stride, _arch)
#endif /* OPUS_HAVE_RTCD */
#endif /* !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) */
#endif
......@@ -414,7 +414,8 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
/** Apply window and compute the MDCT for all sub-frames and
all channels in a frame */
static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
int arch)
{
const int overlap = mode->overlap;
int N;
......@@ -435,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
for (b=0;b<B;b++)
{
/* Interleaving the sub-frames while doing the MDCTs */
clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
&out[b+c*N*B], mode->window, overlap, shift, B,
arch);
}
} while (++c<CC);
if (CC==2&&C==1)
......@@ -1603,14 +1606,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(bandLogE2, C*nbEBands, opus_val16);
if (secondMdct)
{
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
compute_band_energies(mode, freq, bandE, effEnd, C, LM);