Commit ba8713cc authored by Viswanath Puttagunta, committed by Jean-Marc Valin

armv7: Optimize fixed point FFT using NE10 library

Uses NEON optimized fixed point FFT routines in NE10 library.
Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
parent 3807af3b
......@@ -52,23 +52,26 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
# endif
# endif /* FIXED_POINT */
# if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES)
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES)
int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
opus_fft_alloc_arm_neon /* Neon with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
opus_fft_free_arm_float_neon /* Neon with NE10 */
opus_fft_free_arm_neon /* Neon with NE10 */
};
# endif /* CUSTOM_MODES */
# endif /* CUSTOM_MODES */
void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
const kiss_fft_cpx *fin,
......@@ -76,7 +79,7 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
opus_fft_float_neon /* Neon with NE10 */
opus_fft_neon /* Neon with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
......@@ -85,9 +88,10 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
opus_ifft_float_neon /* Neon with NE10 */
opus_ifft_neon /* Neon with NE10 */
};
# if !defined(FIXED_POINT)
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
......@@ -112,8 +116,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_backward_float_neon /* Neon with NE10 */
};
# endif /* HAVE_ARM_NE10 */
# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
# endif /* FIXED_POINT */
# endif /* !FIXED_POINT */
# endif /* HAVE_ARM_NE10 */
# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
#endif /* OPUS_HAVE_RTCD */
......@@ -43,15 +43,31 @@
#include "stack_alloc.h"
#if !defined(FIXED_POINT)
# if defined(CUSTOM_MODES)
# define NE10_FFT_ALLOC_C2C_TYPE_NEON ne10_fft_alloc_c2c_float32_neon
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_float32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_float32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_float32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_float32_neon
#else
# define NE10_FFT_ALLOC_C2C_TYPE_NEON(nfft) ne10_fft_alloc_c2c_int32_neon(nfft)
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_STATE_TYPE_T ne10_fft_state_int32_t
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_DESTROY_C2C_TYPE ne10_fft_destroy_c2c_int32
# define NE10_FFT_CPX_TYPE_T ne10_fft_cpx_int32_t
# define NE10_FFT_C2C_1D_TYPE_NEON ne10_fft_c2c_1d_int32_neon
#endif
#if defined(CUSTOM_MODES)
/* nfft lengths in NE10 that support scaled fft */
#define NE10_FFTSCALED_SUPPORT_MAX 4
# define NE10_FFTSCALED_SUPPORT_MAX 4
static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
480, 240, 120, 60
};
int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
int opus_fft_alloc_arm_neon(kiss_fft_state *st)
{
int i;
size_t memneeded = sizeof(struct arch_fft_state);
......@@ -71,7 +87,7 @@ int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
}
else {
st->arch_fft->is_supported = 1;
st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
st->arch_fft->priv = (void *)NE10_FFT_ALLOC_C2C_TYPE_NEON(st->nfft);
if (st->arch_fft->priv == NULL) {
return -1;
}
......@@ -79,69 +95,80 @@ int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
return 0;
}
void opus_fft_free_arm_float_neon(kiss_fft_state *st)
void opus_fft_free_arm_neon(kiss_fft_state *st)
{
ne10_fft_cfg_float32_t cfg;
NE10_FFT_CFG_TYPE_T cfg;
if (!st->arch_fft)
return;
cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv;
cfg = (NE10_FFT_CFG_TYPE_T)st->arch_fft->priv;
if (cfg)
ne10_fft_destroy_c2c_float32(cfg);
NE10_FFT_DESTROY_C2C_TYPE(cfg);
opus_free(st->arch_fft);
}
# endif
#endif
void opus_fft_float_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
ne10_fft_state_float32_t state;
ne10_fft_cfg_float32_t cfg = &state;
VARDECL(ne10_fft_cpx_float32_t, buffer);
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_fft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_forward_scaled = 1;
ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
(ne10_fft_cpx_float32_t *)fin,
cfg, 0);
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 0, 1);
#endif
}
RESTORE_STACK;
}
void opus_ifft_float_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout)
{
ne10_fft_state_float32_t state;
ne10_fft_cfg_float32_t cfg = &state;
VARDECL(ne10_fft_cpx_float32_t, buffer);
NE10_FFT_STATE_TYPE_T state;
NE10_FFT_CFG_TYPE_T cfg = &state;
VARDECL(NE10_FFT_CPX_TYPE_T, buffer);
SAVE_STACK;
ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
ALLOC(buffer, st->nfft, NE10_FFT_CPX_TYPE_T);
if (!st->arch_fft->is_supported) {
/* This nfft length (scaled fft) not supported in NE10 */
opus_ifft_c(st, fin, fout);
}
else {
memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
memcpy((void *)cfg, st->arch_fft->priv, sizeof(NE10_FFT_STATE_TYPE_T));
state.buffer = (NE10_FFT_CPX_TYPE_T *)&buffer[0];
#if !defined(FIXED_POINT)
state.is_backward_scaled = 0;
ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
(ne10_fft_cpx_float32_t *)fin,
cfg, 1);
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1);
#else
NE10_FFT_C2C_1D_TYPE_NEON((NE10_FFT_CPX_TYPE_T *)fout,
(NE10_FFT_CPX_TYPE_T *)fin,
cfg, 1, 0);
#endif
}
RESTORE_STACK;
}
#endif /* !defined(FIXED_POINT) */
......@@ -37,38 +37,36 @@
#include "config.h"
#include "kiss_fft.h"
#if !defined(FIXED_POINT)
#if defined(HAVE_ARM_NE10)
int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
void opus_fft_free_arm_float_neon(kiss_fft_state *st);
int opus_fft_alloc_arm_neon(kiss_fft_state *st);
void opus_fft_free_arm_neon(kiss_fft_state *st);
void opus_fft_float_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
void opus_fft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
void opus_ifft_float_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
void opus_ifft_neon(const kiss_fft_state *st,
const kiss_fft_cpx *fin,
kiss_fft_cpx *fout);
#if !defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OPUS_FFT (1)
#define opus_fft_alloc_arch(_st, arch) \
((void)(arch), opus_fft_alloc_arm_float_neon(_st))
((void)(arch), opus_fft_alloc_arm_neon(_st))
#define opus_fft_free_arch(_st, arch) \
((void)(arch), opus_fft_free_arm_float_neon(_st))
((void)(arch), opus_fft_free_arm_neon(_st))
#define opus_fft(_st, _fin, _fout, arch) \
((void)(arch), opus_fft_float_neon(_st, _fin, _fout))
((void)(arch), opus_fft_neon(_st, _fin, _fout))
#define opus_ifft(_st, _fin, _fout, arch) \
((void)(arch), opus_ifft_float_neon(_st, _fin, _fout))
((void)(arch), opus_ifft_neon(_st, _fin, _fout))
#endif /* OPUS_HAVE_RTCD */
#endif /* HAVE_ARM_NE10 */
#endif /* FIXED_POINT */
#endif
......@@ -32,7 +32,11 @@ void dump_modes_arch_init();
void dump_mode_arch(CELTMode *mode);
void dump_modes_arch_finalize();
#if !defined(FIXED_POINT)
#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
#else
#define ARM_NE10_ARCH_FILE_NAME "static_modes_fixed_arm_ne10.h"
#endif
#if defined(HAVE_ARM_NE10)
#define OVERRIDE_FFT (1)
......
......@@ -35,6 +35,16 @@
#include "dump_modes_arch.h"
#include <NE10_dsp.h>
#if !defined(FIXED_POINT)
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_float32_t
# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_float32_t"
# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_float32_t"
#else
# define NE10_FFT_CFG_TYPE_T ne10_fft_cfg_int32_t
# define NE10_FFT_CPX_TYPE_T_STR "ne10_fft_cpx_int32_t"
# define NE10_FFT_STATE_TYPE_T_STR "ne10_fft_state_int32_t"
#endif
static FILE *file;
void dump_modes_arch_init(CELTMode **modes, int nb_modes)
......@@ -69,8 +79,8 @@ void dump_mode_arch(CELTMode *mode)
fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
/* cfg->factors */
for(k=0;k<=mode->mdct.maxshift;k++) {
ne10_fft_cfg_float32_t cfg;
cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
NE10_FFT_CFG_TYPE_T cfg;
cfg = (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[k]->arch_fft->priv;
if (!cfg)
continue;
fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
......@@ -83,21 +93,28 @@ void dump_mode_arch(CELTMode *mode)
/* cfg->twiddles */
for(k=0;k<=mode->mdct.maxshift;k++) {
ne10_fft_cfg_float32_t cfg;
cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
NE10_FFT_CFG_TYPE_T cfg;
cfg = (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[k]->arch_fft->priv;
if (!cfg)
continue;
fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n",
mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
fprintf(file, "static const %s ne10_twiddles_%d[%d] = {\n",
NE10_FFT_CPX_TYPE_T_STR, mode->mdct.kfft[k]->nfft,
mode->mdct.kfft[k]->nfft);
for(j=0;j<mode->mdct.kfft[k]->nfft;j++) {
fprintf(file, "{%#0.8gf,%#0.8gf},%c", cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
#if !defined(FIXED_POINT)
fprintf(file, "{%#0.8gf,%#0.8gf},%c",
cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
#else
fprintf(file, "{%d,%d},%c",
cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
#endif
}
fprintf (file, "};\n");
}
for(k=0;k<=mode->mdct.maxshift;k++) {
ne10_fft_cfg_float32_t cfg;
cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
NE10_FFT_CFG_TYPE_T cfg;
cfg = (NE10_FFT_CFG_TYPE_T)mode->mdct.kfft[k]->arch_fft->priv;
if (!cfg) {
fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
mode->mdct.kfft[k]->nfft);
......@@ -107,14 +124,15 @@ void dump_mode_arch(CELTMode *mode)
fprintf(file, "};\n");
continue;
}
fprintf(file, "static const ne10_fft_state_float32_t ne10_fft_state_float32_%d = {\n",
mode->mdct.kfft[k]->nfft);
fprintf(file, "static const %s %s_%d = {\n", NE10_FFT_STATE_TYPE_T_STR,
NE10_FFT_STATE_TYPE_T_STR, mode->mdct.kfft[k]->nfft);
fprintf(file, "%d,\n", cfg->nfft);
fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", mode->mdct.kfft[k]->nfft);
fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", mode->mdct.kfft[k]->nfft);
fprintf(file, "(%s *)ne10_twiddles_%d,\n",
NE10_FFT_CPX_TYPE_T_STR, mode->mdct.kfft[k]->nfft);
fprintf(file, "NULL,\n"); /* buffer */
fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n",
mode->mdct.kfft[k]->nfft, cfg->nfft);
fprintf(file, "(%s *)&ne10_twiddles_%d[%d],\n",
NE10_FFT_CPX_TYPE_T_STR, mode->mdct.kfft[k]->nfft, cfg->nfft);
fprintf(file, "/* is_forward_scaled = true */\n");
fprintf(file, "(ne10_int32_t) 1,\n");
fprintf(file, "/* is_backward_scaled = false */\n");
......@@ -124,7 +142,8 @@ void dump_mode_arch(CELTMode *mode)
fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n",
mode->mdct.kfft[k]->nfft);
fprintf(file, "1,\n");
fprintf(file, "(void *)&ne10_fft_state_float32_%d,\n", mode->mdct.kfft[k]->nfft);
fprintf(file, "(void *)&%s_%d,\n",
NE10_FFT_STATE_TYPE_T_STR, mode->mdct.kfft[k]->nfft);
fprintf(file, "};\n\n");
}
fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
......
......@@ -93,12 +93,10 @@ typedef struct kiss_fft_state{
opus_int16 factors[2*MAXFACTORS];
const opus_int16 *bitrev;
const kiss_twiddle_cpx *twiddles;
#ifndef FIXED_POINT
arch_fft_state *arch_fft;
#endif
} kiss_fft_state;
#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
#if defined(HAVE_ARM_NE10)
#include "arm/fft_arm.h"
#endif
......
......@@ -76,7 +76,7 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
#if !defined(OVERRIDE_OPUS_MDCT)
/* Is run-time CPU detection enabled on this platform? */
#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT)
extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
const mdct_lookup *l, kiss_fft_scalar *in,
......@@ -98,7 +98,7 @@ extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
_window, _overlap, _shift, \
_stride, _arch)
#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
#else /* if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */
#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
......@@ -106,7 +106,7 @@ extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
clt_mdct_backward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
#endif /* end if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10) && !defined(FIXED_POINT) */
#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
#endif
......@@ -4,6 +4,11 @@
#include "modes.h"
#include "rate.h"
#ifdef HAVE_ARM_NE10
#define OVERRIDE_FFT 1
#include "static_modes_fixed_arm_ne10.h"
#endif
#ifndef DEF_WINDOW120
#define DEF_WINDOW120
static const opus_val16 window120[120] = {
......@@ -432,6 +437,11 @@ static const kiss_fft_state fft_state48000_960_0 = {
{5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev480, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_480,
#else
NULL,
#endif
};
#endif
......@@ -445,6 +455,11 @@ static const kiss_fft_state fft_state48000_960_1 = {
{5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev240, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_240,
#else
NULL,
#endif
};
#endif
......@@ -458,6 +473,11 @@ static const kiss_fft_state fft_state48000_960_2 = {
{5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev120, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_120,
#else
NULL,
#endif
};
#endif
......@@ -471,6 +491,11 @@ static const kiss_fft_state fft_state48000_960_3 = {
{5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
fft_bitrev60, /* bitrev */
fft_twiddles48000_960, /* bitrev */
#ifdef OVERRIDE_FFT
(arch_fft_state *)&cfg_arch_60,
#else
NULL,
#endif
};
#endif
......
This diff is collapsed.
......@@ -54,7 +54,7 @@
# include "pitch.c"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# include "arm/celt_neon_intr.c"
# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
# if defined(HAVE_ARM_NE10)
# include "mdct.c"
# include "arm/celt_ne10_fft.c"
# include "arm/celt_ne10_mdct.c"
......
......@@ -68,7 +68,7 @@
# include "arm/armcpu.c"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# include "arm/celt_neon_intr.c"
# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
# if defined(HAVE_ARM_NE10)
# include "kiss_fft.c"
# include "mdct.c"
# include "arm/celt_ne10_fft.c"
......
......@@ -55,7 +55,7 @@
# include "celt_lpc.c"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# include "arm/celt_neon_intr.c"
# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
# if defined(HAVE_ARM_NE10)
# include "arm/celt_ne10_fft.c"
# include "arm/celt_ne10_mdct.c"
# endif
......
......@@ -66,7 +66,7 @@
# include "arm/armcpu.c"
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# include "arm/celt_neon_intr.c"
# if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
# if defined(HAVE_ARM_NE10)
# include "kiss_fft.c"
# include "mdct.c"
# include "arm/celt_ne10_fft.c"
......
......@@ -32,6 +32,7 @@ celt/vq.h \
celt/static_modes_float.h \
celt/static_modes_fixed.h \
celt/static_modes_float_arm_ne10.h \
celt/static_modes_fixed_arm_ne10.h \
celt/arm/armcpu.h \
celt/arm/fixed_armv4.h \
celt/arm/fixed_armv5e.h \
......
......@@ -477,43 +477,36 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
]
)
#Currently we only have intrinsic optimizations for floating point
AS_IF([test x"$enable_float" = x"yes"],
AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
[
AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
[
AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON_INTR], 1, [Compiler supports ARMv7 Neon Intrinsics])
intrinsics_support="$intrinsics_support (Neon_Intrinsics)"
AS_IF([test x"enable_rtcd" != x"" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
[rtcd_support="$rtcd_support (ARMv7_Neon_Intrinsics)"],[])
AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
[AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1, [Define if binary requires NEON intrinsics support])])
AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON_INTR], 1, [Compiler supports ARMv7 Neon Intrinsics])
intrinsics_support="$intrinsics_support (Neon_Intrinsics)"
OPUS_PATH_NE10()
AS_IF([test x"$NE10_LIBS" != x""],
[
intrinsics_support="$intrinsics_support (NE10)"
AS_IF([test x"enable_rtcd" != x"" \
&& test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
[rtcd_support="$rtcd_support (NE10)"],[])
])
AS_IF([test x"enable_rtcd" != x"" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
[rtcd_support="$rtcd_support (ARMv7_Neon_Intrinsics)"],[])
AS_IF([test x"$rtcd_support" = x""],
[rtcd_support=no])
AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
[AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1, [Define if binary requires NEON intrinsics support])])
AS_IF([test x"$intrinsics_support" = x""],
[intrinsics_support=no],
[intrinsics_support="arm$intrinsics_support"])
],
OPUS_PATH_NE10()
AS_IF([test x"$NE10_LIBS" != x""],
[
AC_MSG_WARN([Compiler does not support ARM intrinsics])
intrinsics_support=no
intrinsics_support="$intrinsics_support (NE10)"
AS_IF([test x"enable_rtcd" != x"" \
&& test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
[rtcd_support="$rtcd_support (NE10)"],[])
])
], [
AC_MSG_WARN([Currently only have ARM intrinsics for float])
intrinsics_support=no
AS_IF([test x"$rtcd_support" = x""],
[rtcd_support=no])
AS_IF([test x"$intrinsics_support" = x""],
[intrinsics_support=no],
[intrinsics_support="arm$intrinsics_support"])
],
[
AC_MSG_WARN([Compiler does not support ARM intrinsics])
intrinsics_support=no
])
],
[i?86|x86_64],
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment