Skip to content
Snippets Groups Projects
Commit 61a67f38 authored by Jean-Marc Valin's avatar Jean-Marc Valin
Browse files

Feature extraction

parent f7c106d5
No related branches found
No related tags found
No related merge requests found
OPUS_SOURCES = src/opus_decoder.c \
src/opus_encoder.c
src/opus_encoder.c \
src/features.c
......@@ -32,19 +32,44 @@
#include "kiss_fft.h"
#include "celt.h"
#include "modes.h"
#include "arch.h"
#include "features.h"
#include "quant_bands.h"
/* Number of analysis bands whose energies are accumulated from the FFT output. */
#define NBANDS 17
/* Band edges expressed as FFT bin indices: band i covers bins
   bands[i] up to (but not including) bands[i+1], hence NBANDS+1 entries.
   Kept file-local so that such a generic name does not end up in the
   library's global symbol namespace. */
static const int bands[NBANDS+1] =
{1, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96,112,136,160};
void feature_analysis(CELTEncoder *celt_enc, celt_word16 *x)
/* DCT basis used to turn band log-energies into cepstral coefficients.
   Laid out as 8 rows of 16 columns (row-major): entry [k*16 + n] is the
   weight of input n in output coefficient k. Row 0 is the constant 1/4 and
   the remaining rows are cosine rows scaled so that each row has unit norm.
   The table is read-only and private to this file, hence static const. */
static const float dct_table[128] = {
0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
-0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
-0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
-0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
-0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
};
/* Derives a 23-value feature vector for the current frame from per-band
   spectral energies and prints it to stdout.

   celt_enc  encoder state; not referenced in the lines visible here
             (NOTE(review): presumably used in the elided part to obtain
             the mode/FFT setup - confirm).
   x         input samples; consumed in the elided part of the function.
   features  output, entries 0..22 are written:
               [0..6]   BFCC[1..7]
               [7..14]  scaled difference against the values in mem[8..15]
               [15..22] scaled combination of BFCC, mem[8..15] and mem[0..7]
   mem       16 values of state carried between calls: on return mem[0..7]
             holds this call's BFCC[0..7] and mem[8..15] holds what
             mem[0..7] contained on entry.

   NOTE(review): this listing contains a hunk marker in the middle of the
   body and what appear to be both the pre-change and post-change versions
   of some lines (two declarations of E, two forms of the energy
   accumulation). The code that fills `out` is not visible. */
static void feature_analysis(CELTEncoder *celt_enc, const celt_word16 *x,
celt_word16 *features, celt_word16 *mem)
{
int i;
const CELTMode *mode;
const kiss_fft_state *kfft;
kiss_fft_cpx in[480], out[480];
const celt_word16 *window;
celt_word32 E[NBANDS+1];
celt_word32 E[NBANDS];
celt_word16 logE[NBANDS];
/* Only BFCC[0..7] are written and read below. */
celt_word16 BFCC[16];
int overlap = 120;
int N = 480;
......@@ -70,11 +95,62 @@ void feature_analysis(CELTEncoder *celt_enc, celt_word16 *x)
/* Per-band energy: for every bin j of band i, add the squared real and
   imaginary parts of out[j] and of the mirrored bin out[N-j]. */
for (i=0;i<NBANDS;i++)
{
int j;
E[i] = 0;
celt_word32 sum = 0;
for (j=bands[i];j<bands[i+1];j++)
E[i] = E[i] + MULT_32_32_Q31(out[ j].r, out[ j].r)
+ MULT_32_32_Q31(out[ j].i, out[ j].i)
+ MULT_32_32_Q31(out[N-j].r, out[N-j].r)
+ MULT_32_32_Q31(out[N-j].i, out[N-j].i);
sum = sum + MULT32_32_Q31(out[ j].r, out[ j].r)
+ MULT32_32_Q31(out[ j].i, out[ j].i)
+ MULT32_32_Q31(out[N-j].r, out[N-j].r)
+ MULT32_32_Q31(out[N-j].i, out[N-j].i);
/* Floor the band energy at EPSILON (presumably so the log conversion
   below never receives zero). */
E[i] = MAX32(EPSILON, sum);
//printf ("%f ", E[i]);
}
/* Fill logE from E via amp2Log2 (presumably a log2-domain conversion -
   see its definition), then clamp every entry to be no lower than -14. */
amp2Log2(mode, NBANDS, NBANDS, E, logE, 1);
for (i=0;i<NBANDS;i++)
logE[i] = MAX32(logE[i], -14.);
//for (i=0;i<16;i++)
// printf ("%f ", logE[i]);
/* First 8 DCT coefficients of logE[0..15]; row i of dct_table holds the
   16 weights for coefficient i. logE[16] does not take part. */
for (i=0;i<8;i++)
{
int j;
float sum = 0;
for (j=0;j<16;j++)
sum += dct_table[i*16+j]*logE[j];
BFCC[i] = sum;
//printf ("%f ", BFCC[i]);
}
/* features[0..6]: coefficients 1..7 (coefficient 0 is left out here). */
for (i=0;i<7;i++)
features[i] = BFCC[i+1];
/* features[7..14]: current coefficients minus the values stored in
   mem[8..15] (two calls back), scaled by .707. */
for (i=0;i<8;i++)
features[7+i] = .707*(BFCC[i] - mem[i+8]);
/* features[15..22]: .5*(current - 2*mem[8..15] + mem[0..7]).
   NOTE(review): a conventional second difference would put the -2 weight
   on the previous call's value (mem[i]) rather than on mem[i+8] - confirm
   that this weighting is intended. */
for (i=0;i<8;i++)
features[15+i] = .5*(BFCC[i] - 2*mem[i+8] + mem[i]);
/* Age the history: previous values move to mem[8..15], current
   coefficients become mem[0..7]. */
for (i=0;i<8;i++)
{
mem[i+8] = mem[i];
mem[i] = BFCC[i];
}
/* Print all 23 features on one line of stdout. */
for (i=0;i<23;i++)
printf ("%f ", features[i]);
printf("\n");
}
/* Entry point for feature extraction on 16-bit PCM input.

   celt_enc  encoder state, passed through to feature_analysis().
   x         input samples; in the non-FIXED_POINT build the first
             960-120 (= 840) entries are read, so the caller must supply
             at least that many.

   The computed feature vector is local and is not returned to the caller;
   the only state that survives the call is the static history buffer.

   Both branches now call feature_analysis() with its full four-argument
   signature, and the buffers use celt_word16 so they match the callee's
   parameter types in either build. */
void feature_analysis_fixed(CELTEncoder *celt_enc, const celt_int16 *x)
{
   /* FIXME: Get rid of this static var ASAP! It makes the function
      non-reentrant and shares history between all encoder instances. */
   static celt_word16 mem[16];
   celt_word16 features[23];
#ifdef FIXED_POINT
   /* Input is passed through without conversion in this build. */
   feature_analysis(celt_enc, x, features, mem);
#else
   /* Number of input samples handed to the analysis. */
   enum { ANALYSIS_LEN = 960-120 };
   celt_word16 x2[ANALYSIS_LEN];
   int i;
   /* Convert the 16-bit samples to the analysis sample type. */
   for (i=0;i<ANALYSIS_LEN;i++)
      x2[i] = x[i];
   feature_analysis(celt_enc, x2, features, mem);
#endif
}
......@@ -38,6 +38,7 @@
#include "entenc.h"
#include "modes.h"
#include "silk_API.h"
#include "features.h"
/* Transition tables for the voice and audio modes. First column is the
middle (memoryless) threshold. The second column is the hysteresis
......@@ -180,6 +181,8 @@ int opus_encode(OpusEncoder *st, const short *pcm, int frame_size,
silk_enc = (char*)st+st->silk_enc_offset;
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
feature_analysis_fixed(celt_enc, pcm);
if (st->user_bitrate_bps==OPUS_BITRATE_AUTO)
st->bitrate_bps = 60*st->Fs/frame_size + st->Fs*st->channels;
else
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment