diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 053ca774e4332dec17c6ddd0fe9c7f58b779c06b..2c1b783a5b71d8286f15e048235ecf56df2d5b12 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -73,7 +73,7 @@ void compute_noise(int *noise, float noise_std) { } } -static short float2short(float x) +static opus_int16 float2short(float x) { int i; i = (int)floor(.5+x); @@ -81,9 +81,9 @@ static short float2short(float x) } -void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *file) { +void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) { int i; - short data[2*FRAME_SIZE]; + opus_int16 data[2*FRAME_SIZE]; for (i=0;i<FRAME_SIZE;i++) { float p=0; float e; @@ -121,9 +121,9 @@ int main(int argc, char **argv) { FILE *f1; FILE *ffeat; FILE *fpcm=NULL; - short pcm[FRAME_SIZE]={0}; + opus_int16 pcm[FRAME_SIZE]={0}; int noisebuf[FRAME_SIZE]={0}; - short tmp[FRAME_SIZE] = {0}; + opus_int16 tmp[FRAME_SIZE] = {0}; float savedX[FRAME_SIZE] = {0}; float speech_gain=1; int last_silent = 1; @@ -173,11 +173,11 @@ int main(int argc, char **argv) { int silent; size_t ret; for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i]; - ret = fread(tmp, sizeof(short), FRAME_SIZE, f1); + ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1); if (feof(f1) || ret != FRAME_SIZE) { if (!training) break; rewind(f1); - ret = fread(tmp, sizeof(short), FRAME_SIZE, f1); + ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1); if (ret != FRAME_SIZE) { fprintf(stderr, "error reading\n"); exit(1); @@ -240,7 +240,7 @@ int main(int argc, char **argv) { process_single_frame(st, ffeat); if (fpcm) write_audio(st, pcm, noisebuf, fpcm); - /*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/ + /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/ for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]); old_speech_gain = speech_gain; count++; diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c index 1a1b16b798f955e25199553ef29e54df9dfed55b..99a58644630b5c7093a22aab0eddd75ace37a421 100644 --- a/dnn/lpcnet.c +++ b/dnn/lpcnet.c @@ -232,7 +232,7 @@ void lpcnet_reset_signal(LPCNetState *lpcnet) RNN_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE); } -void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload) +void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload) { int i; @@ -270,12 +270,12 @@ void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int } } -void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload) +void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload) { run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features); lpcnet_synthesize_tail_impl(lpcnet, output, N, preload); } -void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, short *output, int N) { +void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N) { lpcnet_synthesize_impl(lpcnet, features, output, N, 0); } diff --git a/dnn/lpcnet.h b/dnn/lpcnet.h index 52813c21e4ff7dfadd7cedf2001459a4cf3cb13e..836b168b9bad94130b80ffc9e31ec5c1fa867d98 100644 --- a/dnn/lpcnet.h +++ b/dnn/lpcnet.h @@ -27,7 +27,7 @@ #ifndef _LPCNET_H_ #define _LPCNET_H_ - +#include "opus_types.h" #define NB_FEATURES 20 #define NB_TOTAL_FEATURES 36 @@ -73,10 +73,10 @@ void lpcnet_decoder_destroy(LPCNetDecState *st); /** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640). * @param [in] st <tt>LPCNetDecState*</tt>: Decoder state * @param [in] buf <tt>const unsigned char *</tt>: Compressed packet - * @param [out] pcm <tt>short *</tt>: Decoded audio + * @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio * @retval 0 Success */ -int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, short *pcm); +int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm); @@ -106,27 +106,19 @@ void lpcnet_encoder_destroy(LPCNetEncState *st); /** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8). * @param [in] st <tt>LPCNetDecState*</tt>: Encoder state - * @param [in] pcm <tt>short *</tt>: Input speech to be encoded + * @param [in] pcm <tt>opus_int16 *</tt>: Input speech to be encoded * @param [out] buf <tt>const unsigned char *</tt>: Compressed packet * @retval 0 Success */ -int lpcnet_encode(LPCNetEncState *st, const short *pcm, unsigned char *buf); - -/** Compute features on LPCNET_PACKET_SAMPLES speech samples (currently 640) and output features for 4 10-ms frames at once. - * @param [in] st <tt>LPCNetDecState*</tt>: Encoder state - * @param [in] pcm <tt>short *</tt>: Input speech to be analyzed - * @param [out] features <tt>float[4][NB_TOTAL_FEATURES]</tt>: Four feature vectors - * @retval 0 Success - */ -int lpcnet_compute_features(LPCNetEncState *st, const short *pcm, float features[4][NB_TOTAL_FEATURES]); +int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf); /** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame. * @param [in] st <tt>LPCNetDecState*</tt>: Encoder state - * @param [in] pcm <tt>short *</tt>: Input speech to be analyzed + * @param [in] pcm <tt>opus_int16 *</tt>: Input speech to be analyzed * @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Four feature vectors * @retval 0 Success */ -int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]); +int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]); /** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame. @@ -164,11 +156,11 @@ void lpcnet_destroy(LPCNetState *st); /** Synthesizes speech from an LPCNet feature vector. * @param [in] st <tt>LPCNetState*</tt>: Synthesis state * @param [in] features <tt>const float *</tt>: Compressed packet - * @param [out] output <tt>short **</tt>: Synthesized speech + * @param [out] output <tt>opus_int16 **</tt>: Synthesized speech * @param [in] N <tt>int</tt>: Number of samples to generate * @retval 0 Success */ -void lpcnet_synthesize(LPCNetState *st, const float *features, short *output, int N); +void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N); #define LPCNET_PLC_CAUSAL 0 @@ -184,9 +176,9 @@ LPCNetPLCState *lpcnet_plc_create(int options); void lpcnet_plc_destroy(LPCNetPLCState *st); -int lpcnet_plc_update(LPCNetPLCState *st, short *pcm); +int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm); -int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm); +int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm); void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features); diff --git a/dnn/lpcnet_demo.c b/dnn/lpcnet_demo.c index 1b1a56505fd4116499a8c3ef2071b7e472909c03..a41f51208e3539c4aab06b00f2d9c940021f3743 100644 --- a/dnn/lpcnet_demo.c +++ b/dnn/lpcnet_demo.c @@ -158,7 +158,7 @@ int main(int argc, char **argv) { net = lpcnet_encoder_create(); while (1) { float features[NB_TOTAL_FEATURES]; - short pcm[LPCNET_FRAME_SIZE]; + opus_int16 pcm[LPCNET_FRAME_SIZE]; size_t ret; ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin); if (feof(fin) || ret != LPCNET_FRAME_SIZE) break; @@ -175,7 +175,7 @@ int main(int argc, char **argv) { while (1) { float in_features[NB_TOTAL_FEATURES]; float features[NB_FEATURES]; - short pcm[LPCNET_FRAME_SIZE]; + opus_int16 pcm[LPCNET_FRAME_SIZE]; size_t ret; ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin); if (feof(fin) || ret != NB_TOTAL_FEATURES) break; @@ -185,7 +185,7 @@ int main(int argc, char **argv) { } lpcnet_destroy(net); } else if (mode == MODE_PLC) { - short pcm[FRAME_SIZE]; + opus_int16 pcm[FRAME_SIZE]; int count=0; int loss=0; int skip=0, extra=0; diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index f5e33689f980cfd66a5beb1e039eb68f96c9006e..0e77ced2778df6bbb5d8d97ba7d194f403c667a5 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -233,7 +233,7 @@ static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float * return 0; } -int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]) { +int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]) { int i; float x[FRAME_SIZE]; for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i]; diff --git a/dnn/lpcnet_plc.c b/dnn/lpcnet_plc.c index 6d384a0370c12a472dd484ee692c2fc8eb188712..6f61b9c74afdaa34abbe02d3b8dd364a9f1fcb5b 100644 --- a/dnn/lpcnet_plc.c +++ b/dnn/lpcnet_plc.c @@ -175,10 +175,10 @@ void clear_state(LPCNetPLCState *st) { /* In this causal version of the code, the DNN model implemented by compute_plc_pred() needs to generate two feature vectors to conceal the first lost packet.*/ -int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { +int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm) { int i; float x[FRAME_SIZE]; - short output[FRAME_SIZE]; + opus_int16 output[FRAME_SIZE]; float plc_features[2*NB_BANDS+NB_FEATURES+1]; int delta = 0; for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i]; @@ -186,7 +186,7 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { if (st->skip_analysis) { /*fprintf(stderr, "skip update\n");*/ if (st->blend) { - short tmp[FRAME_SIZE-TRAINING_OFFSET]; + opus_int16 tmp[FRAME_SIZE-TRAINING_OFFSET]; float zeros[2*NB_BANDS+NB_FEATURES+1] = {0}; RNN_COPY(zeros, plc_features, 2*NB_BANDS); zeros[2*NB_BANDS+NB_FEATURES] = 1; @@ -263,9 +263,9 @@ int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) { } static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6}; -int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) { +int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) { int i; - short output[FRAME_SIZE]; + opus_int16 output[FRAME_SIZE]; run_frame_network_flush(&st->lpcnet); /* If we concealed the previous frame, finish synthesizing the rest of the samples. */ /* FIXME: Copy/predict features. */ diff --git a/dnn/lpcnet_private.h b/dnn/lpcnet_private.h index a0f0f7dac1670229440a6f8c57465ef8a190bab0..a2eba6dbd5d6ca0b0e4b5c25402e5c460dd66ede 100644 --- a/dnn/lpcnet_private.h +++ b/dnn/lpcnet_private.h @@ -77,7 +77,7 @@ struct LPCNetPLCState { int fec_read_pos; int fec_fill_pos; int fec_skip; - short pcm[PLC_BUF_SIZE+FRAME_SIZE]; + opus_int16 pcm[PLC_BUF_SIZE+FRAME_SIZE]; int pcm_fill; int skip_analysis; int blend; @@ -91,19 +91,17 @@ void preemphasis(float *y, float *mem, const float *x, float coef, int N); void compute_frame_features(LPCNetEncState *st, const float *in); -void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const unsigned char buf[8]); - void lpcnet_reset_signal(LPCNetState *lpcnet); void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features); void run_frame_network_deferred(LPCNetState *lpcnet, const float *features); void run_frame_network_flush(LPCNetState *lpcnet); -void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, short *output, int N, int preload); -void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, short *output, int N, int preload); -void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const short *pcm_in, short *output, int N); +void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload); +void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload); +void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N); void process_single_frame(LPCNetEncState *st, FILE *ffeat); -int lpcnet_compute_single_frame_features(LPCNetEncState *st, const short *pcm, float features[NB_TOTAL_FEATURES]); +int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES]); void process_single_frame(LPCNetEncState *st, FILE *ffeat);