diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 3187484fa6896d9e11b4beb8c8818ff7c083b1c1..1fd4224c4221a745cc5e1d64fbdd42d94ce3c37e 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -83,7 +83,7 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f float p=0; float e; int j; - for (j=0;j<LPC_ORDER;j++) p -= st->features[k][2*NB_BANDS+3+j]*st->sig_mem[j]; + for (j=0;j<LPC_ORDER;j++) p -= st->features[k][NB_BANDS+2+j]*st->sig_mem[j]; e = lin2ulaw(pcm[k*FRAME_SIZE+i] - p); /* Signal. */ data[4*i] = lin2ulaw(st->sig_mem[0]); diff --git a/dnn/include/lpcnet.h b/dnn/include/lpcnet.h index 2b4e547a780551b459b0bae63515f9584723e350..5fa5bbc0654bbcb534526b25e236511c6d4c6fd9 100644 --- a/dnn/include/lpcnet.h +++ b/dnn/include/lpcnet.h @@ -42,8 +42,8 @@ #endif -#define NB_FEATURES 38 -#define NB_TOTAL_FEATURES 55 +#define NB_FEATURES 20 +#define NB_TOTAL_FEATURES 36 /** Number of bytes in a compressed packet. */ #define LPCNET_COMPRESSED_SIZE 8 diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c index 101d2720506cf2f459bf89faae91bc5d414a9886..0020eb75cfcf6275cb653702c60d97c1ed6181be 100644 --- a/dnn/lpcnet.c +++ b/dnn/lpcnet.c @@ -139,7 +139,7 @@ LPCNET_EXPORT void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, float gru_b_condition[3*GRU_B_STATE_SIZE]; int pitch; /* Matches the Python code -- the 0.1 avoids rounding issues. */ - pitch = (int)floor(.1 + 50*features[36]+100); + pitch = (int)floor(.1 + 50*features[18]+100); pitch = IMIN(255, IMAX(33, pitch)); memmove(&lpcnet->old_gain[1], &lpcnet->old_gain[0], (FEATURES_DELAY-1)*sizeof(lpcnet->old_gain[0])); lpcnet->old_gain[0] = features[PITCH_GAIN_FEATURE]; diff --git a/dnn/lpcnet_dec.c b/dnn/lpcnet_dec.c index 77430885073925700454f4c490782a2e0f1e0383..6c8b2c4eb7498e346c517e890c31f63edf9700ea 100644 --- a/dnn/lpcnet_dec.c +++ b/dnn/lpcnet_dec.c @@ -124,8 +124,8 @@ void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const un float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD; p *= 1 + modulation/16./7.*(2*sub-3); p = MIN16(255, MAX16(33, p)); - features[sub][2*NB_BANDS] = .02*(p-100); - features[sub][2*NB_BANDS + 1] = frame_corr-.5; + features[sub][NB_BANDS] = .02*(p-100); + features[sub][NB_BANDS + 1] = frame_corr-.5; } features[3][0] = (c0_id-64)/4.; diff --git a/dnn/lpcnet_demo.c b/dnn/lpcnet_demo.c index a838840bdb4c43a34a9406202e304109677f5b2d..14616d3c54e71cda2bcc3252eaed30f5046a76f4 100644 --- a/dnn/lpcnet_demo.c +++ b/dnn/lpcnet_demo.c @@ -115,7 +115,6 @@ int main(int argc, char **argv) { fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin); if (feof(fin)) break; RNN_COPY(features, in_features, NB_FEATURES); - RNN_CLEAR(&features[18], 18); lpcnet_synthesize(net, features, pcm, LPCNET_FRAME_SIZE); fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout); } diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 062d0df44bab770637c36990d756a6f4aa772a31..1196a3e14691a7b52ac54580978d0c05e6de35a3 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -43,7 +43,7 @@ #include "lpcnet.h" -//#define NB_FEATURES (2*NB_BANDS+3+LPC_ORDER) +//#define NB_FEATURES (NB_BANDS+2+LPC_ORDER) #define SURVIVORS 5 @@ -499,7 +499,6 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { float E = 0; float Ly[NB_BANDS]; float follow, logMax; - float g; kiss_fft_cpx X[FREQ_SIZE]; float Ex[NB_BANDS]; float xcorr[PITCH_MAX_PERIOD]; @@ -519,9 +518,8 @@ void compute_frame_features(LPCNetEncState *st, const float *in) { } dct(st->features[st->pcount], Ly); st->features[st->pcount][0] -= 4; - g = lpc_from_cepstrum(st->lpc, st->features[st->pcount]); - st->features[st->pcount][2*NB_BANDS+2] = log10(g); - for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][2*NB_BANDS+3+i] = st->lpc[i]; + lpc_from_cepstrum(st->lpc, st->features[st->pcount]); + for (i=0;i<LPC_ORDER;i++) st->features[st->pcount][NB_BANDS+2+i] = st->lpc[i]; RNN_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD); RNN_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET); for (i=0;i<FRAME_SIZE;i++) { @@ -663,13 +661,13 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD; p *= 1 + modulation/16./7.*(2*sub-3); p = MIN16(255, MAX16(33, p)); - st->features[sub][2*NB_BANDS] = .02*(p-100); - st->features[sub][2*NB_BANDS + 1] = frame_corr-.5; + st->features[sub][NB_BANDS] = .02*(p-100); + st->features[sub][NB_BANDS + 1] = frame_corr-.5; } else { - st->features[sub][2*NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200); - st->features[sub][2*NB_BANDS + 1] = frame_corr-.5; + st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200); + st->features[sub][NB_BANDS + 1] = frame_corr-.5; } - //printf("%f %d %f\n", st->features[sub][2*NB_BANDS], best[2+2*sub], frame_corr); + //printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr); } //printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr); RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD); @@ -686,9 +684,8 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int perform_double_interp(st->features, st->vq_mem, interp_id); } for (sub=0;sub<4;sub++) { - float g = lpc_from_cepstrum(st->lpc, st->features[sub]); - st->features[sub][2*NB_BANDS+2] = log10(g); - for (i=0;i<LPC_ORDER;i++) st->features[sub][2*NB_BANDS+3+i] = st->lpc[i]; + lpc_from_cepstrum(st->lpc, st->features[sub]); + for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i]; } //printf("\n"); RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS); diff --git a/dnn/test_lpcnet.c b/dnn/test_lpcnet.c index dadbcfc27b55f91875dec2eaf005693d3a7a8cb3..01917993f50cb0d202a453ac8c2c07a5112eff61 100644 --- a/dnn/test_lpcnet.c +++ b/dnn/test_lpcnet.c @@ -59,7 +59,6 @@ int main(int argc, char **argv) { fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin); if (feof(fin)) break; RNN_COPY(features, in_features, NB_FEATURES); - RNN_CLEAR(&features[18], 18); lpcnet_synthesize(net, features, pcm, FRAME_SIZE); fwrite(pcm, sizeof(pcm[0]), FRAME_SIZE, fout); } diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py index e4346c3eca8890340332da7acaab6717f465b2b6..11d5f329e9de1dc783f4c2d312db21764f182c02 100644 --- a/dnn/training_tf2/lpcnet.py +++ b/dnn/training_tf2/lpcnet.py @@ -212,7 +212,7 @@ class WeightClip(Constraint): constraint = WeightClip(0.992) -def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False, quantize=False): +def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 20, training=False, adaptation=False, quantize=False): pcm = Input(shape=(None, 3)) feat = Input(shape=(None, nb_used_features)) pitch = Input(shape=(None, 1)) diff --git a/dnn/training_tf2/test_lpcnet.py b/dnn/training_tf2/test_lpcnet.py index 90216275913d737d9c6f84fbbb21634ef6f97d4e..9a48d5667aeeedd885e32e846652116f8f899476 100755 --- a/dnn/training_tf2/test_lpcnet.py +++ b/dnn/training_tf2/test_lpcnet.py @@ -40,7 +40,7 @@ model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics= feature_file = sys.argv[1] out_file = sys.argv[2] frame_size = model.frame_size -nb_features = 55 +nb_features = 36 nb_used_features = model.nb_used_features features = np.fromfile(feature_file, dtype='float32') @@ -50,12 +50,11 @@ feature_chunk_size = features.shape[0] pcm_chunk_size = frame_size*feature_chunk_size features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features)) -features[:,:,18:36] = 0 -periods = (.1 + 50*features[:,:,36:37]+100).astype('int16') +periods = (.1 + 50*features[:,:,18:19]+100).astype('int16') -model.load_weights('lpcnet34bq17_384_01.h5') +model.load_weights('lpcnet38Sn_384_02.h5'); order = 16 @@ -81,7 +80,7 @@ for c in range(0, nb_frames): p, state1, state2 = dec.predict([fexc, cfeat[:, fr:fr+1, :], state1, state2]) #Lower the temperature for voiced frames to reduce noisiness - p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 37] - .5)) + p *= np.power(p, np.maximum(0, 1.5*features[c, fr, 19] - .5)) p = p/(1e-18 + np.sum(p)) #Cut off the tail of the remaining distribution p = np.maximum(p-0.002, 0).astype('float64') diff --git a/dnn/training_tf2/train_lpcnet.py b/dnn/training_tf2/train_lpcnet.py index 0e98ada6f272a67d210762450de545f633750f48..c3ecd44b2a8dc3fc1b0d37534400f6a7e90d7a98 100755 --- a/dnn/training_tf2/train_lpcnet.py +++ b/dnn/training_tf2/train_lpcnet.py @@ -104,7 +104,7 @@ with strategy.scope(): feature_file = args.features pcm_file = args.data # 16 bit unsigned short PCM samples frame_size = model.frame_size -nb_features = 55 +nb_features = 36 nb_used_features = model.nb_used_features feature_chunk_size = 15 pcm_chunk_size = frame_size*feature_chunk_size @@ -130,14 +130,13 @@ print("ulaw std = ", np.std(out_exc)) features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features)) features = features[:, :, :nb_used_features] -features[:,:,18:36] = 0 fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0) fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0) features = np.concatenate([fpad1, features, fpad2], axis=1) -periods = (.1 + 50*features[:,:,36:37]+100).astype('int16') +periods = (.1 + 50*features[:,:,18:19]+100).astype('int16') #periods = np.minimum(periods, 255) # dump models to disk as we go