From f36685fc974394aa0d0f4db1bb601afc4780e3ed Mon Sep 17 00:00:00 2001 From: Marcus Asteborg <xnorpx@outlook.com> Date: Thu, 22 Jun 2023 05:27:54 -0700 Subject: [PATCH] Remove trailing whitespace in dnn --- dnn/README.md | 5 +- dnn/datasets.txt | 1 - dnn/download_model.bat | 1 - dnn/dump_data.c | 5 +- dnn/freq.c | 1 - dnn/lpcnet.c | 4 +- dnn/nnet.c | 10 +-- dnn/parse_lpcnet_weights.c | 4 +- dnn/test_vec.c | 6 +- dnn/torch/rdovae/export_rdovae_weights.py | 66 +++++++-------- dnn/torch/rdovae/fec_encoder.py | 15 ++-- dnn/torch/rdovae/import_rdovae_weights.py | 10 +-- dnn/torch/rdovae/packets/fec_packets.c | 4 +- dnn/torch/rdovae/packets/fec_packets.py | 40 +++++----- dnn/torch/rdovae/rdovae/dataset.py | 5 +- dnn/torch/rdovae/rdovae/rdovae.py | 97 +++++++++++------------ dnn/torch/rdovae/train_rdovae.py | 22 ++--- dnn/training_tf2/diffembed.py | 8 +- dnn/training_tf2/dump_lpcnet.py | 6 +- dnn/training_tf2/dump_plc.py | 2 +- dnn/training_tf2/dump_rdovae.py | 6 +- dnn/training_tf2/fec_encoder.py | 7 +- dnn/training_tf2/fec_packets.c | 4 +- dnn/training_tf2/fec_packets.py | 40 +++++----- dnn/training_tf2/lossfuncs.py | 7 +- dnn/training_tf2/lpcnet.py | 14 ++-- dnn/training_tf2/lpcnet_plc.py | 4 +- dnn/training_tf2/mdense.py | 2 +- dnn/training_tf2/parameters.py | 12 +-- dnn/training_tf2/plc_loader.py | 2 +- dnn/training_tf2/rdovae.py | 13 ++- dnn/training_tf2/rdovae_exchange.py | 4 +- dnn/training_tf2/rdovae_import.py | 14 ++-- dnn/training_tf2/test_lpcnet.py | 2 - dnn/training_tf2/tf_funcs.py | 4 +- dnn/training_tf2/train_lpcnet.py | 2 +- dnn/vec_neon.h | 26 +++--- 37 files changed, 230 insertions(+), 245 deletions(-) diff --git a/dnn/README.md b/dnn/README.md index 930b043c2..ad4a6724f 100644 --- a/dnn/README.md +++ b/dnn/README.md @@ -115,7 +115,7 @@ This codebase is also meant for research and it is possible to train new models. and move the generated nnet\_data.\* files to the src/ directory. Then you just need to rebuild the software and use lpcnet\_demo as explained above. -# Speech Material for Training +# Speech Material for Training Suitable training material can be obtained from [Open Speech and Language Resources](https://www.openslr.org/). See the datasets.txt file for details on suitable training data. @@ -123,5 +123,4 @@ Suitable training material can be obtained from [Open Speech and Language Resour 1. [LPCNet: DSP-Boosted Neural Speech Synthesis](https://people.xiph.org/~jm/demo/lpcnet/) 1. [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://people.xiph.org/~jm/demo/lpcnet_codec/) -1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/ - +1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/ diff --git a/dnn/datasets.txt b/dnn/datasets.txt index 160bc316e..00445216a 100644 --- a/dnn/datasets.txt +++ b/dnn/datasets.txt @@ -171,4 +171,3 @@ The corresponding citations for all these datasets are: journal={arXiv preprint arXiv:2104.01497}, year={2021} } - diff --git a/dnn/download_model.bat b/dnn/download_model.bat index df5f1169e..ba16f0f0d 100644 --- a/dnn/download_model.bat +++ b/dnn/download_model.bat @@ -9,4 +9,3 @@ if not exist %model% ( tar -xvzf %model% move .\src\*.c . move .\src\*.h . - diff --git a/dnn/dump_data.c b/dnn/dump_data.c index 7a0da10be..0515352b1 100644 --- a/dnn/dump_data.c +++ b/dnn/dump_data.c @@ -98,7 +98,7 @@ void write_audio(LPCNetEncState *st, const short *pcm, const int *noise, FILE *f /* Simulate error on excitation. 
*/ e += noise[k*FRAME_SIZE+i]; e = IMIN(255, IMAX(0, e)); - + RNN_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1); st->sig_mem[0] = p + ulaw2lin(e); st->exc_mem = e; @@ -241,7 +241,7 @@ int main(int argc, char **argv) { if (fpcm) { compute_noise(&noisebuf[st->pcount*FRAME_SIZE], noise_std); } - + process_single_frame(st, ffeat); if (fpcm) write_audio(st, pcm, &noisebuf[st->pcount*FRAME_SIZE], fpcm, 1); st->pcount++; @@ -260,4 +260,3 @@ int main(int argc, char **argv) { lpcnet_encoder_destroy(st); return 0; } - diff --git a/dnn/freq.c b/dnn/freq.c index ee867d390..8ef1efe60 100644 --- a/dnn/freq.c +++ b/dnn/freq.c @@ -326,4 +326,3 @@ void apply_window(float *x) { x[WINDOW_SIZE - 1 - i] *= half_window[i]; } } - diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c index 7eee53495..1a1b16b79 100644 --- a/dnn/lpcnet.c +++ b/dnn/lpcnet.c @@ -61,7 +61,7 @@ void rc2lpc(float *lpc, const float *rc) float ntmp[LPC_ORDER] = {0.0}; RNN_COPY(tmp, rc, LPC_ORDER); for(i = 0; i < LPC_ORDER ; i++) - { + { for(j = 0; j <= i-1; j++) { ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1]; @@ -106,7 +106,7 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b _lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition); #ifdef END2END rc2lpc(lpc, rc); -#elif FEATURES_DELAY>0 +#elif FEATURES_DELAY>0 memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0])); memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0])); lpc_from_cepstrum(lpcnet->old_lpc[0], features); diff --git a/dnn/nnet.c b/dnn/nnet.c index 580496ee0..e5e12c20e 100644 --- a/dnn/nnet.c +++ b/dnn/nnet.c @@ -170,7 +170,7 @@ int sample_mdense(const MDenseLayer *layer, const float *input, const float *sam C = layer->nb_channels; celt_assert(N*C <= MAX_MDENSE_TMP); stride = M*C; - + celt_assert(N <= DUAL_FC_OUT_SIZE); /* Computing all the random thresholds in advance. 
These thresholds are directly @@ -188,7 +188,7 @@ int sample_mdense(const MDenseLayer *layer, const float *input, const float *sam int bit; int i; float sum1, sum2; - + i = (1<<b) | val; sum1 = layer->bias[i]; @@ -426,7 +426,7 @@ void compute_sparse_gru(const SparseGRULayer *gru, float *state, const float *in #ifdef USE_SU_BIAS bias = &gru->subias[3*N]; #else - bias = &gru->bias[3*N]; + bias = &gru->bias[3*N]; #endif for (k=0;k<2;k++) { @@ -478,7 +478,7 @@ void compute_embedding(const EmbeddingLayer *layer, float *output, int input) for (i=0;i<layer->dim;i++) { output[i] = layer->embedding_weights[input*layer->dim + i]; - } + } } void compute_gru_a_input(float *output, const float *input, int N, const EmbeddingLayer *layer1, int val1, const EmbeddingLayer *layer2, int val2, const EmbeddingLayer *layer3, int val3) { @@ -499,5 +499,5 @@ void accum_embedding(const EmbeddingLayer *layer, float *output, int input) for (i=0;i<layer->dim;i++) { output[i] += layer->embedding_weights[input*layer->dim + i]; - } + } } diff --git a/dnn/parse_lpcnet_weights.c b/dnn/parse_lpcnet_weights.c index 457d44e8e..493ecb0a5 100644 --- a/dnn/parse_lpcnet_weights.c +++ b/dnn/parse_lpcnet_weights.c @@ -45,7 +45,7 @@ int parse_record(const unsigned char **data, int *len, WeightArray *array) { array->type = h->type; array->size = h->size; array->data = (*data)+WEIGHT_BLOCK_SIZE; - + *data += h->block_size+WEIGHT_BLOCK_SIZE; *len -= h->block_size+WEIGHT_BLOCK_SIZE; return array->size; @@ -103,7 +103,7 @@ static const void *find_idx_check(const WeightArray *arrays, const char *name, i if (remain < nb_blocks+1) return NULL; for (i=0;i<nb_blocks;i++) { int pos = *idx++; - if (pos+3 >= nb_in || (pos&0x3)) return NULL; + if (pos+3 >= nb_in || (pos&0x3)) return NULL; } nb_out -= 8; remain -= nb_blocks+1; diff --git a/dnn/test_vec.c b/dnn/test_vec.c index 1fdc7cb40..d14d2502a 100644 --- a/dnn/test_vec.c +++ b/dnn/test_vec.c @@ -63,7 +63,7 @@ int test_sgemv_accum16() { out[i] = 0; out_fast[i] = 0; } - + for(i=0; i<COLS; i++) { x[i] = i+1; } @@ -101,7 +101,7 @@ int test_sparse_sgemv_accum16() { out[i] = 0; out_fast[i] = 0; } - + sparse_sgemv_accum16(out, w, rows, indx, x); sparse_sgemv_accum16_fast(out_fast, w, rows, indx, x); @@ -126,5 +126,3 @@ int main() { int test2 = test_sparse_sgemv_accum16(); return test1 || test2; } - - diff --git a/dnn/torch/rdovae/export_rdovae_weights.py b/dnn/torch/rdovae/export_rdovae_weights.py index fdef5f63c..b6fbaa4b3 100644 --- a/dnn/torch/rdovae/export_rdovae_weights.py +++ b/dnn/torch/rdovae/export_rdovae_weights.py @@ -80,14 +80,14 @@ extern const opus_uint16 dred_p0_q15[{levels * N}]; def c_export(args, model): - + message = f"Auto generated from checkpoint {os.path.basename(args.checkpoint)}" - + enc_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_enc_data"), message=message) dec_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_dec_data"), message=message) stats_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_stats_data"), message=message) constants_writer = CWriter(os.path.join(args.output_dir, "dred_rdovae_constants"), message=message, header_only=True) - + # some custom includes for writer in [enc_writer, dec_writer, stats_writer]: writer.header.write( @@ -99,10 +99,10 @@ f""" #include "nnet.h" """ ) - + # encoder encoder_dense_layers = [ - ('core_encoder.module.dense_1' , 'enc_dense1', 'TANH'), + ('core_encoder.module.dense_1' , 'enc_dense1', 'TANH'), ('core_encoder.module.dense_2' , 'enc_dense3', 'TANH'), ('core_encoder.module.dense_3' , 
'enc_dense5', 'TANH'), ('core_encoder.module.dense_4' , 'enc_dense7', 'TANH'), @@ -110,31 +110,31 @@ f""" ('core_encoder.module.state_dense_1' , 'gdense1' , 'TANH'), ('core_encoder.module.state_dense_2' , 'gdense2' , 'TANH') ] - + for name, export_name, activation in encoder_dense_layers: layer = model.get_submodule(name) dump_torch_weights(enc_writer, layer, name=export_name, activation=activation, verbose=True) - - - encoder_gru_layers = [ + + + encoder_gru_layers = [ ('core_encoder.module.gru_1' , 'enc_dense2', 'TANH'), ('core_encoder.module.gru_2' , 'enc_dense4', 'TANH'), ('core_encoder.module.gru_3' , 'enc_dense6', 'TANH') ] - + enc_max_rnn_units = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True) for name, export_name, activation in encoder_gru_layers]) - - - encoder_conv_layers = [ - ('core_encoder.module.conv1' , 'bits_dense' , 'LINEAR') + + + encoder_conv_layers = [ + ('core_encoder.module.conv1' , 'bits_dense' , 'LINEAR') ] - - enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers]) - + enc_max_conv_inputs = max([dump_torch_weights(enc_writer, model.get_submodule(name), export_name, activation, verbose=True) for name, export_name, activation in encoder_conv_layers]) + + del enc_writer - + # decoder decoder_dense_layers = [ ('core_decoder.module.gru_1_init' , 'state1', 'TANH'), @@ -151,25 +151,25 @@ f""" for name, export_name, activation in decoder_dense_layers: layer = model.get_submodule(name) dump_torch_weights(dec_writer, layer, name=export_name, activation=activation, verbose=True) - + decoder_gru_layers = [ ('core_decoder.module.gru_1' , 'dec_dense2', 'TANH'), ('core_decoder.module.gru_2' , 'dec_dense4', 'TANH'), ('core_decoder.module.gru_3' , 'dec_dense6', 'TANH') ] - + dec_max_rnn_units = max([dump_torch_weights(dec_writer, model.get_submodule(name), export_name, activation, verbose=True, input_sparse=True, dotp=True) for name, export_name, activation in decoder_gru_layers]) - + del dec_writer - + # statistical model qembedding = model.statistical_model.quant_embedding dump_statistical_model(stats_writer, qembedding) - + del stats_writer - + # constants constants_writer.header.write( f""" @@ -193,12 +193,12 @@ f""" """ ) - + del constants_writer def numpy_export(args, model): - + exchange_name_to_name = { 'encoder_stack_layer1_dense' : 'core_encoder.module.dense_1', 'encoder_stack_layer3_dense' : 'core_encoder.module.dense_2', @@ -225,20 +225,20 @@ def numpy_export(args, model): 'decoder_stack_layer4_gru' : 'core_decoder.module.gru_2', 'decoder_stack_layer6_gru' : 'core_decoder.module.gru_3' } - + name_to_exchange_name = {value : key for key, value in exchange_name_to_name.items()} - + for name, exchange_name in name_to_exchange_name.items(): print(f"printing layer {name}...") dump_torch_weights(os.path.join(args.output_dir, exchange_name), model.get_submodule(name)) if __name__ == "__main__": - - + + os.makedirs(args.output_dir, exist_ok=True) - - + + # load model from checkpoint checkpoint = torch.load(args.checkpoint, map_location='cpu') model = RDOVAE(*checkpoint['model_args'], **checkpoint['model_kwargs']) @@ -249,7 +249,7 @@ if __name__ == "__main__": if len(unmatched_keys) > 0: print(f"warning: the following keys were unmatched {unmatched_keys}") - + if args.format == 'C': c_export(args, model) elif args.format == 'numpy': diff --git 
a/dnn/torch/rdovae/fec_encoder.py b/dnn/torch/rdovae/fec_encoder.py index 291c0628b..20ab4ac3a 100644 --- a/dnn/torch/rdovae/fec_encoder.py +++ b/dnn/torch/rdovae/fec_encoder.py @@ -84,7 +84,7 @@ total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay # load signal if args.input.endswith('.raw') or args.input.endswith('.pcm'): signal = np.fromfile(args.input, dtype='int16') - + elif args.input.endswith('.wav'): fs, signal = wavfile.read(args.input) else: @@ -94,7 +94,7 @@ else: padded_signal_length = len(signal) + total_delay tail = padded_signal_length % frame_size right_padding = (frame_size - tail) % frame_size - + signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16))) padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw' @@ -152,7 +152,7 @@ with torch.no_grad(): zi = torch.clone(z[:, i - 2 * input_length + 2: i + 1 : 2, :]) zi, rates = model.quantize(zi, quant_ids) zi = model.unquantize(zi, quant_ids) - + features = model.decode(zi, states[:, i : i + 1, :]) packets.append(features.squeeze(0).numpy()) packet_size = 8 * int((torch.sum(rates) + 7 + state_size) / 8) @@ -176,7 +176,7 @@ if args.lossfile != None: count = 2 for i in range(num_packets): if (loss[i] == 0) or (i == num_packets - 1): - + fec_out[ptr:ptr+count,:] = packets[i][foffset:, :] ptr += count @@ -190,14 +190,14 @@ if args.lossfile != None: fec_out_full[:, : fec_out.shape[-1]] = fec_out fec_out_full.tofile(packet_file[:-4] + f'_fec.f32') - - + + if args.debug_output: import itertools batches = [4] offsets = [0, 2 * args.num_redundancy_frames - 4] - + # sanity checks # 1. concatenate features at offset 0 for batch, offset in itertools.product(batches, offsets): @@ -210,4 +210,3 @@ if args.debug_output: print(f"writing debug output {packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32'}") test_features_full.tofile(packet_file[:-4] + f'_torch_batch{batch}_offset{offset}.f32') - diff --git a/dnn/torch/rdovae/import_rdovae_weights.py b/dnn/torch/rdovae/import_rdovae_weights.py index eba05018c..c824986d5 100644 --- a/dnn/torch/rdovae/import_rdovae_weights.py +++ b/dnn/torch/rdovae/import_rdovae_weights.py @@ -90,7 +90,7 @@ if __name__ == "__main__": cond_size = args.cond_size cond_size2 = args.cond_size2 state_dim = args.state_dim - + # model checkpoint['model_args'] = (num_features, latent_dim, quant_levels, cond_size, cond_size2) @@ -105,9 +105,9 @@ if __name__ == "__main__": 'encoder_stack_layer8_dense', 'encoder_state_layer1_dense', 'encoder_state_layer2_dense', - 'decoder_state1_dense', - 'decoder_state2_dense', - 'decoder_state3_dense', + 'decoder_state1_dense', + 'decoder_state2_dense', + 'decoder_state3_dense', 'decoder_stack_layer1_dense', 'decoder_stack_layer3_dense', 'decoder_stack_layer5_dense', @@ -122,7 +122,7 @@ if __name__ == "__main__": 'encoder_stack_layer6_gru', 'decoder_stack_layer2_gru', 'decoder_stack_layer4_gru', - 'decoder_stack_layer6_gru' + 'decoder_stack_layer6_gru' ] conv1d_layer_names = [ diff --git a/dnn/torch/rdovae/packets/fec_packets.c b/dnn/torch/rdovae/packets/fec_packets.c index 376fb4f16..ee08ba95c 100644 --- a/dnn/torch/rdovae/packets/fec_packets.c +++ b/dnn/torch/rdovae/packets/fec_packets.c @@ -43,7 +43,7 @@ int get_fec_frame(const char * const filename, float *features, int packet_index long offset; FILE *fid = fopen(filename, "rb"); - + /* read header */ if (fread(&version, sizeof(version), 1, fid) != 1) goto error; if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error; 
@@ -88,7 +88,7 @@ int get_fec_rate(const char * const filename, int packet_index) int16_t rate; FILE *fid = fopen(filename, "rb"); - + /* read header */ if (fread(&version, sizeof(version), 1, fid) != 1) goto error; if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error; diff --git a/dnn/torch/rdovae/packets/fec_packets.py b/dnn/torch/rdovae/packets/fec_packets.py index 14bed1f8c..f44c1a957 100644 --- a/dnn/torch/rdovae/packets/fec_packets.py +++ b/dnn/torch/rdovae/packets/fec_packets.py @@ -33,25 +33,25 @@ import numpy as np def write_fec_packets(filename, packets, rates=None): """ writes packets in binary format """ - + assert np.dtype(np.float32).itemsize == 4 assert np.dtype(np.int16).itemsize == 2 - - # derive some sizes + + # derive some sizes num_packets = len(packets) subframes_per_packet = packets[0].shape[-2] num_features = packets[0].shape[-1] - + # size of float is 4 subframe_size = num_features * 4 packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate - + version = 1 # header size (version, header_size, num_packets, packet_size, subframe_size, subrames_per_packet, num_features) header_size = 14 - + with open(filename, 'wb') as f: - + # header f.write(np.int16(version).tobytes()) f.write(np.int16(header_size).tobytes()) @@ -60,28 +60,28 @@ def write_fec_packets(filename, packets, rates=None): f.write(np.int16(subframe_size).tobytes()) f.write(np.int16(subframes_per_packet).tobytes()) f.write(np.int16(num_features).tobytes()) - + # packets for i, packet in enumerate(packets): if type(rates) == type(None): rate = 0 else: rate = rates[i] - + f.write(np.int16(rate).tobytes()) - + features = np.flip(packet, axis=-2) f.write(features.astype(np.float32).tobytes()) - - + + def read_fec_packets(filename): """ reads packets from binary format """ - + assert np.dtype(np.float32).itemsize == 4 assert np.dtype(np.int16).itemsize == 2 - + with open(filename, 'rb') as f: - + # header version = np.frombuffer(f.read(2), dtype=np.int16).item() header_size = np.frombuffer(f.read(2), dtype=np.int16).item() @@ -90,19 +90,19 @@ def read_fec_packets(filename): subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item() subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item() num_features = np.frombuffer(f.read(2), dtype=np.int16).item() - + dummy_features = np.zeros((subframes_per_packet, num_features), dtype=np.float32) - + # packets rates = [] packets = [] for i in range(num_packets): - + rate = np.frombuffer(f.read(2), dtype=np.int16).item rates.append(rate) - + features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape) packet = np.flip(features, axis=-2) packets.append(packet) - + return packets \ No newline at end of file diff --git a/dnn/torch/rdovae/rdovae/dataset.py b/dnn/torch/rdovae/rdovae/dataset.py index 99630d8b9..cfb32b05e 100644 --- a/dnn/torch/rdovae/rdovae/dataset.py +++ b/dnn/torch/rdovae/rdovae/dataset.py @@ -40,7 +40,7 @@ class RDOVAEDataset(torch.utils.data.Dataset): lambda_max=0.0135, quant_levels=16, enc_stride=2): - + self.sequence_length = sequence_length self.lambda_min = lambda_min self.lambda_max = lambda_max @@ -50,7 +50,7 @@ class RDOVAEDataset(torch.utils.data.Dataset): if sequence_length % enc_stride: raise ValueError(f"RDOVAEDataset.__init__: enc_stride {enc_stride} does not divide sequence length {sequence_length}") - + self.features = np.reshape(np.fromfile(feature_file, dtype=np.float32), (-1, num_features)) self.features = self.features[:, 
:num_used_features] self.num_sequences = self.features.shape[0] // sequence_length @@ -65,4 +65,3 @@ class RDOVAEDataset(torch.utils.data.Dataset): rate_lambda = self.lambda_min * np.exp(q_ids.astype(np.float32) / self.denominator).astype(np.float32) return features, rate_lambda, q_ids - diff --git a/dnn/torch/rdovae/rdovae/rdovae.py b/dnn/torch/rdovae/rdovae/rdovae.py index b45d2b8c3..1eec42c17 100644 --- a/dnn/torch/rdovae/rdovae/rdovae.py +++ b/dnn/torch/rdovae/rdovae/rdovae.py @@ -42,7 +42,7 @@ def soft_pvq(x, k): # L2 normalization x_norm2 = x / (1e-15 + torch.norm(x, dim=-1, keepdim=True)) - + with torch.no_grad(): # quantization loop, no need to track gradients here @@ -84,19 +84,19 @@ def cache_parameters(func): return cache[args] else: cache[args] = func(*args) - + return cache[args] return cached_func - + @cache_parameters def pvq_codebook_size(n, k): - + if k == 0: return 1 - + if n == 0: return 0 - + return pvq_codebook_size(n - 1, k) + pvq_codebook_size(n, k - 1) + pvq_codebook_size(n - 1, k - 1) @@ -121,7 +121,7 @@ def hard_rate_estimate(z, r, theta, reduce=True): p0 = 1 - r ** (0.5 + 0.5 * theta) alpha = torch.relu(1 - torch.abs(z_q)) ** 2 rate = - torch.sum( - (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6) + (alpha * torch.log2(p0 * r ** torch.abs(z_q) + 1e-6) + (1 - alpha) * torch.log2(0.5 * (1 - p0) * (1 - r) * r ** (torch.abs(z_q) - 1) + 1e-6)), dim=-1 ) @@ -154,7 +154,7 @@ def noise_quantize(x): def distortion_loss(y_true, y_pred, rate_lambda=None): """ custom distortion loss for LPCNet features """ - + if y_true.size(-1) != 20: raise ValueError('distortion loss is designed to work with 20 features') @@ -169,7 +169,7 @@ def distortion_loss(y_true, y_pred, rate_lambda=None): loss = loss / torch.sqrt(rate_lambda) loss = torch.mean(loss) - + return loss @@ -181,23 +181,23 @@ import random def random_split(start, stop, num_splits=3, min_len=3): get_min_len = lambda x : min([x[i+1] - x[i] for i in range(len(x) - 1)]) candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop] - - while get_min_len(candidate) < min_len: + + while get_min_len(candidate) < min_len: candidate = [start] + sorted([random.randint(start, stop-1) for i in range(num_splits)]) + [stop] - + return candidate # weight initialization and clipping def init_weights(module): - + if isinstance(module, nn.GRU): for p in module.named_parameters(): if p[0].startswith('weight_hh_'): nn.init.orthogonal_(p[1]) - + def weight_clip_factory(max_value): """ weight clipping function concerning sum of abs values of adjecent weights """ def clip_weight_(w): @@ -213,13 +213,13 @@ def weight_clip_factory(max_value): 1)) with torch.no_grad(): w[:, :stop] *= factor - + def clip_weights(module): if isinstance(module, nn.GRU) or isinstance(module, nn.Linear): for name, w in module.named_parameters(): if name.startswith('weight'): clip_weight_(w) - + return clip_weights # RDOVAE module and submodules @@ -229,12 +229,12 @@ class CoreEncoder(nn.Module): STATE_HIDDEN = 128 FRAMES_PER_STEP = 2 CONV_KERNEL_SIZE = 4 - + def __init__(self, feature_dim, output_dim, cond_size, cond_size2, state_size=24): """ core encoder for RDOVAE - + Computes latents, initial states, and rate estimates from features and lambda parameter - + """ super(CoreEncoder, self).__init__() @@ -289,7 +289,7 @@ class CoreEncoder(nn.Module): # concatenation of all hidden layer outputs x9 = torch.cat((x1, x2, x3, x4, x5, x6, x7, x8), dim=-1) - + # init state for decoder states = torch.tanh(self.state_dense_1(x9)) states = 
torch.tanh(self.state_dense_2(states)) @@ -309,9 +309,9 @@ class CoreDecoder(nn.Module): def __init__(self, input_dim, output_dim, cond_size, cond_size2, state_size=24): """ core decoder for RDOVAE - + Computes features from latents, initial state, and quantization index - + """ super(CoreDecoder, self).__init__() @@ -324,7 +324,7 @@ class CoreDecoder(nn.Module): self.state_size = state_size self.input_size = self.input_dim - + self.concat_size = 4 * self.cond_size + 4 * self.cond_size2 # layers @@ -348,7 +348,7 @@ class CoreDecoder(nn.Module): self.apply(init_weights) def forward(self, z, initial_state): - + gru_1_state = torch.tanh(self.gru_1_init(initial_state).permute(1, 0, 2)) gru_2_state = torch.tanh(self.gru_2_init(initial_state).permute(1, 0, 2)) gru_3_state = torch.tanh(self.gru_3_init(initial_state).permute(1, 0, 2)) @@ -374,9 +374,9 @@ class CoreDecoder(nn.Module): class StatisticalModel(nn.Module): def __init__(self, quant_levels, latent_dim): """ Statistical model for latent space - - Computes scaling, deadzone, r, and theta - + + Computes scaling, deadzone, r, and theta + """ super(StatisticalModel, self).__init__() @@ -388,7 +388,7 @@ class StatisticalModel(nn.Module): # quantization embedding self.quant_embedding = nn.Embedding(quant_levels, self.embedding_dim) - + # initialize embedding to 0 with torch.no_grad(): self.quant_embedding.weight[:] = 0 @@ -406,7 +406,7 @@ class StatisticalModel(nn.Module): r_soft = torch.sigmoid(x[..., 3 * self.latent_dim : 4 * self.latent_dim]) theta_hard = torch.sigmoid(x[..., 4 * self.latent_dim : 5 * self.latent_dim]) r_hard = torch.sigmoid(x[..., 5 * self.latent_dim : 6 * self.latent_dim]) - + return { 'quant_embedding' : x, @@ -443,34 +443,34 @@ class RDOVAE(nn.Module): self.state_dim = state_dim self.pvq_num_pulses = pvq_num_pulses self.state_dropout_rate = state_dropout_rate - + # submodules encoder and decoder share the statistical model self.statistical_model = StatisticalModel(quant_levels, latent_dim) self.core_encoder = nn.DataParallel(CoreEncoder(feature_dim, latent_dim, cond_size, cond_size2, state_size=state_dim)) self.core_decoder = nn.DataParallel(CoreDecoder(latent_dim, feature_dim, cond_size, cond_size2, state_size=state_dim)) - + self.enc_stride = CoreEncoder.FRAMES_PER_STEP self.dec_stride = CoreDecoder.FRAMES_PER_STEP - + if clip_weights: self.weight_clip_fn = weight_clip_factory(0.496) else: self.weight_clip_fn = None - + if self.dec_stride % self.enc_stride != 0: raise ValueError(f"get_decoder_chunks_generic: encoder stride does not divide decoder stride") - + def clip_weights(self): if not type(self.weight_clip_fn) == type(None): self.apply(self.weight_clip_fn) - + def get_decoder_chunks(self, z_frames, mode='split', chunks_per_offset = 4): - + enc_stride = self.enc_stride dec_stride = self.dec_stride stride = dec_stride // enc_stride - + chunks = [] for offset in range(stride): @@ -529,7 +529,7 @@ class RDOVAE(nn.Module): z_q = hard_quantize(z) / statistical_model['quant_scale'] z_n = noise_quantize(z) / statistical_model['quant_scale'] states_q = soft_pvq(states, self.pvq_num_pulses) - + if self.state_dropout_rate > 0: drop = torch.rand(states_q.size(0)) < self.state_dropout_rate mask = torch.ones_like(states_q) @@ -552,7 +552,7 @@ class RDOVAE(nn.Module): # decoder with soft quantized input z_dec_reverse = torch.flip(z_n[..., chunk['z_start'] : chunk['z_stop'] : chunk['z_stride'], :], [1]) features_reverse = self.core_decoder(z_dec_reverse, dec_initial_state) - outputs_sq.append((torch.flip(features_reverse, [1]), 
chunk['features_start'], chunk['features_stop'])) + outputs_sq.append((torch.flip(features_reverse, [1]), chunk['features_start'], chunk['features_stop'])) return { 'outputs_hard_quant' : outputs_hq, @@ -563,24 +563,24 @@ class RDOVAE(nn.Module): def encode(self, features): """ encoder with quantization and rate estimation """ - + z, states = self.core_encoder(features) - + # quantization of initial states - states = soft_pvq(states, self.pvq_num_pulses) + states = soft_pvq(states, self.pvq_num_pulses) state_size = m.log2(pvq_codebook_size(self.state_dim, self.pvq_num_pulses)) - + return z, states, state_size def decode(self, z, initial_state): """ decoder (flips sequences by itself) """ - + z_reverse = torch.flip(z, [1]) features_reverse = self.core_decoder(z_reverse, initial_state) features = torch.flip(features_reverse, [1]) - + return features - + def quantize(self, z, q_ids): """ quantization of latent vectors """ @@ -602,13 +602,12 @@ class RDOVAE(nn.Module): z = zq / stats['quant_scale'] return z - + def freeze_model(self): # freeze all parameters for p in self.parameters(): p.requires_grad = False - + for p in self.statistical_model.parameters(): p.requires_grad = True - diff --git a/dnn/torch/rdovae/train_rdovae.py b/dnn/torch/rdovae/train_rdovae.py index 68ccf2eb0..f29ed98f3 100644 --- a/dnn/torch/rdovae/train_rdovae.py +++ b/dnn/torch/rdovae/train_rdovae.py @@ -89,7 +89,7 @@ adam_eps = 1e-8 checkpoint['batch_size'] = batch_size checkpoint['lr'] = lr -checkpoint['lr_decay_factor'] = lr_decay_factor +checkpoint['lr_decay_factor'] = lr_decay_factor checkpoint['split_mode'] = split_mode checkpoint['epochs'] = epochs checkpoint['sequence_length'] = sequence_length @@ -130,10 +130,10 @@ checkpoint['state_dict'] = model.state_dict() if args.train_decoder_only: if args.initial_checkpoint is None: print("warning: training decoder only without providing initial checkpoint") - + for p in model.core_encoder.module.parameters(): p.requires_grad = False - + for p in model.statistical_model.parameters(): p.requires_grad = False @@ -180,15 +180,15 @@ if __name__ == '__main__': # zero out gradients optimizer.zero_grad() - + # push inputs to device features = features.to(device) q_ids = q_ids.to(device) rate_lambda = rate_lambda.to(device) - + rate_lambda_upsamp = torch.repeat_interleave(rate_lambda, 2, 1) - + # run model model_output = model(features, q_ids) @@ -224,17 +224,17 @@ if __name__ == '__main__': # total loss total_loss = rate_loss + (distortion_loss_hard_quant + distortion_loss_soft_quant) / 2 - + if args.enable_first_frame_loss: total_loss = total_loss + 0.5 * torch.relu(first_frame_loss - distortion_loss_hard_quant) - + total_loss.backward() - + optimizer.step() - + model.clip_weights() - + scheduler.step() # collect running stats diff --git a/dnn/training_tf2/diffembed.py b/dnn/training_tf2/diffembed.py index 64f098e21..e04ae154b 100644 --- a/dnn/training_tf2/diffembed.py +++ b/dnn/training_tf2/diffembed.py @@ -3,7 +3,7 @@ Modification of Tensorflow's Embedding Layer: 1. Not restricted to be the first layer of a model 2. 
Differentiable (allows non-integer lookups) - For non integer lookup, this layer linearly interpolates between the adjacent embeddings in the following way to preserver gradient flow - - E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x)) + - E = (1 - frac(x))*embed(floor(x)) + frac(x)*embed(ceil(x)) """ import tensorflow as tf @@ -26,13 +26,13 @@ class diff_Embed(Layer): self.pcm_init = pcm_init self.initializer = initializer - def build(self, input_shape): + def build(self, input_shape): w_init = tf.random_normal_initializer() - if self.pcm_init: + if self.pcm_init: w_init = self.initializer self.w = tf.Variable(initial_value=w_init(shape=(self.dict_size, self.units),dtype='float32'),trainable=True) - def call(self, inputs): + def call(self, inputs): alpha = inputs - tf.math.floor(inputs) alpha = tf.expand_dims(alpha,axis = -1) alpha = tf.tile(alpha,[1,1,1,self.units]) diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py index ea63ee8b4..97ce0cedd 100755 --- a/dnn/training_tf2/dump_lpcnet.py +++ b/dnn/training_tf2/dump_lpcnet.py @@ -309,13 +309,13 @@ if __name__ == "__main__": else: hf.write('/* This is *not* an end-to-end model */\n') hf.write('/* #define END2END */\n\n') - + # LPC weighting factor if type(args.lpc_gamma) == type(None): lpc_gamma = get_parameter(model, 'lpc_gamma', 1) else: lpc_gamma = args.lpc_gamma - + hf.write('/* LPC weighting factor */\n') hf.write('#define LPC_GAMMA ' + str(lpc_gamma) +'f\n\n') @@ -376,7 +376,7 @@ if __name__ == "__main__": hf.write('typedef struct {\n') for i, name in enumerate(layer_list): - hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper())) + hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper())) hf.write('} NNetState;\n\n') model_struct.write('} LPCNetModel;\n\n') diff --git a/dnn/training_tf2/dump_plc.py b/dnn/training_tf2/dump_plc.py index 8bd8cfb13..a490ade1d 100755 --- a/dnn/training_tf2/dump_plc.py +++ b/dnn/training_tf2/dump_plc.py @@ -283,7 +283,7 @@ hf.write('#define PLC_MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons)) hf.write('typedef struct {\n') for i, name in enumerate(layer_list): - hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper())) + hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper())) hf.write('} PLCNetState;\n\n') model_struct.write('} PLCModel;\n\n') diff --git a/dnn/training_tf2/dump_rdovae.py b/dnn/training_tf2/dump_rdovae.py index 778a68992..1858c8a49 100644 --- a/dnn/training_tf2/dump_rdovae.py +++ b/dnn/training_tf2/dump_rdovae.py @@ -173,7 +173,7 @@ f""" [ dump_conv1d_layer(encoder.get_layer(name), source_fid, header_fid) for name in encoder_conv1d_names - ] + ] ) # dump Dense layers @@ -232,13 +232,13 @@ f""" 'dec_dense7', 'dec_dense8', 'dec_final' - ] + ] decoder_gru_names = [ 'dec_dense2', 'dec_dense4', 'dec_dense6' - ] + ] source_fid = open("dred_rdovae_dec_data.c", 'w') header_fid = open("dred_rdovae_dec_data.h", 'w') diff --git a/dnn/training_tf2/fec_encoder.py b/dnn/training_tf2/fec_encoder.py index 95b7cc7a8..15ef12b2c 100644 --- a/dnn/training_tf2/fec_encoder.py +++ b/dnn/training_tf2/fec_encoder.py @@ -97,7 +97,7 @@ total_delay = silk_delay + zero_history + args.extra_delay - dump_data_delay # load signal if args.input.endswith('.raw') or args.input.endswith('.pcm') or args.input.endswith('.sw'): signal = np.fromfile(args.input, dtype='int16') - + elif args.input.endswith('.wav'): fs, signal = wavfile.read(args.input) else: @@ -107,7 +107,7 @@ else: padded_signal_length = len(signal) + total_delay 
tail = padded_signal_length % frame_size right_padding = (frame_size - tail) % frame_size - + signal = np.concatenate((np.zeros(total_delay, dtype=np.int16), signal, np.zeros(right_padding, dtype=np.int16))) padded_signal_file = os.path.splitext(args.input)[0] + '_padded.raw' @@ -228,7 +228,7 @@ if args.lossfile != None: fec_out_full[:, :nb_used_features] = fec_out fec_out_full.tofile(packet_file[:-4] + f'_fec.f32') - + #create packets array like in the original version for debugging purposes for i in range(offset, num_frames): @@ -254,4 +254,3 @@ if args.debug_output: print(f"writing debug output {packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32'}") test_features_full.tofile(packet_file[:-4] + f'_tf_batch{batch}_offset{offset}.f32') - diff --git a/dnn/training_tf2/fec_packets.c b/dnn/training_tf2/fec_packets.c index 376fb4f16..ee08ba95c 100644 --- a/dnn/training_tf2/fec_packets.c +++ b/dnn/training_tf2/fec_packets.c @@ -43,7 +43,7 @@ int get_fec_frame(const char * const filename, float *features, int packet_index long offset; FILE *fid = fopen(filename, "rb"); - + /* read header */ if (fread(&version, sizeof(version), 1, fid) != 1) goto error; if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error; @@ -88,7 +88,7 @@ int get_fec_rate(const char * const filename, int packet_index) int16_t rate; FILE *fid = fopen(filename, "rb"); - + /* read header */ if (fread(&version, sizeof(version), 1, fid) != 1) goto error; if (fread(&header_size, sizeof(header_size), 1, fid) != 1) goto error; diff --git a/dnn/training_tf2/fec_packets.py b/dnn/training_tf2/fec_packets.py index 5cd9201a6..6acbe9d2c 100644 --- a/dnn/training_tf2/fec_packets.py +++ b/dnn/training_tf2/fec_packets.py @@ -33,25 +33,25 @@ import numpy as np def write_fec_packets(filename, packets, rates=None): """ writes packets in binary format """ - + assert np.dtype(np.float32).itemsize == 4 assert np.dtype(np.int16).itemsize == 2 - - # derive some sizes + + # derive some sizes num_packets = len(packets) subframes_per_packet = packets[0].shape[-2] num_features = packets[0].shape[-1] - + # size of float is 4 subframe_size = num_features * 4 packet_size = subframe_size * subframes_per_packet + 2 # two bytes for rate - + version = 1 # header size (version, header_size, num_packets, packet_size, subframe_size, subrames_per_packet, num_features) header_size = 14 - + with open(filename, 'wb') as f: - + # header f.write(np.int16(version).tobytes()) f.write(np.int16(header_size).tobytes()) @@ -60,28 +60,28 @@ def write_fec_packets(filename, packets, rates=None): f.write(np.int16(subframe_size).tobytes()) f.write(np.int16(subframes_per_packet).tobytes()) f.write(np.int16(num_features).tobytes()) - + # packets for i, packet in enumerate(packets): if type(rates) == type(None): rate = 0 else: rate = rates[i] - + f.write(np.int16(rate).tobytes()) - + features = np.flip(packet, axis=-2) f.write(features.astype(np.float32).tobytes()) - - + + def read_fec_packets(filename): """ reads packets from binary format """ - + assert np.dtype(np.float32).itemsize == 4 assert np.dtype(np.int16).itemsize == 2 - + with open(filename, 'rb') as f: - + # header version = np.frombuffer(f.read(2), dtype=np.int16).item() header_size = np.frombuffer(f.read(2), dtype=np.int16).item() @@ -90,19 +90,19 @@ def read_fec_packets(filename): subframe_size = np.frombuffer(f.read(2), dtype=np.int16).item() subframes_per_packet = np.frombuffer(f.read(2), dtype=np.int16).item() num_features = np.frombuffer(f.read(2), dtype=np.int16).item() - + dummy_features = 
np.zeros((1, subframes_per_packet, num_features), dtype=np.float32) - + # packets rates = [] packets = [] for i in range(num_packets): - + rate = np.frombuffer(f.read(2), dtype=np.int16).item rates.append(rate) - + features = np.reshape(np.frombuffer(f.read(subframe_size * subframes_per_packet), dtype=np.float32), dummy_features.shape) packet = np.flip(features, axis=-2) packets.append(packet) - + return packets \ No newline at end of file diff --git a/dnn/training_tf2/lossfuncs.py b/dnn/training_tf2/lossfuncs.py index eb8317646..78be1fd61 100644 --- a/dnn/training_tf2/lossfuncs.py +++ b/dnn/training_tf2/lossfuncs.py @@ -35,7 +35,7 @@ def interp_mulaw(gamma = 1): alpha = e_gt - tf.math.floor(e_gt) alpha = tf.tile(alpha,[1,1,256]) e_gt = tf.cast(e_gt,'int32') - e_gt = tf.clip_by_value(e_gt,0,254) + e_gt = tf.clip_by_value(e_gt,0,254) interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1) sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab) loss_mod = sparse_cel + prob_compensation + gamma*regularization @@ -51,7 +51,7 @@ def metric_oginterploss(y_true,y_pred): alpha = e_gt - tf.math.floor(e_gt) alpha = tf.tile(alpha,[1,1,256]) e_gt = tf.cast(e_gt,'int32') - e_gt = tf.clip_by_value(e_gt,0,254) + e_gt = tf.clip_by_value(e_gt,0,254) interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1) sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab) loss_mod = sparse_cel + prob_compensation @@ -78,7 +78,7 @@ def metric_cel(y_true, y_pred): e_gt = tf_l2u(y_true - p) e_gt = tf.round(e_gt) e_gt = tf.cast(e_gt,'int32') - e_gt = tf.clip_by_value(e_gt,0,255) + e_gt = tf.clip_by_value(e_gt,0,255) sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,model_out) return sparse_cel @@ -97,4 +97,3 @@ def loss_matchlar(): loss_lar_diff = tf.square(loss_lar_diff) return tf.reduce_mean(loss_lar_diff, axis=-1) return loss - diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py index b7cee77f2..497f75722 100644 --- a/dnn/training_tf2/lpcnet.py +++ b/dnn/training_tf2/lpcnet.py @@ -186,7 +186,7 @@ class SparsifyGRUB(Callback): w[0] = p layer.set_weights(w) - + class PCMInit(Initializer): def __init__(self, gain=.1, seed=None): @@ -264,20 +264,20 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat) else: lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size) - + real_preds = diff_pred(name = "real_lpc2preds")([pcm,lpcoeffs]) weighting = lpc_gamma ** np.arange(1, 17).astype('float32') weighted_lpcoeffs = Lambda(lambda x: x[0]*x[1])([lpcoeffs, weighting]) tensor_preds = diff_pred(name = "lpc2preds")([pcm,weighted_lpcoeffs]) past_errors = error_calc([pcm,tensor_preds]) - + embed = diff_Embed(name='embed_sig',initializer = PCMInit()) cpcm = Concatenate()([tf_l2u(pcm),tf_l2u(tensor_preds),past_errors]) cpcm = GaussianNoise(.3)(cpcm) cpcm = Reshape((-1, embed_size*3))(embed(cpcm)) cpcm_decoder = Reshape((-1, embed_size*3))(embed(dpcm)) - + rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1)) quant = quant_regularizer if quantize else None @@ -305,7 +305,7 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s rnn2.trainable=False md.trainable=False embed.Trainable=False - + m_out = Concatenate(name='pdf')([tensor_preds,real_preds,ulaw_prob]) if not 
flag_e2e: model = Model([pcm, feat, pitch, lpcoeffs], m_out) @@ -315,7 +315,7 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s model.rnn_units2 = rnn_units2 model.nb_used_features = nb_used_features model.frame_size = frame_size - + if not flag_e2e: encoder = Model([feat, pitch], cfeat) dec_rnn_in = Concatenate()([cpcm_decoder, dec_feat]) @@ -330,7 +330,7 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2]) else: decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2]) - + # add parameters to model set_parameter(model, 'lpc_gamma', lpc_gamma, dtype='float64') set_parameter(model, 'flag_e2e', flag_e2e, dtype='bool') diff --git a/dnn/training_tf2/lpcnet_plc.py b/dnn/training_tf2/lpcnet_plc.py index 9acea419a..618e0084e 100644 --- a/dnn/training_tf2/lpcnet_plc.py +++ b/dnn/training_tf2/lpcnet_plc.py @@ -88,10 +88,10 @@ def new_lpcnet_plc_model(rnn_units=256, nb_used_features=20, nb_burg_features=36 gru_out1, _ = rnn(cfeat) gru_out1 = GaussianNoise(.005)(gru_out1) gru_out2, _ = rnn2(gru_out1) - + out_dense = Dense(nb_used_features, activation='linear', name='plc_out') plc_out = out_dense(gru_out2) - + model = Model([feat, lost], plc_out) model.rnn_units = rnn_units model.cond_size = cond_size diff --git a/dnn/training_tf2/mdense.py b/dnn/training_tf2/mdense.py index 5679dd290..844ae23e6 100644 --- a/dnn/training_tf2/mdense.py +++ b/dnn/training_tf2/mdense.py @@ -6,7 +6,7 @@ import numpy as np import math class MDense(Layer): - + def __init__(self, outputs, channels=2, activation=None, diff --git a/dnn/training_tf2/parameters.py b/dnn/training_tf2/parameters.py index 34b654801..3621a4e4f 100644 --- a/dnn/training_tf2/parameters.py +++ b/dnn/training_tf2/parameters.py @@ -5,9 +5,9 @@ import tensorflow as tf def set_parameter(model, parameter_name, parameter_value, dtype='float32'): """ stores parameter_value as non-trainable weight with name parameter_name:0 """ - + weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")] - + if len(weights) == 0: model.add_weight(parameter_name, trainable=False, initializer=tf.keras.initializers.Constant(parameter_value), dtype=dtype) elif len(weights) == 1: @@ -15,14 +15,14 @@ def set_parameter(model, parameter_name, parameter_value, dtype='float32'): else: raise ValueError(f"more than one weight starting with {parameter_name}:0 in model") - + def get_parameter(model, parameter_name, default=None): """ returns parameter value if parameter is present in model and otherwise default """ - + weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")] - + if len(weights) == 0: - return default + return default elif len(weights) > 1: raise ValueError(f"more than one weight starting with {parameter_name}:0 in model") else: diff --git a/dnn/training_tf2/plc_loader.py b/dnn/training_tf2/plc_loader.py index f9430d107..a9bd41d87 100644 --- a/dnn/training_tf2/plc_loader.py +++ b/dnn/training_tf2/plc_loader.py @@ -56,7 +56,7 @@ class PLCLoader(Sequence): lost_mask = np.tile(lost, (1,1,features.shape[2])) in_features = features*lost_mask in_features[:,:,:self.nb_burg_features] = in_features[:,:,:self.nb_burg_features]*burg_mask - + #For the first frame after a loss, we don't have valid features, but the Burg estimate is valid. 
#in_features[:,1:,self.nb_burg_features:] = in_features[:,1:,self.nb_burg_features:]*lost_mask[:,:-1,self.nb_burg_features:] out_lost = np.copy(lost) diff --git a/dnn/training_tf2/rdovae.py b/dnn/training_tf2/rdovae.py index 45b3efb01..6240120d1 100644 --- a/dnn/training_tf2/rdovae.py +++ b/dnn/training_tf2/rdovae.py @@ -61,7 +61,7 @@ def soft_quantize(x): #x = 4*x #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x) #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x) - #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x) + #x = x - (.25/np.math.pi)*tf.math.sin(2*np.math.pi*x) return x def noise_quantize(x): @@ -237,7 +237,7 @@ def new_rdovae_decoder(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, ba bits_input = Input(shape=(None, nb_bits), batch_size=batch_size, name="dec_bits") gru_state_input = Input(shape=(nb_state_dim,), batch_size=batch_size, name="dec_state") - + gru = CuDNNGRU if training else GRU dec_dense1 = Dense(cond_size2, activation='tanh', kernel_constraint=constraint, name='dec_dense1') dec_dense2 = gru(cond_size, return_sequences=True, kernel_constraint=constraint, recurrent_constraint=constraint, name='dec_dense2') @@ -300,7 +300,7 @@ def tensor_concat(x): y = [] for i in range(n-1): offset = 2 * (n-1-i) - tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2) + tmp = K.concatenate([x[i][:, offset:, :], x[-1][:, -offset:, :]], axis=-2) y.append(tf.expand_dims(tmp, axis=0)) y.append(tf.expand_dims(x[-1], axis=0)) return Concatenate(axis=0)(y) @@ -335,7 +335,7 @@ def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batc dze = dzone([ze,dead_zone]) ndze = noisequant(dze) dze_quant = hardquant(dze) - + div = Lambda(lambda x: x[0]/x[1]) dze_quant = div([dze_quant,quant_scale]) ndze_unquant = div([ndze,quant_scale]) @@ -355,13 +355,13 @@ def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batc combined_output.append(tmp) tmp = split_decoder([ndze_select, state_select]) - tmp = cat([tmp, lambda_up]) + tmp = cat([tmp, lambda_up]) unquantized_output.append(tmp) concat = Lambda(tensor_concat, name="output") combined_output = concat(combined_output) unquantized_output = concat(unquantized_output) - + e2 = Concatenate(name="hard_bits")([dze, hard_distr_embed, lambda_val]) e = Concatenate(name="soft_bits")([dze, soft_distr_embed, lambda_val]) @@ -370,4 +370,3 @@ def new_rdovae_model(nb_used_features=20, nb_bits=17, bunch=4, nb_quant=40, batc model.nb_used_features = nb_used_features return model, encoder, decoder, qembedding - diff --git a/dnn/training_tf2/rdovae_exchange.py b/dnn/training_tf2/rdovae_exchange.py index ecebf707c..3249677d4 100644 --- a/dnn/training_tf2/rdovae_exchange.py +++ b/dnn/training_tf2/rdovae_exchange.py @@ -113,7 +113,7 @@ if __name__ == "__main__": # qembedding print(f"writing layer {exchange_name['qembedding']}...") dump_tf_weights(os.path.join(args.output, exchange_name['qembedding']), qembedding) - + # decoder decoder_dense_names = [ 'state1', @@ -125,7 +125,7 @@ if __name__ == "__main__": 'dec_dense7', 'dec_dense8', 'dec_final' - ] + ] decoder_gru_names = [ 'dec_dense2', diff --git a/dnn/training_tf2/rdovae_import.py b/dnn/training_tf2/rdovae_import.py index f6aa17591..bc8b460d0 100644 --- a/dnn/training_tf2/rdovae_import.py +++ b/dnn/training_tf2/rdovae_import.py @@ -79,7 +79,7 @@ exchange_name = { if __name__ == "__main__": model, encoder, decoder, qembedding = new_rdovae_model(20, args.latent_dim, cond_size=args.cond_size, nb_quant=args.quant_levels) - + encoder_layers = [ 
'enc_dense1', 'enc_dense3', @@ -93,7 +93,7 @@ if __name__ == "__main__": 'enc_dense6', 'bits_dense' ] - + decoder_layers = [ 'state1', 'state2', @@ -108,16 +108,16 @@ if __name__ == "__main__": 'dec_dense4', 'dec_dense6' ] - + for name in encoder_layers: print(f"loading weight for layer {name}...") load_tf_weights(os.path.join(args.input, exchange_name[name]), encoder.get_layer(name)) - + print(f"loading weight for layer qembedding...") load_tf_weights(os.path.join(args.input, exchange_name['qembedding']), qembedding) - + for name in decoder_layers: print(f"loading weight for layer {name}...") load_tf_weights(os.path.join(args.input, exchange_name[name]), decoder.get_layer(name)) - - model.save(args.weights) \ No newline at end of file + + model.save(args.weights) diff --git a/dnn/training_tf2/test_lpcnet.py b/dnn/training_tf2/test_lpcnet.py index fe09016cd..ca551e63c 100755 --- a/dnn/training_tf2/test_lpcnet.py +++ b/dnn/training_tf2/test_lpcnet.py @@ -118,5 +118,3 @@ for c in range(0, nb_frames): #print(mem) np.array([np.round(mem)], dtype='int16').tofile(fout) skip = 0 - - diff --git a/dnn/training_tf2/tf_funcs.py b/dnn/training_tf2/tf_funcs.py index 5e065012c..b86f075cf 100644 --- a/dnn/training_tf2/tf_funcs.py +++ b/dnn/training_tf2/tf_funcs.py @@ -36,12 +36,12 @@ class diff_pred(Layer): rept = Lambda(lambda x: K.repeat_elements(x , frame_size, 1)) zpX = Lambda(lambda x: K.concatenate([0*x[:,0:lpcoeffs_N,:], x],axis = 1)) cX = Lambda(lambda x: K.concatenate([x[:,(lpcoeffs_N - i):(lpcoeffs_N - i + 2400),:] for i in range(lpcoeffs_N)],axis = 2)) - + pred = -Multiply()([rept(lpc),cX(zpX(xt))]) return K.sum(pred,axis = 2,keepdims = True) -# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion +# Differentiable Transformations (RC <-> LPC) computed using the Levinson Durbin Recursion class diff_rc2lpc(Layer): def call(self, inputs, lpcoeffs_N = 16): def pred_lpc_recursive(input): diff --git a/dnn/training_tf2/train_lpcnet.py b/dnn/training_tf2/train_lpcnet.py index bc3f43731..60e2b56f5 100755 --- a/dnn/training_tf2/train_lpcnet.py +++ b/dnn/training_tf2/train_lpcnet.py @@ -134,7 +134,7 @@ strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() with strategy.scope(): model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size, - rnn_units2=args.grub_size, + rnn_units2=args.grub_size, batch_size=batch_size, training=True, quantize=quantize, flag_e2e=flag_e2e, diff --git a/dnn/vec_neon.h b/dnn/vec_neon.h index b21d38966..aeb216e1b 100644 --- a/dnn/vec_neon.h +++ b/dnn/vec_neon.h @@ -200,14 +200,14 @@ static inline void sgemv_accum16(float *out, const float *weights, int rows, int for (i=0;i<rows;i+=16) { float * restrict y = &out[i]; - + /* keep y[0..15] in registers for duration of inner loop */ - + float32x4_t y0_3 = vld1q_f32(&y[0]); float32x4_t y4_7 = vld1q_f32(&y[4]); float32x4_t y8_11 = vld1q_f32(&y[8]); float32x4_t y12_15 = vld1q_f32(&y[12]); - + for (j=0;j<cols;j++) { const float * restrict w; @@ -219,9 +219,9 @@ static inline void sgemv_accum16(float *out, const float *weights, int rows, int wvec4_7 = vld1q_f32(&w[4]); wvec8_11 = vld1q_f32(&w[8]); wvec12_15 = vld1q_f32(&w[12]); - + xj = vld1q_dup_f32(&x[j]); - + y0_3 = vmlaq_f32(y0_3, wvec0_3, xj); y4_7 = vmlaq_f32(y4_7, wvec4_7, xj); y8_11 = vmlaq_f32(y8_11, wvec8_11, xj); @@ -229,12 +229,12 @@ static inline void sgemv_accum16(float *out, const float *weights, int rows, int } /* save y[0..15] back to memory */ - + vst1q_f32(&y[0], y0_3); vst1q_f32(&y[4], y4_7); vst1q_f32(&y[8], 
y8_11); vst1q_f32(&y[12], y12_15); - + } } @@ -249,32 +249,32 @@ static inline void sparse_sgemv_accum16(float *out, const float *w, int rows, co y = &out[i]; /* keep y[0..15] in registers for duration of inner loop */ - + float32x4_t y0_3 = vld1q_f32(&y[0]); float32x4_t y4_7 = vld1q_f32(&y[4]); float32x4_t y8_11 = vld1q_f32(&y[8]); float32x4_t y12_15 = vld1q_f32(&y[12]); - + for (j=0;j<cols;j++) { float32x4_t xj= vld1q_dup_f32(&x[*idx++]); float32x4_t wvec; - + wvec = vld1q_f32(&w[0]); y0_3 = vmlaq_f32(y0_3, wvec, xj); wvec = vld1q_f32(&w[4]); y4_7 = vmlaq_f32(y4_7, wvec, xj); wvec = vld1q_f32(&w[8]); y8_11 = vmlaq_f32(y8_11, wvec, xj); wvec = vld1q_f32(&w[12]); y12_15 = vmlaq_f32(y12_15, wvec, xj); - + w += 16; } /* save y[0..15] back to memory */ - + vst1q_f32(&y[0], y0_3); vst1q_f32(&y[4], y4_7); vst1q_f32(&y[8], y8_11); vst1q_f32(&y[12], y12_15); - + } } -- GitLab
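
Note: beyond stripping trailing spaces and tabs, the patch above also drops blank lines at end-of-file and adds missing final newlines (the "\ No newline at end of file" fixes in fec_packets.py and rdovae_import.py). The following is a minimal sketch of a script performing the same cleanup over dnn/. It is an illustration only, not the tool used for this commit; the extension list and the in-place rewrite behavior are assumptions based on the file types in the diffstat.

#!/usr/bin/env python3
"""Hypothetical cleanup helper: strip trailing whitespace from every line,
drop blank lines at end of file, and end each file with exactly one newline.
Not the tool actually used for this commit."""

import sys
from pathlib import Path

# File types touched by the patch above (assumed filter).
EXTENSIONS = {'.c', '.h', '.py', '.md', '.txt', '.bat'}

def clean_file(path: Path) -> bool:
    """Rewrite *path* in place; return True if anything changed."""
    original = path.read_text(encoding='utf-8')
    # Strip trailing spaces and tabs from every line.
    lines = [line.rstrip() for line in original.splitlines()]
    # Drop empty lines at end of file.
    while lines and lines[-1] == '':
        lines.pop()
    # Re-join with a single trailing newline, fixing files that lacked one.
    cleaned = '\n'.join(lines) + '\n' if lines else ''
    if cleaned != original:
        path.write_text(cleaned, encoding='utf-8')
        return True
    return False

if __name__ == '__main__':
    root = Path(sys.argv[1] if len(sys.argv) > 1 else 'dnn')
    changed = [p for p in sorted(root.rglob('*'))
               if p.is_file() and p.suffix in EXTENSIONS and clean_file(p)]
    print(f'{len(changed)} files cleaned')

Running such a pass over dnn/ and staging the result should produce a diff of the same shape as this patch; afterwards, `git diff --check` on future changes will flag any trailing whitespace before it is reintroduced.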