diff --git a/dnn/freq.c b/dnn/freq.c
index b0977d9d2ddefc3136efc97f3369610a78c7cc68..cb977bad04a887cd7b90789f03d1c60ae9c4cfab 100644
--- a/dnn/freq.c
+++ b/dnn/freq.c
@@ -271,6 +271,19 @@ float lpc_from_bands(float *lpc, const float *Ex)
   return e;
 }
 
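+/* Bandwidth expansion: scales lpc[i] by gamma^(i+1), pulling the poles of
+   the LPC synthesis filter towards the origin. */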
+void lpc_weighting(float *lpc, float gamma)
+{
+  int i;
+  float gamma_i = gamma;
+  for (i = 0; i < LPC_ORDER; i++)
+  {
+    lpc[i] *= gamma_i;
+    gamma_i *= gamma;
+  }
+}
+
 float lpc_from_cepstrum(float *lpc, const float *cepstrum)
 {
   int i;
diff --git a/dnn/freq.h b/dnn/freq.h
index 438d48ac31b3a79656746bf4627e5469b53495f0..93d8cf77973b43ca5320165d4bf03e412c277c39 100644
--- a/dnn/freq.h
+++ b/dnn/freq.h
@@ -56,4 +56,4 @@ void inverse_transform(float *out, const kiss_fft_cpx *in);
 float lpc_from_bands(float *lpc, const float *Ex);
 float lpc_from_cepstrum(float *lpc, const float *cepstrum);
 void apply_window(float *x);
-
+void lpc_weighting(float *lpc, float gamma);
diff --git a/dnn/lpcnet.c b/dnn/lpcnet.c
index bd61d6f75c55ee2c02a20e0b926a6cab5952753e..5f8e4bd0f4677e693073a3fb10622c282f19d38d 100644
--- a/dnn/lpcnet.c
+++ b/dnn/lpcnet.c
@@ -113,6 +113,11 @@ void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b
     lpc_from_cepstrum(lpcnet->old_lpc[0], features);
 #else
     lpc_from_cepstrum(lpc, features);
+#endif
+#ifdef LPC_GAMMA
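+    /* LPC_GAMMA is emitted into nnet_data.h by dump_lpcnet.py, so synthesis
+       applies the same LPC weighting the model was trained with. */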
"nnet.h"\n#include "{}"\n\n'.format(hfile)) -hf.write('/*This file is automatically generated from a Keras model*/\n\n') -hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n') + hf.write('/*This file is automatically generated from a Keras model*/\n\n') + hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n') + + if e2e: + hf.write('/* This is an end-to-end model */\n') + hf.write('#define END2END\n\n') + else: + hf.write('/* This is *not* an end-to-end model */\n') + hf.write('/* #define END2END */\n\n') + + print([weight.name for weight in model.weights]) + + # LPC weighting factor + if type(args.lpc_gamma) == type(None): + lpc_gamma = get_parameter(model, 'lpc_gamma', 1) + else: + lpc_gamma = args.lpc_gamma + + hf.write('/* LPC weighting factor */\n') + hf.write('#define LPC_GAMMA ' + str(lpc_gamma) +'f\n\n') + + # look-ahead + if type(args.lookahead) == type(None): + lookahead = get_parameter(model, 'lookahead', 2) + else: + lookahead = args.lookahead -if e2e: - hf.write('/* This is an end-to-end model */\n') - hf.write('#define END2END\n\n') -else: - hf.write('/* This is *not* an end-to-end model */\n') - hf.write('/* #define END2END */\n\n') + hf.write('/* Features look-ahead */\n') + hf.write('#define FEATURES_DELAY ' + str(lookahead) +'\n\n') -embed_size = lpcnet.embed_size + embed_size = lpcnet.embed_size -E = model.get_layer('embed_sig').get_weights()[0] -W = model.get_layer('gru_a').get_weights()[0][:embed_size,:] -dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf) -W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:] -dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf) -W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:] -dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf) -W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:] -#FIXME: dump only half the biases -b = model.get_layer('gru_a').get_weights()[2] -dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf) + E = model.get_layer('embed_sig').get_weights()[0] + W = model.get_layer('gru_a').get_weights()[0][:embed_size,:] + dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf) + W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:] + dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf) + W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:] + dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf) + W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:] + #FIXME: dump only half the biases + b = model.get_layer('gru_a').get_weights()[2] + dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf) -W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:] -b = model.get_layer('gru_b').get_weights()[2] -# Set biases to zero because they'll be included in the GRU input part -# (we need regular and SU biases) -dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf) -dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1) + W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:] + b = model.get_layer('gru_b').get_weights()[2] + # Set biases to zero because they'll be included in the GRU input part + # (we need regular and SU biases) + dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf) + dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1) -layer_list = [] -for i, layer in enumerate(model.layers): - if layer.dump_layer(f, hf): 
+    hf.write('/* Features look-ahead */\n')
+    hf.write('#define FEATURES_DELAY ' + str(lookahead) +'\n\n')
 
-embed_size = lpcnet.embed_size
+    embed_size = lpcnet.embed_size
 
-E = model.get_layer('embed_sig').get_weights()[0]
-W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
-dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
-W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
-dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
-W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
-dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
-W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
-#FIXME: dump only half the biases
-b = model.get_layer('gru_a').get_weights()[2]
-dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
+    E = model.get_layer('embed_sig').get_weights()[0]
+    W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
+    dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
+    W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
+    dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
+    W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
+    dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
+    W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
+    #FIXME: dump only half the biases
+    b = model.get_layer('gru_a').get_weights()[2]
+    dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
 
-W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
-b = model.get_layer('gru_b').get_weights()[2]
-# Set biases to zero because they'll be included in the GRU input part
-# (we need regular and SU biases)
-dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf)
-dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)
+    W = model.get_layer('gru_b').get_weights()[0][model.rnn_units1:,:]
+    b = model.get_layer('gru_b').get_weights()[2]
+    # Set biases to zero because they'll be included in the GRU input part
+    # (we need regular and SU biases)
+    dump_dense_layer_impl('gru_b_dense_feature', W, 0*b, 'LINEAR', f, hf)
+    dump_grub(model.get_layer('gru_b'), f, hf, model.rnn_units1)
 
-layer_list = []
-for i, layer in enumerate(model.layers):
-    if layer.dump_layer(f, hf):
-        layer_list.append(layer.name)
+    layer_list = []
+    for i, layer in enumerate(model.layers):
+        if layer.dump_layer(f, hf):
+            layer_list.append(layer.name)
 
-dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+    dump_sparse_gru(model.get_layer('gru_a'), f, hf)
 
-hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
-hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
-hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
+    hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
+    hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
+    hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
 
-hf.write('typedef struct {\n')
-for i, name in enumerate(layer_list):
-    hf.write('  float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
-hf.write('} NNetState;\n')
+    hf.write('typedef struct {\n')
+    for i, name in enumerate(layer_list):
+        hf.write('  float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
+    hf.write('} NNetState;\n')
 
-hf.write('\n\n#endif\n')
+    hf.write('\n\n#endif\n')
 
-f.close()
-hf.close()
+    f.close()
+    hf.close()
diff --git a/dnn/training_tf2/lossfuncs.py b/dnn/training_tf2/lossfuncs.py
index fae285b2ac45ba228b749835f2afed2b0dc387db..eb831764654ba1c8aacacc364766c21cb4a68839 100644
--- a/dnn/training_tf2/lossfuncs.py
+++ b/dnn/training_tf2/lossfuncs.py
@@ -11,7 +11,7 @@ import tensorflow as tf
 def res_from_sigloss():
     def loss(y_true,y_pred):
         p = y_pred[:,:,0:1]
-        model_out = y_pred[:,:,1:]
+        model_out = y_pred[:,:,2:]
         e_gt = tf_l2u(y_true - p)
         e_gt = tf.round(e_gt)
         e_gt = tf.cast(e_gt,'int32')
@@ -26,23 +26,28 @@ def interp_mulaw(gamma = 1):
     def loss(y_true,y_pred):
         y_true = tf.cast(y_true, 'float32')
         p = y_pred[:,:,0:1]
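+        # y_pred packs [prediction from weighted LPCs, prediction from raw
+        # LPCs, 256 u-law probabilities] along the last axis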
-        model_out = y_pred[:,:,1:]
+        real_p = y_pred[:,:,1:2]
+        model_out = y_pred[:,:,2:]
         e_gt = tf_l2u(y_true - p)
+        exc_gt = tf_l2u(y_true - real_p)
         prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
+        regularization = tf.squeeze((K.abs(exc_gt - 128)/128.0)*K.log(256.0))
         alpha = e_gt - tf.math.floor(e_gt)
         alpha = tf.tile(alpha,[1,1,256])
         e_gt = tf.cast(e_gt,'int32')
         e_gt = tf.clip_by_value(e_gt,0,254)
         interp_probab = (1 - alpha)*model_out + alpha*tf.roll(model_out,shift = -1,axis = -1)
         sparse_cel = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(e_gt,interp_probab)
-        loss_mod = sparse_cel + gamma*prob_compensation
+        loss_mod = sparse_cel + prob_compensation + gamma*regularization
         return loss_mod
     return loss
 
 # Same as above, except a metric
 def metric_oginterploss(y_true,y_pred):
     p = y_pred[:,:,0:1]
-    model_out = y_pred[:,:,1:]
+    model_out = y_pred[:,:,2:]
     e_gt = tf_l2u(y_true - p)
     prob_compensation = tf.squeeze((K.abs(e_gt - 128)/128.0)*K.log(256.0))
     alpha = e_gt - tf.math.floor(e_gt)
@@ -57,7 +62,7 @@
 # Interpolated cross entropy loss metric
 def metric_icel(y_true, y_pred):
     p = y_pred[:,:,0:1]
-    model_out = y_pred[:,:,1:]
+    model_out = y_pred[:,:,2:]
     e_gt = tf_l2u(y_true - p)
     alpha = e_gt - tf.math.floor(e_gt)
     alpha = tf.tile(alpha,[1,1,256])
@@ -71,7 +76,7 @@
 def metric_cel(y_true, y_pred):
     y_true = tf.cast(y_true, 'float32')
     p = y_pred[:,:,0:1]
-    model_out = y_pred[:,:,1:]
+    model_out = y_pred[:,:,2:]
     e_gt = tf_l2u(y_true - p)
     e_gt = tf.round(e_gt)
     e_gt = tf.cast(e_gt,'int32')
diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py
index 3ab4599020b77137372ad9c805fc66c974cb9788..3f45bb9e1a205d4996a1c8242c446b5ef4753aa5 100644
--- a/dnn/training_tf2/lpcnet.py
+++ b/dnn/training_tf2/lpcnet.py
@@ -40,6 +40,7 @@ import h5py
 import sys
 from tf_funcs import *
 from diffembed import diff_Embed
+from parameters import set_parameter
 
 frame_size = 160
 pcm_bits = 8
@@ -230,7 +231,7 @@ class WeightClip(Constraint):
 
 constraint = WeightClip(0.992)
 
-def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128, lpc_order=16):
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_size=128, training=False, adaptation=False, quantize=False, flag_e2e = False, cond_size=128, lpc_order=16, lpc_gamma=1., lookahead=2):
     pcm = Input(shape=(None, 1), batch_size=batch_size)
     dpcm = Input(shape=(None, 3), batch_size=batch_size)
     feat = Input(shape=(None, nb_used_features), batch_size=batch_size)
@@ -240,14 +241,14 @@ def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features=20, batch_s
     dec_state2 = Input(shape=(rnn_units2,))
 
     padding = 'valid' if training else 'same'
-    fconv1 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv1')
-    fconv2 = Conv1D(cond_size, 3, padding=padding, activation='tanh', name='feature_conv2')
+    fconv1 = Conv1D(cond_size, 3, padding=padding, activation='swish', name='feature_conv1')
+    fconv2 = Conv1D(cond_size, 3, padding=padding, activation='swish', name='feature_conv2')
     pembed = Embedding(256, 64, name='embed_pitch')
     cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
 
     cfeat = fconv2(fconv1(cat_feat))
 
-    fdense1 = Dense(cond_size, activation='tanh', name='feature_dense1')
+    fdense1 = Dense(cond_size, activation='swish', name='feature_dense1')
     fdense2 = Dense(cond_size, activation='tanh', name='feature_dense2')
 
     if flag_e2e and quantize:
@@ -263,8 +264,15 @@
         lpcoeffs = diff_rc2lpc(name = "rc2lpc")(cfeat)
     else:
         lpcoeffs = Input(shape=(None, lpc_order), batch_size=batch_size)
-    tensor_preds = diff_pred(name = "lpc2preds")([pcm,lpcoeffs])
+
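+    # predict from both the raw LPCs (kept for the regularization term in the
+    # loss) and the gamma-weighted LPCs that drive the rest of the network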
+    real_preds = diff_pred(name = "real_lpc2preds")([pcm,lpcoeffs])
+    weighting = lpc_gamma ** np.arange(1, 17).astype('float32')
+    weighted_lpcoeffs = Lambda(lambda x: x[0]*x[1])([lpcoeffs, weighting])
+    tensor_preds = diff_pred(name = "lpc2preds")([pcm,weighted_lpcoeffs])
     past_errors = error_calc([pcm,tensor_preds])
+
     embed = diff_Embed(name='embed_sig',initializer = PCMInit())
     cpcm = Concatenate()([tf_l2u(pcm),tf_l2u(tensor_preds),past_errors])
     cpcm = GaussianNoise(.3)(cpcm)
@@ -300,7 +308,7 @@
         md.trainable=False
         embed.Trainable=False
 
-    m_out = Concatenate(name='pdf')([tensor_preds,ulaw_prob])
+    m_out = Concatenate(name='pdf')([tensor_preds,real_preds,ulaw_prob])
     if not flag_e2e:
         model = Model([pcm, feat, pitch, lpcoeffs], m_out)
     else:
@@ -324,4 +332,10 @@
         decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
     else:
         decoder = Model([dpcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+
+    # add parameters to model
+    set_parameter(model, 'lpc_gamma', lpc_gamma, dtype='float64')
+    set_parameter(model, 'flag_e2e', flag_e2e, dtype='bool')
+    set_parameter(model, 'lookahead', lookahead, dtype='int32')
+
     return model, encoder, decoder
diff --git a/dnn/training_tf2/parameters.py b/dnn/training_tf2/parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..34b654801bd6beb5adb9cbec448411086cd62056
--- /dev/null
+++ b/dnn/training_tf2/parameters.py
@@ -0,0 +1,29 @@
+""" module for handling extra model parameters for tf.keras models """
+
+import tensorflow as tf
+
+
+def set_parameter(model, parameter_name, parameter_value, dtype='float32'):
+    """ stores parameter_value as non-trainable weight with name parameter_name:0 """
+
+    weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]
+
+    if len(weights) == 0:
+        model.add_weight(parameter_name, trainable=False, initializer=tf.keras.initializers.Constant(parameter_value), dtype=dtype)
+    elif len(weights) == 1:
+        weights[0].assign(parameter_value)
+    else:
+        raise ValueError(f"more than one weight named {parameter_name}:0 in model")
+
+
+def get_parameter(model, parameter_name, default=None):
+    """ returns parameter value if parameter is present in model and otherwise default """
+
+    weights = [weight for weight in model.weights if weight.name == (parameter_name + ":0")]
+
+    if len(weights) == 0:
+        return default
+    elif len(weights) > 1:
+        raise ValueError(f"more than one weight named {parameter_name}:0 in model")
+    else:
+        return weights[0].numpy().item()
diff --git a/dnn/training_tf2/test_lpcnet.py b/dnn/training_tf2/test_lpcnet.py
index e1fdf59df24e74f06c6d04e3899b1419573df212..fe09016cd07578f2be9f6f7753875fd59af18bac 100755
--- a/dnn/training_tf2/test_lpcnet.py
+++ b/dnn/training_tf2/test_lpcnet.py
@@ -24,14 +24,23 @@ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 '''
-
-import lpcnet
+import argparse
 import sys
+
+import h5py
 import numpy as np
+
+import lpcnet
 from ulaw import ulaw2lin, lin2ulaw
-import h5py
 
-filename = sys.argv[1]
+
+parser = argparse.ArgumentParser()
+parser.add_argument('model_file', type=str, help='model weight h5 file')
+parser.add_argument('--lpc-gamma', type=float, help='LPC weighting factor. WARNING: giving an inconsistent value here will severely degrade performance', default=1)
+
+args = parser.parse_args()
+
+filename = args.model_file
 with h5py.File(filename, "r") as f:
     units = min(f['model_weights']['gru_a']['gru_a']['recurrent_kernel:0'].shape)
     units2 = min(f['model_weights']['gru_b']['gru_b']['recurrent_kernel:0'].shape)
@@ -74,6 +83,10 @@ state2 = np.zeros((1, model.rnn_units2), dtype='float32')
 mem = 0
 coef = 0.85
 
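+# gamma^(i+1) weights matching lpc_weighting() in dnn/freq.c; --lpc-gamma must
+# match the value the model was trained with (see WARNING in the help above)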
+lpc_weights = np.array([args.lpc_gamma ** (i + 1) for i in range(16)])
+
 fout = open(out_file, 'wb')
 
 skip = order + 1
@@ -85,7 +98,7 @@ for c in range(0, nb_frames):
     for fr in range(0, feature_chunk_size):
         f = c*feature_chunk_size + fr
         if not e2e:
-            a = features[c, fr, nb_features-order:]
+            a = features[c, fr, nb_features-order:] * lpc_weights
         else:
             a = lpcs[c,fr]
         for i in range(skip, frame_size):
diff --git a/dnn/training_tf2/train_lpcnet.py b/dnn/training_tf2/train_lpcnet.py
index b6802fc726159c65c299abf88440ca35fbe87a97..4551da8302c555aa929f185fb972f820ed9e8dff 100755
--- a/dnn/training_tf2/train_lpcnet.py
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -28,6 +28,8 @@
 # Train an LPCNet model
 
 import argparse
+import os
+
 from dataloader import LPCNetLoader
 
 parser = argparse.ArgumentParser(description='Train an LPCNet model')
@@ -54,10 +56,15 @@ parser.add_argument('--decay', metavar='<decay>', type=float, help='learning rat
 parser.add_argument('--gamma', metavar='<gamma>', type=float, help='adjust u-law compensation (default 2.0, should not be less than 1.0)')
 parser.add_argument('--lookahead', metavar='<nb frames>', default=2, type=int, help='Number of look-ahead frames (default 2)')
 parser.add_argument('--logdir', metavar='<log dir>', help='directory for tensorboard log files')
-
+parser.add_argument('--lpc-gamma', type=float, default=1, help='gamma for LPC weighting')
+parser.add_argument('--cuda-devices', metavar='<cuda devices>', type=str, default=None, help='string with comma separated cuda device ids')
 
 args = parser.parse_args()
 
+# set visible cuda devices
+if args.cuda_devices is not None:
+    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda_devices
+
 density = (0.05, 0.05, 0.2)
 if args.density_split is not None:
     density = args.density_split
@@ -109,7 +116,7 @@ if quantize:
     input_model = args.quantize
 else:
     lr = 0.001
-    decay = 2.5e-5
+    decay = 5e-5
 
 if args.lr is not None:
     lr = args.lr
@@ -122,11 +129,21 @@
 flag_e2e = args.flag_e2e
 
-opt = Adam(lr, decay=decay, beta_2=0.99)
+opt = Adam(lr, decay=decay, beta_1=0.5, beta_2=0.8)
 
 strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
 
 with strategy.scope():
-    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size, rnn_units2=args.grub_size, batch_size=batch_size, training=True, quantize=quantize, flag_e2e = flag_e2e, cond_size=args.cond_size)
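+    # lpc_gamma and lookahead are stored in the weight file via set_parameter
+    # so that dump_lpcnet.py can recover them when generating nnet_data.*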
+    model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=args.grua_size,
+                                          rnn_units2=args.grub_size,
+                                          batch_size=batch_size, training=True,
+                                          quantize=quantize,
+                                          flag_e2e=flag_e2e,
+                                          cond_size=args.cond_size,
+                                          lpc_gamma=args.lpc_gamma,
+                                          lookahead=args.lookahead
+                                          )
     if not flag_e2e:
         model.compile(optimizer=opt, loss=metric_cel, metrics=metric_cel)
     else:
@@ -183,7 +200,7 @@ if quantize or retrain:
     grub_sparsify = lpcnet.SparsifyGRUB(0, 0, 1, args.grua_size, grub_density)
 else:
     #Training from scratch
-    sparsify = lpcnet.Sparsify(2000, 40000, 400, density)
+    sparsify = lpcnet.Sparsify(2000, 20000, 400, density)
     grub_sparsify = lpcnet.SparsifyGRUB(2000, 40000, 400, args.grua_size, grub_density)
 
 model.save_weights('{}_{}_initial.h5'.format(args.output, args.grua_size))