diff --git a/dnn/lpcnet_enc.c b/dnn/lpcnet_enc.c index 3e50ed9fc4a8d21cbe6a6cc5c7e6c71a0e274c22..ed979ce1582f2ea2f0e7de2ec2a4d6fa987ad2e7 100644 --- a/dnn/lpcnet_enc.c +++ b/dnn/lpcnet_enc.c @@ -590,7 +590,7 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int max_prev = st->pitch_max_path_all - 6.f; pitch_prev[sub][i] = st->best_i; for (j=IMIN(0, 4-i);j<=4 && i+j<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;j++) { - if (st->pitch_max_path[0][i+j] > max_prev) { + if (st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j) > max_prev) { max_prev = st->pitch_max_path[0][i+j] - .02f*abs(j)*abs(j); pitch_prev[sub][i] = i+j; } @@ -662,10 +662,11 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int if (quantize) { float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD; p *= 1 + modulation/16./7.*(2*sub-3); + p = MIN16(255, MAX16(32, p)); st->features[sub][2*NB_BANDS] = .02*(p-100); st->features[sub][2*NB_BANDS + 1] = frame_corr-.5; } else { - st->features[sub][2*NB_BANDS] = .01*(best[2+2*sub]+best[2+2*sub+1]-200); + st->features[sub][2*NB_BANDS] = .01*(IMAX(64, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200); st->features[sub][2*NB_BANDS + 1] = frame_corr-.5; } //printf("%f %d %f\n", st->features[sub][2*NB_BANDS], best[2+2*sub], frame_corr); diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py new file mode 100755 index 0000000000000000000000000000000000000000..c42d21078c6a0456862b572ea4e5e0b2c69da96e --- /dev/null +++ b/dnn/training_tf2/dump_lpcnet.py @@ -0,0 +1,267 @@ +#!/usr/bin/python3 +'''Copyright (c) 2017-2018 Mozilla + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import lpcnet
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.layers import Layer, GRU, Dense, Conv1D, Embedding
+from ulaw import ulaw2lin, lin2ulaw
+from mdense import MDense
+import h5py
+import re
+
+# Running maxima over all dumped layers; written to the header as
+# MAX_RNN_NEURONS / MAX_CONV_INPUTS / MAX_MDENSE_TMP at the end of the script.
+max_rnn_neurons = 1
+max_conv_inputs = 1
+max_mdense_tmp = 1
+
+def printVector(f, vector, name, dtype='float'):
+    """Write `vector` to `f` as a C array definition.
+
+    The array is flattened and emitted as
+    ``static const <dtype> <name>[<len>] = { ... };`` with a line break
+    after every 8th value.
+    """
+    v = np.reshape(vector, (-1))
+    f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
+    for i in range(0, len(v)):
+        f.write('{}'.format(v[i]))
+        if (i!=len(v)-1):
+            f.write(',')
+        else:
+            # Last element: no separator and no trailing padding.
+            break
+        if (i%8==7):
+            f.write("\n ")
+        else:
+            f.write(" ")
+    f.write('\n};\n\n')
+    return
+
+def printSparseVector(f, A, name):
+    """Write the block-sparse form of the recurrent matrix `A` to `f`.
+
+    `A` is read as three N-column sub-matrices side by side, where
+    N = A.shape[0]. Three C arrays are emitted:
+
+    - ``<name>_diag``: the diagonals of the three sub-matrices.
+    - ``<name>``: for each group of 16 columns, the 16 values of every row
+      whose block is non-zero once the diagonals have been removed.
+    - ``<name>_idx``: for each group of 16 columns, the number of non-zero
+      blocks followed by their row indices.
+    """
+    # Work on a private copy so the caller's weight array is not modified
+    # when the diagonals are zeroed below.
+    A = np.array(A)
+    N = A.shape[0]
+    W = np.zeros((0,))
+    diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+    A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+    A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+    A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+    printVector(f, diag, name + '_diag')
+    idx = np.zeros((0,), dtype='int')
+    for i in range(3*N//16):
+        # Reserve a slot for this column group's block count; filled in below.
+        pos = idx.shape[0]
+        idx = np.append(idx, -1)
+        nb_nonzero = 0
+        for j in range(N):
+            if np.sum(np.abs(A[j, i*16:(i+1)*16])) > 1e-10:
+                nb_nonzero = nb_nonzero + 1
+                idx = np.append(idx, j)
+                W = np.concatenate([W, A[j, i*16:(i+1)*16]])
+        idx[pos] = nb_nonzero
+    printVector(f, W, name)
+    printVector(f, idx, name + '_idx', dtype='int')
+    return
+
+def dump_layer_ignore(self, f, hf):
+    """Default `dump_layer`: report the layer as skipped and write nothing.
+
+    Returns False, so the layer gets no entry in the NNetState struct.
+    """
+    print("ignoring layer " + self.name + " of type " + self.__class__.__name__)
+    return False
+Layer.dump_layer = dump_layer_ignore
+
+def dump_sparse_gru(self, f, hf):
+    """Dump a GRU layer as a `SparseGRULayer` named ``sparse_<layer name>``.
+
+    Writes the block-sparse recurrent weights and the bias to the C file
+    `f`, and the OUT_SIZE / STATE_SIZE macros plus the extern declaration
+    to the header `hf`. The input weights are not written here.
+    Returns True.
+    """
+    global max_rnn_neurons
+    name = 'sparse_' + self.name
+    print("printing layer " + name + " of type sparse " + self.__class__.__name__)
+    weights = self.get_weights()
+    printSparseVector(f, weights[1], name + '_recurrent_weights')
+    printVector(f, weights[-1], name + '_bias')
+    if hasattr(self, 'activation'):
+        activation = self.activation.__name__.upper()
+    else:
+        activation = 'TANH'
+    if hasattr(self, 'reset_after') and not self.reset_after:
+        reset_after = 0
+    else:
+        reset_after = 1
+    # The kernel has three column groups of equal width; one group is the unit count.
+    neurons = weights[0].shape[1]//3
+    max_rnn_neurons = max(max_rnn_neurons, neurons)
+    f.write('const SparseGRULayer {} = {{\n {}_bias,\n {}_recurrent_weights_diag,\n {}_recurrent_weights,\n {}_recurrent_weights_idx,\n {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name, weights[0].shape[1]//3, activation, reset_after))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('extern const SparseGRULayer {};\n\n'.format(name))
+    return True
+
+def dump_gru_layer(self, f, hf):
+    """Dump a GRU layer (input weights, recurrent weights, bias) as a `GRULayer`.
+
+    Returns True so the caller reserves a ``<name>_state`` buffer for it.
+    """
+    global max_rnn_neurons
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    printVector(f, weights[0], name + '_weights')
+    printVector(f, weights[1], name + '_recurrent_weights')
+    printVector(f, weights[-1], name + '_bias')
+    if hasattr(self, 'activation'):
+        activation = self.activation.__name__.upper()
+    else:
+        activation = 'TANH'
+    if hasattr(self, 'reset_after') and not self.reset_after:
+        reset_after = 0
+    else:
+        reset_after = 1
+    neurons = weights[0].shape[1]//3
+    max_rnn_neurons = max(max_rnn_neurons, neurons)
+    f.write('const GRULayer {} = {{\n {}_bias,\n {}_weights,\n {}_recurrent_weights,\n {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, weights[0].shape[0], weights[0].shape[1]//3, activation, reset_after))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('#define {}_STATE_SIZE {}\n'.format(name.upper(), weights[0].shape[1]//3))
+    hf.write('extern const GRULayer {};\n\n'.format(name))
+    return True
+GRU.dump_layer = dump_gru_layer
+
+def dump_dense_layer_impl(name, weights, bias, activation, f, hf):
+    """Write a `DenseLayer` called `name` from explicit weight and bias arrays.
+
+    `activation` is the suffix appended to ``ACTIVATION_`` in the C struct.
+    """
+    printVector(f, weights, name + '_weights')
+    printVector(f, bias, name + '_bias')
+    f.write('const DenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, weights.shape[0], weights.shape[1], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+    hf.write('extern const DenseLayer {};\n\n'.format(name))
+
+def dump_dense_layer(self, f, hf):
+    """Dump a Keras Dense layer; returns False (no state buffer is reserved)."""
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    activation = self.activation.__name__.upper()
+    dump_dense_layer_impl(name, weights[0], weights[1], activation, f, hf)
+    return False
+
+Dense.dump_layer = dump_dense_layer
+
+def dump_mdense_layer(self, f, hf):
+    """Dump an MDense layer (kernel, bias, factor) as an `MDenseLayer`.
+
+    The three weight arrays are transposed before being written, and
+    `max_mdense_tmp` is raised to shape[0]*shape[2] of the kernel when
+    that is larger. Returns False.
+    """
+    global max_mdense_tmp
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    printVector(f, np.transpose(weights[0], (1, 2, 0)), name + '_weights')
+    printVector(f, np.transpose(weights[1], (1, 0)), name + '_bias')
+    printVector(f, np.transpose(weights[2], (1, 0)), name + '_factor')
+    activation = self.activation.__name__.upper()
+    max_mdense_tmp = max(max_mdense_tmp, weights[0].shape[0]*weights[0].shape[2])
+    f.write('const MDenseLayer {} = {{\n {}_bias,\n {}_weights,\n {}_factor,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[0]))
+    hf.write('extern const MDenseLayer {};\n\n'.format(name))
+    return False
+MDense.dump_layer = dump_mdense_layer
+
+def dump_conv1d_layer(self, f, hf):
+    """Dump a Conv1D layer as a `Conv1DLayer` and emit its size macros.
+
+    Besides OUT_SIZE, the header receives STATE_SIZE
+    (shape[1] * (shape[0] - 1) of the kernel) and DELAY
+    ((shape[0] - 1) // 2). Returns True so the caller reserves a
+    ``<name>_state`` buffer for it.
+    """
+    global max_conv_inputs
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()
+    printVector(f, weights[0], name + '_weights')
+    printVector(f, weights[-1], name + '_bias')
+    activation = self.activation.__name__.upper()
+    max_conv_inputs = max(max_conv_inputs, weights[0].shape[1]*weights[0].shape[0])
+    f.write('const Conv1DLayer {} = {{\n {}_bias,\n {}_weights,\n {}, {}, {}, ACTIVATION_{}\n}};\n\n'
+            .format(name, name, name, weights[0].shape[1], weights[0].shape[0], weights[0].shape[2], activation))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights[0].shape[2]))
+    hf.write('#define {}_STATE_SIZE ({}*{})\n'.format(name.upper(), weights[0].shape[1], (weights[0].shape[0]-1)))
+    hf.write('#define {}_DELAY {}\n'.format(name.upper(), (weights[0].shape[0]-1)//2))
+    hf.write('extern const Conv1DLayer {};\n\n'.format(name))
+    return True
+Conv1D.dump_layer = dump_conv1d_layer
+
+
+def dump_embedding_layer_impl(name, weights, f, hf):
+    """Write an `EmbeddingLayer` called `name` from an explicit weight matrix."""
+    printVector(f, weights, name + '_weights')
+    f.write('const EmbeddingLayer {} = {{\n {}_weights,\n {}, {}\n}};\n\n'
+            .format(name, name, weights.shape[0], weights.shape[1]))
+    hf.write('#define {}_OUT_SIZE {}\n'.format(name.upper(), weights.shape[1]))
+    hf.write('extern const EmbeddingLayer {};\n\n'.format(name))
+
+def dump_embedding_layer(self, f, hf):
+    """Dump a Keras Embedding layer; returns False (no state buffer is reserved)."""
+    name = self.name
+    print("printing layer " + name + " of type " + self.__class__.__name__)
+    weights = self.get_weights()[0]
+    dump_embedding_layer_impl(name, weights, f, hf)
+    return False
+Embedding.dump_layer = dump_embedding_layer
+
+
+# Usage: dump_lpcnet.py <weights.h5> [<output.c> <output.h>]
+# The two output names must be given together: the original
+# `len(sys.argv) > 2` test read sys.argv[3] even when only one was supplied.
+if len(sys.argv) < 2 or len(sys.argv) == 3:
+    sys.exit('usage: {} <weights.h5> [<output.c> <output.h>]'.format(sys.argv[0]))
+
+model, _, _ = lpcnet.new_lpcnet_model(rnn_units1=384)
+model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+#model.summary()
+
+model.load_weights(sys.argv[1])
+
+if len(sys.argv) > 3:
+    cfile = sys.argv[2]
+    hfile = sys.argv[3]
+else:
+    cfile = 'nnet_data.c'
+    hfile = 'nnet_data.h'
+
+
+# `with` closes both output files even if one of the dump steps raises.
+with open(cfile, 'w') as f, open(hfile, 'w') as hf:
+
+    f.write('/*This file is automatically generated from a Keras model*/\n\n')
+    f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "nnet.h"\n#include "{}"\n\n'.format(hfile))
+
+    hf.write('/*This file is automatically generated from a Keras model*/\n\n')
+    hf.write('#ifndef RNN_DATA_H\n#define RNN_DATA_H\n\n#include "nnet.h"\n\n')
+
+    embed_size = lpcnet.embed_size
+
+    # Multiply the signal embedding table by each embed_size-row slice of the
+    # gru_a input weights and dump every product as its own embedding table.
+    E = model.get_layer('embed_sig').get_weights()[0]
+    W = model.get_layer('gru_a').get_weights()[0][:embed_size,:]
+    dump_embedding_layer_impl('gru_a_embed_sig', np.dot(E, W), f, hf)
+    W = model.get_layer('gru_a').get_weights()[0][embed_size:2*embed_size,:]
+    dump_embedding_layer_impl('gru_a_embed_pred', np.dot(E, W), f, hf)
+    W = model.get_layer('gru_a').get_weights()[0][2*embed_size:3*embed_size,:]
+    dump_embedding_layer_impl('gru_a_embed_exc', np.dot(E, W), f, hf)
+    # The remaining rows of the gru_a input weights are dumped as a linear dense layer.
+    W = model.get_layer('gru_a').get_weights()[0][3*embed_size:,:]
+    #FIXME: dump only half the biases
+    b = model.get_layer('gru_a').get_weights()[2]
+    dump_dense_layer_impl('gru_a_dense_feature', W, b, 'LINEAR', f, hf)
+
+    # Layers whose dump_layer returns True get a state buffer in NNetState.
+    layer_list = []
+    for layer in model.layers:
+        if layer.dump_layer(f, hf):
+            layer_list.append(layer.name)
+
+    # gru_a is written a second time, in sparse form, after the loop above.
+    dump_sparse_gru(model.get_layer('gru_a'), f, hf)
+
+    hf.write('#define MAX_RNN_NEURONS {}\n\n'.format(max_rnn_neurons))
+    hf.write('#define MAX_CONV_INPUTS {}\n\n'.format(max_conv_inputs))
+    hf.write('#define MAX_MDENSE_TMP {}\n\n'.format(max_mdense_tmp))
+
+
+    hf.write('typedef struct {\n')
+    for name in layer_list:
+        hf.write(' float {}_state[{}_STATE_SIZE];\n'.format(name, name.upper()))
+    hf.write('} NNetState;\n')
+
+    hf.write('\n\n#endif\n')
diff --git a/dnn/training_tf2/lpcnet.py b/dnn/training_tf2/lpcnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..546e6d2250946ee0d2feebd355c0d9de1d8d83bd
--- /dev/null
+++ b/dnn/training_tf2/lpcnet.py
@@ -0,0 +1,172 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+import math
+from tensorflow.keras.models import Model
+from tensorflow.keras.layers import Input, GRU, Dense, Embedding, Reshape, Concatenate, Lambda, Conv1D, Multiply, Add, Bidirectional, MaxPooling1D, Activation
+from tensorflow.keras import backend as K
+from tensorflow.keras.initializers import Initializer
+from tensorflow.keras.callbacks import Callback
+from mdense import MDense
+import numpy as np
+import h5py
+import sys
+
+frame_size = 160
+pcm_bits = 8
+embed_size = 128
+pcm_levels = 2**pcm_bits
+
+class Sparsify(Callback):
+    """Callback that prunes the recurrent weights of layer 'gru_a' in 16-wide blocks.
+
+    Pruning is skipped before batch `t_start`. Between `t_start` and `t_end`
+    it runs every `interval` batches with a density that falls from 1
+    towards the final value; from `t_end` on it runs after every batch at
+    the final density. `density` holds one final density per N-column
+    sub-matrix of the recurrent kernel.
+    """
+    def __init__(self, t_start, t_end, interval, density):
+        super(Sparsify, self).__init__()
+        self.batch = 0
+        self.t_start = t_start
+        self.t_end = t_end
+        self.interval = interval
+        self.final_density = density
+
+    def on_batch_end(self, batch, logs=None):
+        """Count the batch and, when scheduled, mask the weakest weight blocks.
+
+        The `batch` argument is not used; the callback keeps its own counter.
+        """
+        self.batch += 1
+        if self.batch < self.t_start or ((self.batch-self.t_start) % self.interval != 0 and self.batch < self.t_end):
+            # Not a pruning step.
+            pass
+        else:
+            layer = self.model.get_layer('gru_a')
+            w = layer.get_weights()
+            p = w[1]
+            nb = p.shape[1]//p.shape[0]
+            N = p.shape[0]
+            for k in range(nb):
+                density = self.final_density[k]
+                if self.batch < self.t_end:
+                    # r goes from 1 at t_start to 0 at t_end, so the density
+                    # moves from 1 to final_density[k] along a cubic curve.
+                    r = 1 - (self.batch-self.t_start)/(self.t_end - self.t_start)
+                    density = 1 - (1-self.final_density[k])*(1 - r*r*r)
+                A = p[:, k*N:(k+1)*N]
+                # The diagonal is excluded from the ranking and always kept (see mask below).
+                A = A - np.diag(np.diag(A))
+                L=np.reshape(A, (N, N//16, 16))
+                # Energy of each run of 16 consecutive columns within a row.
+                S=np.sum(L*L, axis=-1)
+                SS=np.sort(np.reshape(S, (-1,)))
+                # Clamp the index so a density of 0 selects the largest block
+                # energy instead of indexing one past the end of SS.
+                thresh = SS[min(round(N*N//16*(1-density)), SS.shape[0]-1)]
+                mask = (S>=thresh).astype('float32')
+                mask = np.repeat(mask, 16, axis=1)
+                mask = np.minimum(1, mask + np.diag(np.ones((N,))))
+                p[:, k*N:(k+1)*N] = p[:, k*N:(k+1)*N]*mask
+            w[1] = p
+            layer.set_weights(w)
+
+
+class PCMInit(Initializer):
+    """Initializer producing uniform noise plus a linear ramp over the rows.
+
+    Every row gets uniform values in [-1.7321, 1.7321] and a per-row
+    offset that increases linearly with the row index; the sum is scaled
+    by `gain`. If `seed` is not None it seeds NumPy's global RNG on each call.
+    """
+    def __init__(self, gain=.1, seed=None):
+        self.gain = gain
+        self.seed = seed
+
+    def __call__(self, shape, dtype=None):
+        """Return a NumPy array of shape (prod(shape[:-1]), shape[-1]); `dtype` is ignored."""
+        num_rows = 1
+        for dim in shape[:-1]:
+            num_rows *= dim
+        num_cols = shape[-1]
+        flat_shape = (num_rows, num_cols)
+        if self.seed is not None:
+            np.random.seed(self.seed)
+        a = np.random.uniform(-1.7321, 1.7321, flat_shape)
+        # Column vector of num_rows evenly spaced offsets, added to every column.
+        a = a + np.reshape(math.sqrt(12)*np.arange(-.5*num_rows+.5,.5*num_rows-.4)/num_rows, (num_rows, 1))
+        return self.gain * a
+
+    def get_config(self):
+        return {
+            'gain': self.gain,
+            'seed': self.seed
+        }
+
+def new_lpcnet_model(rnn_units1=384, rnn_units2=16, nb_used_features = 38, training=False, adaptation=False):
+    """Build the LPCNet Keras models.
+
+    Args:
+        rnn_units1: number of units of the first GRU ('gru_a').
+        rnn_units2: number of units of the second GRU ('gru_b').
+        nb_used_features: width of the feature input.
+        training: if True the feature convolutions use 'valid' padding,
+            otherwise 'same'.
+        adaptation: if True the two GRUs, the output MDense layer and the
+            signal embedding are marked non-trainable.
+
+    Returns:
+        (model, encoder, decoder):
+        model maps [pcm, feat, pitch] to the output distribution over
+        `pcm_levels` values; encoder maps [feat, pitch] to the conditioning
+        features; decoder maps [pcm, dec_feat, dec_state1, dec_state2] to
+        [probabilities, state1, state2]. All three share the same layers.
+    """
+    pcm = Input(shape=(None, 3))
+    feat = Input(shape=(None, nb_used_features))
+    pitch = Input(shape=(None, 1))
+    dec_feat = Input(shape=(None, 128))
+    dec_state1 = Input(shape=(rnn_units1,))
+    dec_state2 = Input(shape=(rnn_units2,))
+
+    padding = 'valid' if training else 'same'
+    fconv1 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv1')
+    fconv2 = Conv1D(128, 3, padding=padding, activation='tanh', name='feature_conv2')
+
+    embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
+    cpcm = Reshape((-1, embed_size*3))(embed(pcm))
+
+    pembed = Embedding(256, 64, name='embed_pitch')
+    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])
+
+    cfeat = fconv2(fconv1(cat_feat))
+
+    fdense1 = Dense(128, activation='tanh', name='feature_dense1')
+    fdense2 = Dense(128, activation='tanh', name='feature_dense2')
+
+    cfeat = fdense2(fdense1(cfeat))
+
+    # Repeat every conditioning frame frame_size times along the time axis.
+    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))
+
+    # reset_after takes a bool; the string 'true' only worked because it is truthy.
+    rnn = GRU(rnn_units1, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after=True, name='gru_a')
+    rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True, recurrent_activation="sigmoid", reset_after=True, name='gru_b')
+
+    rnn_in = Concatenate()([cpcm, rep(cfeat)])
+    md = MDense(pcm_levels, activation='softmax', name='dual_fc')
+    gru_out1, _ = rnn(rnn_in)
+    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
+    ulaw_prob = md(gru_out2)
+
+    if adaptation:
+        rnn.trainable=False
+        rnn2.trainable=False
+        md.trainable=False
+        # Was `embed.Trainable`, which only created an unused attribute and
+        # left the embedding trainable.
+        embed.trainable=False
+
+    model = Model([pcm, feat, pitch], ulaw_prob)
+    model.rnn_units1 = rnn_units1
+    model.rnn_units2 = rnn_units2
+    model.nb_used_features = nb_used_features
+    model.frame_size = frame_size
+
+    encoder = Model([feat, pitch], cfeat)
+
+    dec_rnn_in = Concatenate()([cpcm, dec_feat])
+    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
+    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
+    dec_ulaw_prob = md(dec_gru_out2)
+
+    decoder = Model([pcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
+    return model, encoder, decoder
diff --git a/dnn/training_tf2/mdense.py b/dnn/training_tf2/mdense.py
new file mode 100644
index 0000000000000000000000000000000000000000..5679dd290366f05bd0c2161117a038a92f26ba72
--- /dev/null
+++ b/dnn/training_tf2/mdense.py
@@ -0,0 +1,95 @@
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import Layer, InputSpec
+from tensorflow.keras import activations
+from tensorflow.keras import initializers, regularizers, constraints
+import numpy as np
+import math
+
+class MDense(Layer):
+    """Dense layer with several parallel channels that are summed.
+
+    Each channel computes ``tanh(inputs . kernel + bias) * factor``; the
+    channels are summed and `activation` is applied to the sum.
+    Weights: kernel (outputs, input_dim, channels), bias and factor
+    (outputs, channels). `factor` is initialised to ones and reuses the
+    bias regularizer and constraint.
+    """
+
+    def __init__(self, outputs,
+                 channels=2,
+                 activation=None,
+                 use_bias=True,
+                 kernel_initializer='glorot_uniform',
+                 bias_initializer='zeros',
+                 kernel_regularizer=None,
+                 bias_regularizer=None,
+                 activity_regularizer=None,
+                 kernel_constraint=None,
+                 bias_constraint=None,
+                 **kwargs):
+        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
+            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
+        super(MDense, self).__init__(**kwargs)
+        self.units = outputs
+        self.channels = channels
+        self.activation = activations.get(activation)
+        self.use_bias = use_bias
+        self.kernel_initializer = initializers.get(kernel_initializer)
+        self.bias_initializer = initializers.get(bias_initializer)
+        self.kernel_regularizer = regularizers.get(kernel_regularizer)
+        self.bias_regularizer = regularizers.get(bias_regularizer)
+        self.activity_regularizer = regularizers.get(activity_regularizer)
+        self.kernel_constraint = constraints.get(kernel_constraint)
+        self.bias_constraint = constraints.get(bias_constraint)
+        self.input_spec = InputSpec(min_ndim=2)
+        self.supports_masking = True
+
+    def build(self, input_shape):
+        """Create the kernel, optional bias and factor weights for `input_shape`."""
+        assert len(input_shape) >= 2
+        input_dim = input_shape[-1]
+
+        self.kernel = self.add_weight(shape=(self.units, input_dim, self.channels),
+                                      initializer=self.kernel_initializer,
+                                      name='kernel',
+                                      regularizer=self.kernel_regularizer,
+                                      constraint=self.kernel_constraint)
+        if self.use_bias:
+            self.bias = self.add_weight(shape=(self.units, self.channels),
+                                        initializer=self.bias_initializer,
+                                        name='bias',
+                                        regularizer=self.bias_regularizer,
+                                        constraint=self.bias_constraint)
+        else:
+            self.bias = None
+        self.factor = self.add_weight(shape=(self.units, self.channels),
+                                      initializer='ones',
+                                      name='factor',
+                                      regularizer=self.bias_regularizer,
+                                      constraint=self.bias_constraint)
+        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
+        self.built = True
+
+    def call(self, inputs):
+        """Apply the per-channel projection, sum over the last axis, then activate."""
+        output = K.dot(inputs, self.kernel)
+        if self.use_bias:
+            output = output + self.bias
+        output = K.tanh(output) * self.factor
+        output = K.sum(output, axis=-1)
+        if self.activation is not None:
+            output = self.activation(output)
+        return output
+
+    def compute_output_shape(self, input_shape):
+        """Return `input_shape` with its last dimension replaced by the output count."""
+        assert input_shape and len(input_shape) >= 2
+        assert input_shape[-1]
+        output_shape = list(input_shape)
+        output_shape[-1] = self.units
+        return tuple(output_shape)
+
+    def get_config(self):
+        """Return the layer configuration, keyed by the `__init__` argument names."""
+        config = {
+            # Keys must match __init__: the previous 'units' key is not an
+            # argument of this class, and 'channels' was missing.
+            'outputs': self.units,
+            'channels': self.channels,
+            'activation': activations.serialize(self.activation),
+            'use_bias': self.use_bias,
+            'kernel_initializer': initializers.serialize(self.kernel_initializer),
+            'bias_initializer': initializers.serialize(self.bias_initializer),
+            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
+            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
+            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
+            'kernel_constraint': constraints.serialize(self.kernel_constraint),
+            'bias_constraint': constraints.serialize(self.bias_constraint)
+        }
+        base_config = super(MDense, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
diff --git a/dnn/training_tf2/train_lpcnet.py b/dnn/training_tf2/train_lpcnet.py
new file mode 100755
index 0000000000000000000000000000000000000000..f6eddccb54a4a86f792172f4111f66ad177b8cf8
--- /dev/null
+++ b/dnn/training_tf2/train_lpcnet.py
@@ -0,0 +1,124 @@
+#!/usr/bin/python3
+'''Copyright (c) 2018 Mozilla
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+'''
+
+# Train a LPCNet model (note not a Wavenet model)
+
+import lpcnet
+import sys
+import numpy as np
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import ModelCheckpoint
+from ulaw import ulaw2lin, lin2ulaw
+import tensorflow.keras.backend as K
+import h5py
+
+import tensorflow as tf
+
+# Both input files are required; fail with a usage message instead of an
+# IndexError after the model has already been built.
+if len(sys.argv) < 3:
+    sys.exit('usage: {} <features.f32> <data.u8>'.format(sys.argv[0]))
+
+# Cap the memory TensorFlow may use on the first GPU.
+gpus = tf.config.experimental.list_physical_devices('GPU')
+if gpus:
+    try:
+        tf.config.experimental.set_virtual_device_configuration(gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5120)])
+    except RuntimeError as e:
+        print(e)
+
+nb_epochs = 120
+
+# Try reducing batch_size if you run out of memory on your GPU
+batch_size = 64
+
+model, _, _ = lpcnet.new_lpcnet_model(training=True)
+
+# NOTE: the model is compiled again further down with the real optimizer
+# settings; that second compile() is the one training uses.
+model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
+model.summary()
+
+feature_file = sys.argv[1]
+pcm_file = sys.argv[2]     # 8-bit data, four interleaved streams (see below)
+frame_size = model.frame_size
+nb_features = 55
+nb_used_features = model.nb_used_features
+feature_chunk_size = 15
+pcm_chunk_size = frame_size*feature_chunk_size
+
+# The data file is read as unsigned bytes holding four interleaved values
+# per sample: signal, prediction, input excitation, output excitation.
+
+data = np.fromfile(pcm_file, dtype='uint8')
+nb_frames = len(data)//(4*pcm_chunk_size)
+
+features = np.fromfile(feature_file, dtype='float32')
+
+# limit to discrete number of frames
+data = data[:nb_frames*4*pcm_chunk_size]
+features = features[:nb_frames*feature_chunk_size*nb_features]
+
+features = np.reshape(features, (nb_frames*feature_chunk_size, nb_features))
+
+# De-interleave the four byte streams, one chunk of pcm_chunk_size samples per row.
+sig = np.reshape(data[0::4], (nb_frames, pcm_chunk_size, 1))
+pred = np.reshape(data[1::4], (nb_frames, pcm_chunk_size, 1))
+in_exc = np.reshape(data[2::4], (nb_frames, pcm_chunk_size, 1))
+out_exc = np.reshape(data[3::4], (nb_frames, pcm_chunk_size, 1))
+del data
+
+print("ulaw std = ", np.std(out_exc))
+
+features = np.reshape(features, (nb_frames, feature_chunk_size, nb_features))
+features = features[:, :, :nb_used_features]
+features[:,:,18:36] = 0
+
+# Add two frames of context on each side of every chunk, taken from the
+# neighbouring chunks. The first chunk is padded in front with its own first
+# two frames and the last chunk is padded at the end with the last two
+# frames of chunk 0.
+fpad1 = np.concatenate([features[0:1, 0:2, :], features[:-1, -2:, :]], axis=0)
+fpad2 = np.concatenate([features[1:, :2, :], features[0:1, -2:, :]], axis=0)
+features = np.concatenate([fpad1, features, fpad2], axis=1)
+
+
+# Integer pitch input derived from feature column 36.
+periods = (.1 + 50*features[:,:,36:37]+100).astype('int16')
+#periods = np.minimum(periods, 255)
+
+in_data = np.concatenate([sig, pred, in_exc], axis=-1)
+
+del sig
+del pred
+del in_exc
+
+# dump models to disk as we go
+checkpoint = ModelCheckpoint('lpcnet32c_384_10_G16_{epoch:02d}.h5')
+
+#Set this to True to adapt an existing model (e.g. on new data)
+adaptation = False
+
+if adaptation:
+    #Adapting from an existing model
+    model.load_weights('lpcnet24c_384_10_G16_120.h5')
+    sparsify = lpcnet.Sparsify(0, 0, 1, (0.05, 0.05, 0.2))
+    lr = 0.0001
+    decay = 0
+else:
+    #Training from scratch
+    sparsify = lpcnet.Sparsify(2000, 40000, 400, (0.05, 0.05, 0.2))
+    lr = 0.001
+    decay = 5e-5
+
+model.compile(optimizer=Adam(lr, decay=decay, beta_2=0.99), loss='sparse_categorical_crossentropy')
+model.save_weights('lpcnet32c_384_10_G16_00.h5')
+model.fit([in_data, features, periods], out_exc, batch_size=batch_size, epochs=nb_epochs, validation_split=0.0, callbacks=[checkpoint, sparsify])
diff --git a/dnn/training_tf2/ulaw.py b/dnn/training_tf2/ulaw.py
new file mode 100644
index 0000000000000000000000000000000000000000..b79d4315bf1fa7cfc1236c71e79762e0511713fb
--- /dev/null
+++ b/dnn/training_tf2/ulaw.py
@@ -0,0 +1,19 @@
+
+import numpy as np
+import math
+
+scale = 255.0/32768.0
+scale_1 = 32768.0/255.0
+def ulaw2lin(u):
+    """Convert mu-law codes centred on 128 to linear sample values.
+
+    Code 128 maps to 0; codes above 128 give positive values and codes
+    below give negative values. Accepts a scalar or an array.
+    """
+    u = np.asarray(u)
+    if u.dtype.kind == 'u':
+        # Unsigned codes (e.g. uint8 read from disk) would wrap around in
+        # `u - 128` for every code below 128; widen to a signed type first.
+        u = u.astype('int32')
+    u = u - 128
+    s = np.sign(u)
+    u = np.abs(u)
+    return s*scale_1*(np.exp(u/128.*math.log(256))-1)
+
+
+def lin2ulaw(x):
+    """Convert linear sample values to mu-law codes centred on 128.
+
+    The result is rounded, clipped to 0..255 and returned as int16.
+    Accepts a scalar or an array.
+    """
+    x = np.asarray(x)
+    if x.dtype.kind in 'iu':
+        # np.abs overflows for the most negative value of a signed integer
+        # type (e.g. int16 -32768); do the arithmetic in floating point.
+        x = x.astype('float64')
+    s = np.sign(x)
+    x = np.abs(x)
+    u = (s*(128*np.log(1+scale*x)/math.log(256)))
+    u = np.clip(128 + np.round(u), 0, 255)
+    return u.astype('int16')