diff --git a/dnn/nfec_dec.c b/dnn/nfec_dec.c new file mode 100644 index 0000000000000000000000000000000000000000..8b35a63a29c9118aada2568bf367f104a535d5a2 --- /dev/null +++ b/dnn/nfec_dec.c @@ -0,0 +1,118 @@ +#include "nfec_dec.h" + +//#define DEBUG + +#ifdef DEBUG +#include <stdio.h> +#endif + +void nfec_dec_init_states( + NFECDecState *h, /* io: state buffer handle */ + const float *initial_state /* i: initial state */ + ) +{ + /* initialize GRU states from initial state */ + compute_dense(&state1, h->dense2_state, initial_state); + compute_dense(&state2, h->dense4_state, initial_state); + compute_dense(&state3, h->dense6_state, initial_state); +} + +void nfec_dec_unquantize_latent_vector( + float *z, /* o: unquantized latent vector */ + const int *zq, /* i: quantized latent vector */ + int quant_level /* i: quantization level */ + ) +{ + int i; + /* inverse scaling and type conversion */ + for (i = 0; i < NFEC_STATS_NUM_LATENTS; i ++) + { + z[i] = (float) zq[i] / nfec_stats_quant_scales[quant_level * NFEC_STATS_NUM_LATENTS + i]; + } +} + +void nfec_decode_qframe( + NFECDecState *dec_state, /* io: state buffer handle */ + float *qframe, /* o: quadruple feature frame (four concatenated frames) */ + const float *input /* i: latent vector */ + ) +{ + float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE]; + int output_index = 0; + int input_index = 0; +#ifdef DEBUG + static FILE *fids[8] = {NULL}; + int i; + char filename[256]; + + for (i=0; i < 8; i ++) + { + if (fids[i] == NULL) + { + sprintf(filename, "y%d.f32", i + 1); + fids[i] = fopen(filename, "wb"); + } + } +#endif + + /* run decoder stack and concatenate output in buffer */ + compute_dense(&dec_dense1, &buffer[output_index], input); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE1_OUT_SIZE, fids[0]); +#endif + input_index = output_index; + 
output_index += DEC_DENSE1_OUT_SIZE; + + compute_gru2(&dec_dense2, dec_state->dense2_state, &buffer[input_index]); + memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float)); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE2_OUT_SIZE, fids[1]); +#endif + input_index = output_index; + output_index += DEC_DENSE2_OUT_SIZE; + + compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE3_OUT_SIZE, fids[2]); +#endif + input_index = output_index; + output_index += DEC_DENSE3_OUT_SIZE; + + compute_gru2(&dec_dense4, dec_state->dense4_state, &buffer[input_index]); + memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float)); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE4_OUT_SIZE, fids[3]); +#endif + input_index = output_index; + output_index += DEC_DENSE4_OUT_SIZE; + + compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE5_OUT_SIZE, fids[4]); +#endif + input_index = output_index; + output_index += DEC_DENSE5_OUT_SIZE; + + compute_gru2(&dec_dense6, dec_state->dense6_state, &buffer[input_index]); + memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float)); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE6_OUT_SIZE, fids[5]); +#endif + input_index = output_index; + output_index += DEC_DENSE6_OUT_SIZE; + + compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE7_OUT_SIZE, fids[6]); +#endif + input_index = output_index; + output_index += DEC_DENSE7_OUT_SIZE; + + compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]); +#ifdef DEBUG + fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE8_OUT_SIZE, fids[7]); 
+#endif + output_index += DEC_DENSE8_OUT_SIZE; + + compute_dense(&dec_final, qframe, buffer); +} \ No newline at end of file diff --git a/dnn/nfec_dec.h b/dnn/nfec_dec.h new file mode 100644 index 0000000000000000000000000000000000000000..b866b00ca3d510f7334193bd6b546b179494fe71 --- /dev/null +++ b/dnn/nfec_dec.h @@ -0,0 +1,17 @@ +#ifndef _NFEC_DEC_H +#define _NFEC_DEC_H + +#include "nfec_dec_data.h" +#include "nfec_stats_data.h" + +typedef struct { + float dense2_state[DEC_DENSE2_STATE_SIZE]; + float dense4_state[DEC_DENSE4_STATE_SIZE]; + float dense6_state[DEC_DENSE6_STATE_SIZE]; +} NFECDecState; + +void nfec_dec_init_states(NFECDecState *h, const float * initial_state); +void nfec_dec_unquantize_latent_vector(float *z, const int *zq, int quant_level); +void nfec_decode_qframe(NFECDecState *h, float *qframe, const float * z); + +#endif \ No newline at end of file diff --git a/dnn/nfec_dec_demo.c b/dnn/nfec_dec_demo.c new file mode 100644 index 0000000000000000000000000000000000000000..ce5e6a2138d40cc61795ce5b6c86fe2c36e31452 --- /dev/null +++ b/dnn/nfec_dec_demo.c @@ -0,0 +1,68 @@ +#include <stdlib.h> +#include <stdio.h> + +#include "nfec_dec.h" +#include "nfec_enc.h" + + +void usage() +{ + printf("nfec_dec_demo <input> <output>\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + NFECDecState dec_state; + float feature_buffer[36]; + float qframe[4 * NFEC_DEC_NUM_FEATURES]; + float latents[80]; + float initial_state[24]; + int quantized_latents[80]; + int index = 0; + FILE *in_fid, *out_fid; + int qlevel = 0; + + memset(&dec_state, 0, sizeof(dec_state)); + + if (argc < 3) usage(); + + in_fid = fopen(argv[1], "rb"); + if (in_fid == NULL) + { + perror("Could not open input file"); + usage(); + } + + out_fid = fopen(argv[2], "wb"); + if (out_fid == NULL) + { + perror("Could not open output file"); + usage(); + } + + /* read initial state from input stream */ + if (fread(initial_state, sizeof(float), 24, in_fid) != 24) + { + perror("error while reading initial 
state"); + return 1; + } + + /* initialize GRU states */ + nfec_dec_init_states(&dec_state, initial_state); + + /* start decoding: one latent vector in, one quadruple feature frame out */ + while (fread(latents, sizeof(float), 80, in_fid) == 80) + { + nfec_decode_qframe(&dec_state, qframe, latents); + fwrite(qframe, sizeof(float), 4 * NFEC_DEC_NUM_FEATURES, out_fid); + } + + fclose(in_fid); + fclose(out_fid); + + + return 0; +} + +/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_dec_demo.c nfec_dec.c nnet.c nfec_dec_data.c nfec_stats_data.c kiss99.c -g -o nfec_dec_demo */ \ No newline at end of file diff --git a/dnn/nfec_enc.c b/dnn/nfec_enc.c index d524e8211f791c5498cb00e08095a76b8d1a55f4..1fd5bbe185cf99a84036aea8a483b601c9239820 100644 --- a/dnn/nfec_enc.c +++ b/dnn/nfec_enc.c @@ -1,6 +1,9 @@ +#include <math.h> + #include "nfec_enc.h" #include "nnet.h" #include "nfec_enc_data.h" +#include "nfec_stats_data.h" //#define DEBUG @@ -8,7 +11,12 @@ #include <stdio.h> #endif -void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input) +void nfec_encode_dframe( + struct NFECEncState *enc_state, /* io: encoder state */ + float *latents, /* o: latent vector */ + float *initial_state, /* o: initial state */ + const float *input /* i: double feature frame (concatenated) */ + ) { float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE]; int output_index = 0; @@ -105,4 +113,28 @@ void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *i input_index = output_index; compute_dense(&gdense2, initial_state, &buffer[input_index]); +} + +void nfec_quantize_latent_vector( + int *z_q, /* o: quantized latent vector */ + const float *z, /* i: unquantized latent vector */ + int quant_level /* i: quantization level */ + ) +{ + int i; + float delta; + float tmp[NFEC_LATENT_DIM]; + + for (i 
= 0; i < NFEC_LATENT_DIM; i ++) + { + /* dead-zone 
transform */ + delta = nfec_stats_dead_zone_theta[quant_level * NFEC_LATENT_DIM + i] - .5f; + tmp[i] = z[i] - delta * tanhf(z[i] / (delta + 0.1f)); + + /* scaling */ + tmp[i] *= nfec_stats_quant_scales[quant_level * NFEC_LATENT_DIM + i]; + + /* quantization by rounding (CAVE: is there a quantization routine with overflow check available?) */ + z_q[i] = (int) roundf(tmp[i]); + } } \ No newline at end of file diff --git a/dnn/nfec_enc.h b/dnn/nfec_enc.h index 27face1d7d0ea1944b08b7bcf5c01f68ed6beb02..9544c93ad80eb361a83d3d12fd6a7bfea2394213 100644 --- a/dnn/nfec_enc.h +++ b/dnn/nfec_enc.h @@ -11,5 +11,6 @@ struct NFECEncState{ }; void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input); +void nfec_quantize_latent_vector(int *z_q, const float *z, int quant_level); #endif \ No newline at end of file diff --git a/dnn/nfec_enc_demo.c b/dnn/nfec_enc_demo.c index 809c90bd56f99774a0ff249a09c8486580eef4f8..f58f1166faa392b1b8633f6fe5ace59a584bcae4 100644 --- a/dnn/nfec_enc_demo.c +++ b/dnn/nfec_enc_demo.c @@ -16,8 +16,9 @@ int main(int argc, char **argv) float dframe[2 * NFEC_NUM_FEATURES]; float latents[80]; float initial_state[24]; + int quantized_latents[NFEC_LATENT_DIM]; int index = 0; - FILE *fid, *latents_fid, *states_fid; + FILE *fid, *latents_fid, *quantized_latents_fid, *states_fid; memset(&enc_state, 0, sizeof(enc_state)); @@ -40,6 +41,16 @@ int main(int argc, char **argv) usage(); } + char filename[256]; + strcpy(filename, argv[2]); + strcat(filename, ".quantized.f32"); + quantized_latents_fid = fopen(filename, "wb"); + if (quantized_latents_fid == NULL) + { + fprintf(stderr, "could not open latents file %s\n", filename); + usage(); + } + states_fid = fopen(argv[3], "wb"); if (states_fid == NULL) { @@ -55,8 +66,10 @@ int main(int argc, char **argv) if (index == 2) { nfec_encode_dframe(&enc_state, latents, initial_state, dframe); + nfec_quantize_latent_vector(quantized_latents, latents, 0); index = 0; fwrite(latents, 
sizeof(float), NFEC_LATENT_DIM, latents_fid); + fwrite(quantized_latents, sizeof(int), NFEC_LATENT_DIM, quantized_latents_fid); fwrite(initial_state, sizeof(float), GDENSE2_OUT_SIZE, states_fid); } } @@ -64,6 +77,9 @@ int main(int argc, char **argv) fclose(fid); fclose(states_fid); fclose(latents_fid); + fclose(quantized_latents_fid); + + return 0; } -/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c kiss99.c -g -o nfec_enc_demo */ \ No newline at end of file +/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c nfec_stats_data.c kiss99.c -g -o nfec_enc_demo */ \ No newline at end of file diff --git a/dnn/training_tf2/dump_nfec_model.py b/dnn/training_tf2/dump_nfec_model.py index 9f0768a8773c7dc6cad1e1fb9fe90ca4fe86a33a..1f016467376a65d82cb72df52256b22f53582fb3 100644 --- a/dnn/training_tf2/dump_nfec_model.py +++ b/dnn/training_tf2/dump_nfec_model.py @@ -1,6 +1,7 @@ import argparse import os +os.environ['CUDA_VISIBLE_DEVICES'] = "" parser = argparse.ArgumentParser() @@ -59,17 +60,17 @@ def dump_statistical_model(qembedding, f, fh): r = 0.5 + 0.5 * tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy() theta = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy() - printVector(f, quant_scales[:], 'nfec_stats_quant_scales') - printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta') - printVector(f, r, 'nfec_stats_r') - printVector(f, theta, 'nfec_stats_theta') + printVector(f, quant_scales[:], 'nfec_stats_quant_scales', static=False) + printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta', static=False) + printVector(f, r, 'nfec_stats_r', static=False) + printVector(f, theta, 'nfec_stats_theta', static=False) fh.write( f""" -extern float nfec_stats_quant_scales; -extern float nfec_stats_dead_zone_theta; -extern float nfec_stats_r; -extern float nfec_stats_theta; +extern const float nfec_stats_quant_scales[{levels * N}]; +extern const float nfec_stats_dead_zone_theta[{levels * N}]; +extern 
const float nfec_stats_r[{levels * N}]; +extern const float nfec_stats_theta[{levels * N}]; """ ) @@ -159,6 +160,7 @@ f""" header_fid.write( f""" #define NFEC_STATS_NUM_LEVELS {num_levels} +#define NFEC_STATS_NUM_LATENTS {args.latent_dim} """ ) @@ -171,3 +173,60 @@ f""" header_fid.close() source_fid.close() + # decoder + decoder_dense_names = [ + 'state1', + 'state2', + 'state3', + 'dec_dense1', + 'dec_dense3', + 'dec_dense5', + 'dec_dense7', + 'dec_dense8', + 'dec_final' + ] + + decoder_gru_names = [ + 'dec_dense2', + 'dec_dense4', + 'dec_dense6' + ] + + source_fid = open("nfec_dec_data.c", 'w') + header_fid = open("nfec_dec_data.h", 'w') + + start_header(header_fid, "nfec_dec_data.h") + start_source(source_fid, "nfec_dec_data.h", os.path.basename(args.weights)) + + # dump GRUs first so that max_rnn_neurons below refers to the decoder GRUs + max_rnn_neurons = max( + [ + dump_gru_layer(decoder.get_layer(name), source_fid, header_fid) + for name in decoder_gru_names + ] + ) + + # some global constants (written after the GRU dump, which computes max_rnn_neurons) + header_fid.write( +f""" +#define NFEC_DEC_NUM_FEATURES 20 + +#define NFEC_DEC_LATENT_DIM {args.latent_dim} + +#define NFEC_DEC_MAX_RNN_NEURONS {max_rnn_neurons} + + +""" + ) + + + # dump Dense layers + for name in decoder_dense_names: + layer = decoder.get_layer(name) + dump_dense_layer(layer, source_fid, header_fid) + + finish_header(header_fid) + finish_source(source_fid) + + header_fid.close() + source_fid.close() \ No newline at end of file diff --git a/dnn/training_tf2/keraslayerdump.py b/dnn/training_tf2/keraslayerdump.py index 41c723ae616a0bebdb6888be7ea7c689b1f75dfa..3269c6b82d1ae000e20141bc1b931017e926617b 100644 --- a/dnn/training_tf2/keraslayerdump.py +++ b/dnn/training_tf2/keraslayerdump.py @@ -3,13 +3,16 @@ import numpy as np -def printVector(f, vector, name, dtype='float', dotp=False): +def printVector(f, vector, name, dtype='float', dotp=False, static=True): """ prints vector as one-dimensional C array """ 
if dotp: vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8)) vector = 
vector.transpose((2, 0, 3, 1)) v = np.reshape(vector, (-1)) - f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v))) + if static: + f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v))) + else: + f.write('const {} {}[{}] = {{\n '.format(dtype, name, len(v))) for i in range(0, len(v)): f.write('{}'.format(v[i])) if (i!=len(v)-1):