diff --git a/dnn/nfec_dec.c b/dnn/nfec_dec.c
new file mode 100644
index 0000000000000000000000000000000000000000..8b35a63a29c9118aada2568bf367f104a535d5a2
--- /dev/null
+++ b/dnn/nfec_dec.c
@@ -0,0 +1,118 @@
+#include "nfec_dec.h"
+
+//#define DEBUG
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+void nfec_dec_init_states(
+    NFECDecState *h,            /* io: state buffer handle */
+    const float *initial_state  /* i: initial state */
+    )
+{
+    /* initialize each GRU state as the output of its own dense layer (state1..3) applied to initial_state */
+    compute_dense(&state1, h->dense2_state, initial_state);
+    compute_dense(&state2, h->dense4_state, initial_state);
+    compute_dense(&state3, h->dense6_state, initial_state);
+}
+
+void nfec_dec_unquantize_latent_vector(
+    float *z,       /* o: unquantized latent vector */
+    const int *zq,  /* i: quantized latent vector */
+    int quant_level /* i: quantization level */
+    )
+{
+    const float *scales = &nfec_stats_quant_scales[quant_level * NFEC_STATS_NUM_LATENTS]; /* scale row of this level */
+    int k;
+    for (k = 0; k < NFEC_STATS_NUM_LATENTS; k++)
+    {
+        z[k] = (float) zq[k] / scales[k];   /* int -> float, then divide out the per-latent scale */
+    }
+}
+
+void nfec_decode_qframe(
+    NFECDecState *dec_state,    /* io: state buffer handle */
+    float *qframe,              /* o: quadruple feature frame (four concatenated frames) */
+    const float *input          /* i: latent vector */
+    )
+{   /* Decodes one latent vector into qframe; the three GRU state arrays in dec_state are passed to the GRU layers. */
+    float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE];
+    int output_index = 0;
+    int input_index = 0;
+#ifdef DEBUG
+    static FILE *fids[8] = {NULL};
+    int i;
+    char filename[256];
+
+    for (i=0; i < 8; i ++)
+    {
+        if (fids[i] == NULL)
+        {
+            sprintf(filename, "y%d.f32", i + 1);
+            fids[i] = fopen(filename, "wb");
+        }
+    }
+#endif
+
+    /* run decoder stack: each layer reads the previous layer's slice of buffer and appends its own output behind it */
+    compute_dense(&dec_dense1, &buffer[output_index], input);
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE1_OUT_SIZE, fids[0]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE1_OUT_SIZE;
+
+    compute_gru2(&dec_dense2, dec_state->dense2_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float)); /* the GRU state is copied out as this layer's output */
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE2_OUT_SIZE, fids[1]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE2_OUT_SIZE;
+
+    compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]);
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE3_OUT_SIZE, fids[2]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE3_OUT_SIZE;
+
+    compute_gru2(&dec_dense4, dec_state->dense4_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float));
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE4_OUT_SIZE, fids[3]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE4_OUT_SIZE;
+
+    compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]);
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE5_OUT_SIZE, fids[4]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE5_OUT_SIZE;
+
+    compute_gru2(&dec_dense6, dec_state->dense6_state, &buffer[input_index]);
+    memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float));
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE6_OUT_SIZE, fids[5]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE6_OUT_SIZE;
+
+    compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]);
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE7_OUT_SIZE, fids[6]);
+#endif
+    input_index = output_index;
+    output_index += DEC_DENSE7_OUT_SIZE;
+
+    compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]);
+#ifdef DEBUG
+    fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE8_OUT_SIZE, fids[7]);
+#endif
+    output_index += DEC_DENSE8_OUT_SIZE;
+
+    compute_dense(&dec_final, qframe, buffer); /* final layer is handed the start of the concatenated buffer */
+}
\ No newline at end of file
diff --git a/dnn/nfec_dec.h b/dnn/nfec_dec.h
new file mode 100644
index 0000000000000000000000000000000000000000..b866b00ca3d510f7334193bd6b546b179494fe71
--- /dev/null
+++ b/dnn/nfec_dec.h
@@ -0,0 +1,17 @@
+#ifndef _NFEC_DEC_H
+#define _NFEC_DEC_H
+
+#include "nfec_dec_data.h"
+#include "nfec_stats_data.h"
+
+typedef struct {  /* GRU state vectors of the decoder, one per GRU layer, kept between nfec_decode_qframe() calls */
+    float dense2_state[DEC_DENSE2_STATE_SIZE];  /* state of GRU layer dec_dense2 */
+    float dense4_state[DEC_DENSE4_STATE_SIZE];  /* state of GRU layer dec_dense4 (own size macro, not dense2's) */
+    float dense6_state[DEC_DENSE6_STATE_SIZE];  /* state of GRU layer dec_dense6 (own size macro, not dense2's) */
+} NFECDecState;
+
+void nfec_dec_init_states(NFECDecState *h, const float * initial_state);
+void nfec_dec_unquantize_latent_vector(float *z, const int *zq, int quant_level);
+void nfec_decode_qframe(NFECDecState *h, float *qframe, const float * z);
+
+#endif
\ No newline at end of file
diff --git a/dnn/nfec_dec_demo.c b/dnn/nfec_dec_demo.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce5e6a2138d40cc61795ce5b6c86fe2c36e31452
--- /dev/null
+++ b/dnn/nfec_dec_demo.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "nfec_dec.h"
+#include "nfec_enc.h"
+
+void usage(void)   /* print the command-line synopsis to stdout and terminate with exit status 1 */
+{
+    printf("nfec_dec_demo <input> <output>\n");
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    /* Decoder demo: reads a 24-float initial state followed by 80-float latent
+     * vectors from <input> and writes one decoded qframe per latent vector to <output>.
+     * Returns 0 on success, 1 if the initial state cannot be read or an output write fails. */
+    NFECDecState dec_state;
+    float qframe[4 * NFEC_DEC_NUM_FEATURES];
+    float latents[80];
+    float initial_state[24];
+    FILE *in_fid, *out_fid;
+    int ret = 0;
+
+    memset(&dec_state, 0, sizeof(dec_state));
+
+    if (argc < 3) usage();
+
+    in_fid = fopen(argv[1], "rb");
+    if (in_fid == NULL)
+    {
+        perror("Could not open input file");
+        usage();
+    }
+
+    out_fid = fopen(argv[2], "wb");
+    if (out_fid == NULL)
+    {
+        perror("Could not open output file");
+        usage();
+    }
+
+    /* read initial state from input stream; a short read need not set errno, so perror() is not used */
+    if (fread(initial_state, sizeof(float), 24, in_fid) != 24)
+    {
+        fprintf(stderr, "error while reading initial state\n");
+        ret = 1;
+    }
+
+    /* initialize GRU states */
+    if (ret == 0) nfec_dec_init_states(&dec_state, initial_state);
+
+    /* start decoding; stops at the first incomplete latent vector or failed write */
+    while (ret == 0 && fread(latents, sizeof(float), 80, in_fid) == 80)
+    {
+        nfec_decode_qframe(&dec_state, qframe, latents);
+        if (fwrite(qframe, sizeof(qframe), 1, out_fid) != 1) ret = 1;  /* whole qframe array as one item */
+    }
+
+    /* both streams are closed on every path; fclose() flushes, so a failure on the output counts as an error */
+    fclose(in_fid);
+    if (fclose(out_fid) != 0) ret = 1;
+
+    return ret;
+}
+
+/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_dec_demo.c nfec_dec.c nnet.c nfec_dec_data.c nfec_stats_data.c kiss99.c -g -o nfec_dec_demo */
\ No newline at end of file
diff --git a/dnn/nfec_enc.c b/dnn/nfec_enc.c
index d524e8211f791c5498cb00e08095a76b8d1a55f4..1fd5bbe185cf99a84036aea8a483b601c9239820 100644
--- a/dnn/nfec_enc.c
+++ b/dnn/nfec_enc.c
@@ -1,6 +1,9 @@
+#include <math.h>
+
 #include "nfec_enc.h"
 #include "nnet.h"
 #include "nfec_enc_data.h"
+#include "nfec_stats_data.h"
 
 //#define DEBUG
 
@@ -8,7 +11,12 @@
 #include <stdio.h>
 #endif
 
-void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input)
+void nfec_encode_dframe(
+    struct NFECEncState *enc_state, /* io: encoder state */
+    float *latents,                 /* o: latent vector */
+    float *initial_state,           /* o: initial state */
+    const float *input              /* i: double feature frame (concatenated) */
+    )
 {
     float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
     int output_index = 0;
@@ -105,4 +113,28 @@ void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *i
     input_index = output_index;
     compute_dense(&gdense2, initial_state, &buffer[input_index]);
 
+}
+
+void nfec_quantize_latent_vector(
+    int *z_q,           /* o: quantized latent vector */
+    const float *z,     /* i: unquantized latent vector */
+    int quant_level     /* i: quantization level */
+    )
+{
+    /* offset of this level's row in the per-level statistics tables */
+    const int row = quant_level * NFEC_LATENT_DIM;
+    int k;
+
+    for (k = 0; k < NFEC_LATENT_DIM; k++)
+    {
+        /* dead-zone transform */
+        const float dz = nfec_stats_dead_zone_theta[row + k] - .5f;
+        float y = z[k] - dz * tanhf(z[k] / (dz + 0.1f));
+
+        /* apply the per-latent quantizer scale */
+        y *= nfec_stats_quant_scales[row + k];
+
+        /* quantization by rounding (NOTE: no overflow check; is a quantization routine with one available?) */
+        z_q[k] = (int) roundf(y);
+    }
 }
\ No newline at end of file
diff --git a/dnn/nfec_enc.h b/dnn/nfec_enc.h
index 27face1d7d0ea1944b08b7bcf5c01f68ed6beb02..9544c93ad80eb361a83d3d12fd6a7bfea2394213 100644
--- a/dnn/nfec_enc.h
+++ b/dnn/nfec_enc.h
@@ -11,5 +11,6 @@ struct NFECEncState{
 };
 
 void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input);
+void nfec_quantize_latent_vector(int *z_q, const float *z, int quant_level);
 
 #endif
\ No newline at end of file
diff --git a/dnn/nfec_enc_demo.c b/dnn/nfec_enc_demo.c
index 809c90bd56f99774a0ff249a09c8486580eef4f8..f58f1166faa392b1b8633f6fe5ace59a584bcae4 100644
--- a/dnn/nfec_enc_demo.c
+++ b/dnn/nfec_enc_demo.c
@@ -16,8 +16,9 @@ int main(int argc, char **argv)
     float dframe[2 * NFEC_NUM_FEATURES];
     float latents[80];
     float initial_state[24];
+    int quantized_latents[NFEC_LATENT_DIM];
     int index = 0;
-    FILE *fid, *latents_fid, *states_fid;
+    FILE *fid, *latents_fid, *quantized_latents_fid, *states_fid;
 
     memset(&enc_state, 0, sizeof(enc_state));
 
@@ -40,6 +41,16 @@ int main(int argc, char **argv)
         usage();
     }
 
+    char filename[256];
+    /* build "<argv[2]>.quantized.f32"; snprintf bounds the write (an over-long path is truncated, not overflowed) */
+    snprintf(filename, sizeof(filename), "%s.quantized.f32", argv[2]);
+    quantized_latents_fid = fopen(filename, "wb");
+    if (quantized_latents_fid == NULL)  /* check the stream just opened, not latents_fid */
+    {
+        fprintf(stderr, "could not open latents file %s\n", filename);
+        usage();
+    }
+
     states_fid = fopen(argv[3], "wb");
     if (states_fid == NULL)
     {
@@ -55,8 +66,10 @@ int main(int argc, char **argv)
         if (index == 2)
         {
             nfec_encode_dframe(&enc_state, latents, initial_state, dframe);
+            nfec_quantize_latent_vector(quantized_latents, latents, 0);
             index = 0;
             fwrite(latents, sizeof(float), NFEC_LATENT_DIM, latents_fid);
+            fwrite(quantized_latents, sizeof(int), NFEC_LATENT_DIM, quantized_latents_fid);
             fwrite(initial_state, sizeof(float), GDENSE2_OUT_SIZE, states_fid);
         }
     }
@@ -64,6 +77,9 @@ int main(int argc, char **argv)
     fclose(fid);
     fclose(states_fid);
     fclose(latents_fid);
+    fclose(quantized_latents_fid);
+
+    return 0;
 }
 
-/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c kiss99.c -g -o nfec_enc_demo */
\ No newline at end of file
+/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c nfec_stats_data.c kiss99.c -g -o nfec_enc_demo */
\ No newline at end of file
diff --git a/dnn/training_tf2/dump_nfec_model.py b/dnn/training_tf2/dump_nfec_model.py
index 9f0768a8773c7dc6cad1e1fb9fe90ca4fe86a33a..1f016467376a65d82cb72df52256b22f53582fb3 100644
--- a/dnn/training_tf2/dump_nfec_model.py
+++ b/dnn/training_tf2/dump_nfec_model.py
@@ -1,6 +1,7 @@
 import argparse
 import os
 
+os.environ['CUDA_VISIBLE_DEVICES'] = ""
 
 parser = argparse.ArgumentParser()
 
@@ -59,17 +60,17 @@ def dump_statistical_model(qembedding, f, fh):
     r               = 0.5 + 0.5 * tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()
     theta           = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()
 
-    printVector(f, quant_scales[:], 'nfec_stats_quant_scales')
-    printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta')
-    printVector(f, r, 'nfec_stats_r')
-    printVector(f, theta, 'nfec_stats_theta')
+    printVector(f, quant_scales[:], 'nfec_stats_quant_scales', static=False)
+    printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta', static=False)
+    printVector(f, r, 'nfec_stats_r', static=False)
+    printVector(f, theta, 'nfec_stats_theta', static=False)
 
     fh.write(
 f"""
-extern float nfec_stats_quant_scales;
-extern float nfec_stats_dead_zone_theta;
-extern float nfec_stats_r;
-extern float nfec_stats_theta;
+extern const float nfec_stats_quant_scales[{levels * N}];
+extern const float nfec_stats_dead_zone_theta[{levels * N}];
+extern const float nfec_stats_r[{levels * N}];
+extern const float nfec_stats_theta[{levels * N}];
 
 """
     )
@@ -159,6 +160,7 @@ f"""
     header_fid.write(
 f"""
 #define NFEC_STATS_NUM_LEVELS {num_levels}
+#define NFEC_STATS_NUM_LATENTS {args.latent_dim}
 
 """
     )
@@ -171,3 +173,60 @@ f"""
     header_fid.close()
     source_fid.close()
 
+    # decoder
+    decoder_dense_names = [
+        'state1',
+        'state2',
+        'state3',
+        'dec_dense1',
+        'dec_dense3',
+        'dec_dense5',
+        'dec_dense7',
+        'dec_dense8',
+        'dec_final'
+    ]   
+
+    decoder_gru_names = [
+        'dec_dense2',
+        'dec_dense4',
+        'dec_dense6'
+    ] 
+
+    source_fid = open("nfec_dec_data.c", 'w')
+    header_fid = open("nfec_dec_data.h", 'w')
+
+    start_header(header_fid, "nfec_dec_data.h")
+    start_source(source_fid, "nfec_dec_data.h", os.path.basename(args.weights))
+
+    # dump GRUs first: the constant NFEC_DEC_MAX_RNN_NEURONS written below needs the decoder's own maximum
+    max_rnn_neurons = max(
+        [
+            dump_gru_layer(decoder.get_layer(name), source_fid, header_fid)
+            for name in decoder_gru_names
+        ]
+    )
+
+    # some global constants (emitted after the GRU dump so max_rnn_neurons is not a leftover/undefined value)
+    header_fid.write(
+f"""
+#define NFEC_DEC_NUM_FEATURES 20
+
+#define NFEC_DEC_LATENT_DIM {args.latent_dim}
+
+#define NFEC_DEC_MAX_RNN_NEURONS {max_rnn_neurons}
+
+
+"""
+    )
+
+
+    # dump Dense layers
+    for name in decoder_dense_names:
+        layer = decoder.get_layer(name)
+        dump_dense_layer(layer, source_fid, header_fid)
+
+    finish_header(header_fid)
+    finish_source(source_fid)
+
+    header_fid.close()
+    source_fid.close()
\ No newline at end of file
diff --git a/dnn/training_tf2/keraslayerdump.py b/dnn/training_tf2/keraslayerdump.py
index 41c723ae616a0bebdb6888be7ea7c689b1f75dfa..3269c6b82d1ae000e20141bc1b931017e926617b 100644
--- a/dnn/training_tf2/keraslayerdump.py
+++ b/dnn/training_tf2/keraslayerdump.py
@@ -3,13 +3,16 @@
 import numpy as np
 
 
-def printVector(f, vector, name, dtype='float', dotp=False):
+def printVector(f, vector, name, dtype='float', dotp=False, static=True):
     """ prints vector as one-dimensional C array """
     if dotp:
         vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
         vector = vector.transpose((2, 0, 3, 1))
     v = np.reshape(vector, (-1))
-    f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
+    if static:
+        f.write('static const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
+    else:
+        f.write('const {} {}[{}] = {{\n   '.format(dtype, name, len(v)))
     for i in range(0, len(v)):
         f.write('{}'.format(v[i]))
         if (i!=len(v)-1):