From 8bdbbfa18d4697be7ba9fc47176809d669d33f77 Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jmvalin@amazon.com>
Date: Fri, 16 Jul 2021 03:07:26 -0400
Subject: [PATCH] Support for sparse GRU B input matrices

Only on the C side; sparse GRU B training is not supported yet.
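
For context, the idx table emitted by printSparseVector() describes the pruned
input matrix band by band: for each group of 8 output units it stores the number
of surviving 4-wide input blocks followed by the starting input index of each
block, with the corresponding 8x4 quantized weights packed into the weights
array. The sketch below is only a scalar illustration of how a kernel like
sparse_sgemv_accum8x4() could walk that layout; the real kernels in the vec*.h
headers are SIMD-optimized, operate on quantized weights, and may order the
values inside each block differently:

    /* Illustrative scalar sketch only, not the shipped kernel.
     * Assumed layout: for every band of 8 output rows, idx holds the
     * number of non-zero 4-wide input blocks, then the starting input
     * index of each block; each block contributes 8x4 = 32 weights
     * stored contiguously in w. */
    static void sparse_matvec_8x4_ref(float *out, const float *w, int rows,
                                      const int *idx, const float *x)
    {
       int i, j, k, b;
       for (i=0;i<rows;i+=8)
       {
          int nb_blocks = *idx++;          /* blocks surviving pruning in this band */
          for (b=0;b<nb_blocks;b++)
          {
             int pos = *idx++;             /* first of the 4 input columns of the block */
             for (k=0;k<8;k++)             /* 8 output rows of the current band */
             {
                for (j=0;j<4;j++)
                   out[i+k] += w[4*k+j]*x[pos+j];
             }
             w += 32;                      /* advance to the next 8x4 block */
          }
       }
    }

Because only the non-zero blocks are visited, GRU B's input matmul needs the
extra {name}_weights_idx table, which is why GRULayer gains the
input_weights_idx pointer and dump_grub() now emits the weights through
printSparseVector().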
---
 dnn/nnet.c                      |  2 +-
 dnn/nnet.h                      |  1 +
 dnn/training_tf2/dump_lpcnet.py | 27 ++++++++++++---------------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/dnn/nnet.c b/dnn/nnet.c
index 566268ec1..3513a6062 100644
--- a/dnn/nnet.c
+++ b/dnn/nnet.c
@@ -323,7 +323,7 @@ void compute_gruB(const GRULayer *gru, const float* gru_b_condition, float *stat
    for (i=0;i<3*N;i++)
       zrh[i] = gru->bias[i] + gru_b_condition[i];
 #endif
-   sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, stride, input);
+   sparse_sgemv_accum8x4(zrh, gru->input_weights, 3*N, M, gru->input_weights_idx, input);
    for (i=0;i<3*N;i++)
       recur[i] = gru->bias[3*N + i];
    sgemv_accum(recur, gru->recurrent_weights, 3*N, N, stride, state);
diff --git a/dnn/nnet.h b/dnn/nnet.h
index b7e9b6905..0c06280dd 100644
--- a/dnn/nnet.h
+++ b/dnn/nnet.h
@@ -58,6 +58,7 @@ typedef struct {
   const float *bias;
   const float *subias;
   const qweight *input_weights;
+  const int *input_weights_idx;
   const float *recurrent_weights;
   int nb_inputs;
   int nb_neurons;
diff --git a/dnn/training_tf2/dump_lpcnet.py b/dnn/training_tf2/dump_lpcnet.py
index 6980d28b9..93920eecd 100755
--- a/dnn/training_tf2/dump_lpcnet.py
+++ b/dnn/training_tf2/dump_lpcnet.py
@@ -60,18 +60,20 @@ def printVector(f, vector, name, dtype='float', dotp=False):
     f.write('\n};\n\n')
     return;
 
-def printSparseVector(f, A, name):
+def printSparseVector(f, A, name, have_diag=True):
     N = A.shape[0]
+    M = A.shape[1]
     W = np.zeros((0,), dtype='int')
     W0 = np.zeros((0,))
-    diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
-    A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
-    A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
-    A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+    if have_diag:
+        diag = np.concatenate([np.diag(A[:,:N]), np.diag(A[:,N:2*N]), np.diag(A[:,2*N:])])
+        A[:,:N] = A[:,:N] - np.diag(np.diag(A[:,:N]))
+        A[:,N:2*N] = A[:,N:2*N] - np.diag(np.diag(A[:,N:2*N]))
+        A[:,2*N:] = A[:,2*N:] - np.diag(np.diag(A[:,2*N:]))
+        printVector(f, diag, name + '_diag')
     AQ = np.minimum(127, np.maximum(-128, np.round(A*128))).astype('int')
-    printVector(f, diag, name + '_diag')
     idx = np.zeros((0,), dtype='int')
-    for i in range(3*N//8):
+    for i in range(M//8):
         pos = idx.shape[0]
         idx = np.append(idx, -1)
         nb_nonzero = 0
@@ -131,12 +133,7 @@ def dump_grub(self, f, hf, gru_a_size):
     name = self.name
     print("printing layer " + name + " of type " + self.__class__.__name__)
     weights = self.get_weights()
-    f.write('#ifdef DOT_PROD\n')
-    qweight = np.clip(np.round(128.*weights[0][:gru_a_size, :]).astype('int'), -128, 127)
-    printVector(f, qweight, name + '_weights', dotp=True, dtype='qweight')
-    f.write('#else /*DOT_PROD*/\n')
-    printVector(f, weights[0][:gru_a_size, :], name + '_weights')
-    f.write('#endif /*DOT_PROD*/\n')
+    qweight = printSparseVector(f, weights[0][:gru_a_size, :], name + '_weights', have_diag=False)
     printVector(f, weights[1], name + '_recurrent_weights')
     printVector(f, weights[-1], name + '_bias')
     subias = weights[-1].copy()
@@ -152,8 +149,8 @@ def dump_grub(self, f, hf, gru_a_size):
         reset_after = 1
     neurons = weights[0].shape[1]//3
     max_rnn_neurons = max(max_rnn_neurons, neurons)
-    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
-            .format(name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
+    f.write('const GRULayer {} = {{\n   {}_bias,\n   {}_subias,\n   {}_weights,\n   {}_weights_idx,\n   {}_recurrent_weights,\n   {}, {}, ACTIVATION_{}, {}\n}};\n\n'
+            .format(name, name, name, name, name, name, gru_a_size, weights[0].shape[1]//3, activation, reset_after))
     hf.write('extern const GRULayer {};\n\n'.format(name));
     return True
 
-- 
GitLab