Add resampling/downmix support to DRED encoder

The 8 kHz, 12 kHz and stereo input paths are mostly untested.

Add resampling/downmix support to DRED encoder
0dad5e06 · Jean-Marc Valin · 8b42b006 · 0dad5e06 · 0dad5e06
Verified Commit 0dad5e06 authored 1 year ago by Jean-Marc Valin
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -85,25 +85,104 @@ static void dred_process_frame(DREDEnc *enc)
    enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES);
 }

+void filter_df2t(const float *in, float *out, int len, float b0, const float *b, const float *a, int order, float *mem)
+{   /* IIR filter in transposed direct form II: filters len samples from in to out, keeping its state in mem across calls. */
+    int i;
+    for (i=0;i<len;i++) {
+        int j;
+        float xi, yi, nyi;
+        xi = in[i];   /* read before out[i] is written, so in and out may be the same buffer */
+        yi = xi*b0 + mem[0];
+        nyi = -yi;    /* negated once so the feedback terms below are plain additions */
+        for (j=0;j<order;j++)
+        {
+           mem[j] = mem[j+1] + b[j]*xi + a[j]*nyi;   /* reads mem[order] when j == order-1: mem needs order+1 entries */
+        }
+        out[i] = yi;
+        /* mem[order] is only read, never written, here; presumably the caller keeps it at zero (TODO confirm). */
+    }
+}
+
+#define MAX_DOWNMIX_BUFFER (960*2)
+/* Downmixes in_len frames of input (mono, or interleaved stereo) sampled at enc->Fs to
+   mono and resamples them into out_len samples at 16 kHz: zero-stuff by `up`, filter,
+   then decimate by 3 where needed. enc->resample_mem holds filter state between calls. */
+static void dred_convert_to_16k(DREDEnc *enc, const float *in, int in_len, float *out, int out_len)
+{
+    float downmix[MAX_DOWNMIX_BUFFER];
+    int i;
+    int up = 1;   /* initialised so it is defined even if celt_assert(0) below compiles to nothing (NOTE(review): confirm for release builds) */
+    celt_assert(enc->channels*in_len <= MAX_DOWNMIX_BUFFER);
+    celt_assert(in_len * (opus_int32)16000 == out_len * enc->Fs);
+    switch(enc->Fs) {
+        case 8000:
+        case 24000:
+            up = 2;
+            break;
+        case 12000:
+            up = 4;
+            break;
+        case 16000:
+        case 48000:
+            up = 1;
+            break;
+        default:
+            celt_assert(0);
+    }
+    celt_assert(up*in_len <= MAX_DOWNMIX_BUFFER);   /* downmix[] is cleared and written over up*in_len entries */
+    OPUS_CLEAR(downmix, up*in_len);   /* presumably zero-fills, so the samples inserted between inputs are zero */
+    if (enc->channels == 1) {
+        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(up*in[i]);   /* one input every `up` slots, scaled by up */
+    } else {
+        for (i=0;i<in_len;i++) downmix[up*i] = FLOAT2INT16(.5*up*(in[2*i]+in[2*i+1]));   /* average of the interleaved pair */
+    }
+    if (enc->Fs == 16000) {
+        OPUS_COPY(out, downmix, out_len);   /* already at 16 kHz: no filtering */
+    } else if (enc->Fs == 48000 || enc->Fs == 24000) {
+        /* ellip(7, .2, 70, 7750/24000) */

+        static const float filter_b[8] = { 0.005873358047f,  0.012980854831f, 0.014531340042f,  0.014531340042f, 0.012980854831f,  0.005873358047f, 0.004523418224f, 0.f};
+        static const float filter_a[8] = {-3.878718597768f, 7.748834257468f, -9.653651699533f, 8.007342726666f, -4.379450178552f, 1.463182111810f, -0.231720677804f, 0.f};
+        float b0 = 0.004523418224f;
+        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);   /* filtered in place */
+        for (i=0;i<out_len;i++) out[i] = downmix[3*i];   /* keep every 3rd sample */
+    } else if (enc->Fs == 12000) {
+        /* ellip(7, .2, 70, 7750/24000) -- NOTE(review): same design note as the branch above but different coefficients; confirm the cutoff. */
+        static const float filter_b[8] = {-0.001017101081f,  0.003673127243f,   0.001009165267f,  0.001009165267f,  0.003673127243f, -0.001017101081f,  0.002033596776f, 0.f};
+        static const float filter_a[8] = {-4.930414411612f, 11.291643096504f, -15.322037343815f, 13.216403930898f, -7.220409219553f,  2.310550142771f, -0.334338618782f, 0.f};
+        float b0 = 0.002033596776f;
+        filter_df2t(downmix, downmix, up*in_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);   /* filtered in place */
+        for (i=0;i<out_len;i++) out[i] = downmix[3*i];   /* keep every 3rd sample */
+    } else if (enc->Fs == 8000) {
+        /* ellip(7, .2, 70, 3900/8000) */
+        static const float filter_b[8] = { 0.081670120929f, 0.180401598565f,  0.259391051971f, 0.259391051971f,  0.180401598565f, 0.081670120929f,  0.020109185709f, 0.f};
+        static const float filter_a[8] = {-1.393651933659f, 2.609789872676f, -2.403541968806f, 2.056814957331f, -1.148908574570f, 0.473001413788f, -0.110359852412f, 0.f};
+        float b0 = 0.020109185709f;
+        filter_df2t(downmix, out, out_len, b0, filter_b, filter_a, RESAMPLING_ORDER, enc->resample_mem);   /* no decimation: filter straight into out */
+    } else {
+        celt_assert(0);   /* not reachable for any rate the switch above accepts */
+    }
+}
+
 void dred_compute_latents(DREDEnc *enc, const float *pcm, int frame_size)
 {
    int frame_size16k = frame_size * 16000 / enc->Fs;
    while (frame_size16k > 0) {
-        int i;
        int process_size16k;
        int process_size;
-        process_size16k = IMIN(2*DRED_FRAME_SIZE - enc->input_buffer_fill, frame_size16k);
+        process_size16k = IMIN(2*DRED_FRAME_SIZE, frame_size16k);   /* per-pass chunk size, in 16 kHz samples */
        process_size = process_size16k * enc->Fs / 16000;
-        for (i=0;i<process_size16k;i++) enc->input_buffer[enc->input_buffer_fill+i] = FLOAT2INT16(pcm[i]);
+        dred_convert_to_16k(enc, pcm, process_size, &enc->input_buffer[enc->input_buffer_fill], process_size16k);   /* downmix/resample the chunk, appending after the samples already buffered */
        enc->input_buffer_fill += process_size16k;
-        if (enc->input_buffer_fill == 2*DRED_FRAME_SIZE)
+        if (enc->input_buffer_fill >= 2*DRED_FRAME_SIZE)   /* >= because an appended chunk can carry the fill past the threshold */
        {
          dred_process_frame(enc);
-          enc->input_buffer_fill = 0;
+          enc->input_buffer_fill -= 2*DRED_FRAME_SIZE;   /* what remains is the overshoot beyond 2*DRED_FRAME_SIZE */
+          OPUS_MOVE(&enc->input_buffer[0], &enc->input_buffer[2*DRED_FRAME_SIZE], enc->input_buffer_fill);   /* slide that remainder to the start of the buffer */
        }

-        pcm += process_size;
+        pcm += process_size*enc->channels;   /* pcm is interleaved (dred_convert_to_16k reads in[2*i], in[2*i+1] for stereo): step by whole frames */
-        frame_size16k -= process_size;
+        frame_size16k -= process_size16k;   /* the loop counter is in 16 kHz samples, not input-rate samples */
    }
 }


--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -36,17 +36,20 @@
 #include "lpcnet/src/dred_rdovae_enc.h"
 #include "lpcnet/src/dred_rdovae_enc_data.h"

+#define RESAMPLING_ORDER 8   /* filter order the resampler passes to filter_df2t(); its coefficient tables hold 8 entries */

 typedef struct {
    RDOVAEEnc model;
    opus_int32 Fs;
    int channels;

 #define DREDENC_RESET_START input_buffer
-    float input_buffer[DRED_DFRAME_SIZE];
+    float input_buffer[2*DRED_DFRAME_SIZE];   /* 16 kHz mono input awaiting dred_process_frame(); input_buffer_fill counts the valid samples */
    int input_buffer_fill;
    float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
    int latents_buffer_fill;
    float state_buffer[24];
+    float resample_mem[RESAMPLING_ORDER + 1];   /* filter_df2t() state kept between calls; one extra slot because the filter reads mem[order] */
    LPCNetEncState lpcnet_enc_state;
    RDOVAEEncState rdovae_enc;
 } DREDEnc;