From 101fd2411a697eebbbcf1079006fb2af27e1ae86 Mon Sep 17 00:00:00 2001 From: Jan Buethe <jbuethe@amazon.de> Date: Mon, 24 Jul 2023 14:02:35 -0700 Subject: [PATCH] added dataset for SILK to LPCNet feature conversion --- dnn/torch/osce/data/silk_conversion_set.py | 132 +++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 dnn/torch/osce/data/silk_conversion_set.py diff --git a/dnn/torch/osce/data/silk_conversion_set.py b/dnn/torch/osce/data/silk_conversion_set.py new file mode 100644 index 000000000..8f6467561 --- /dev/null +++ b/dnn/torch/osce/data/silk_conversion_set.py @@ -0,0 +1,132 @@ +""" +/* Copyright (c) 2023 Amazon + Written by Jan Buethe */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class SilkEnhancementSet(Dataset):
    """Dataset pairing SILK-derived features with LPCNet features.

    All data is read eagerly from raw binary files located in ``path``:

    ============================== ========= ==========================
    file                           dtype     layout after loading
    ============================== ========= ==========================
    ``features_lpc.f32``           float32   reshaped to ``(-1, 16)``
    ``features_ltp.f32``           float32   reshaped to ``(-1, 5)``
    ``features_period.s16``        int16     flat
    ``features_gain.f32``          float32   flat
    ``features_num_bits.s32``      int32     flat
    ``features_num_bits_smooth.f32`` float32 flat
    ``features_offset.f32``        float32   flat
    ``features_lpcnet.f32``        float32   reshaped to ``(-1, 36)``
    ``coded.s16``                  int16     flat
    ============================== ========= ==========================

    Items are indexed in units of ``frames_per_sample`` frames of
    ``frame_size`` (80) signal samples each.  The LPCNet features are
    indexed at half the frame rate and the bit counts at a quarter of the
    frame rate (see ``__getitem__``).

    NOTE(review): the class keeps the name ``SilkEnhancementSet`` although
    the patch adds it as ``silk_conversion_set.py``; the name is left
    unchanged so existing imports keep working.

    Args:
        path: directory containing the feature and signal files.
        frames_per_sample: number of frames per item; must be a multiple
            of 4 so the quarter-rate bit counts line up with item borders.
        no_pitch_value, acorr_radius, pitch_hangover, num_bands_clean_spec,
        num_bands_noisy_spec, noisy_spec_scale, noisy_apply_dct, add_offset,
        add_double_lag_acorr: forwarded unchanged, in this order, to
            ``silk_feature_factory``.  ``add_double_lag_acorr`` additionally
            selects the signal history length (700 instead of 350 samples).
        skip: number of samples by which the coded signal is read *earlier*
            than the frame grid.  NOTE(review): the original code used
            ``self.skip`` without ever defining it; the default of 0 keeps
            the signal frame-aligned -- confirm the correct delay for the
            data at hand.

    Raises:
        ValueError: if ``skip`` is negative or so large that the signal
            history of the first item would start before the beginning of
            the recording.
    """

    def __init__(self,
                 path,
                 frames_per_sample=100,
                 no_pitch_value=9,
                 acorr_radius=2,
                 pitch_hangover=8,
                 num_bands_clean_spec=64,
                 num_bands_noisy_spec=18,
                 noisy_spec_scale='opus',
                 noisy_apply_dct=True,
                 add_offset=False,
                 add_double_lag_acorr=False,
                 skip=0):

        assert frames_per_sample % 4 == 0, \
            "frames_per_sample must be a multiple of 4"

        self.frame_size = 80
        self.frames_per_sample = frames_per_sample
        self.no_pitch_value = no_pitch_value
        self.acorr_radius = acorr_radius
        self.pitch_hangover = pitch_hangover
        self.num_bands_clean_spec = num_bands_clean_spec
        self.num_bands_noisy_spec = num_bands_noisy_spec
        self.noisy_spec_scale = noisy_spec_scale
        self.add_double_lag_acorr = add_double_lag_acorr
        self.skip = skip

        def load(name, dtype):
            # All inputs are headerless raw dumps of a single dtype.
            return np.fromfile(os.path.join(path, name), dtype=dtype)

        self.lpcs = load('features_lpc.f32', np.float32).reshape(-1, 16)
        self.ltps = load('features_ltp.f32', np.float32).reshape(-1, 5)
        self.periods = load('features_period.s16', np.int16)
        self.gains = load('features_gain.f32', np.float32)
        self.num_bits = load('features_num_bits.s32', np.int32)
        self.num_bits_smooth = load('features_num_bits_smooth.f32', np.float32)
        self.offsets = load('features_offset.f32', np.float32)
        # Fixed: NumPy has no `from_file`; the reader is `np.fromfile`.
        self.lpcnet_features = load('features_lpcnet.f32', np.float32).reshape(-1, 36)

        self.coded_signal = load('coded.s16', np.int16)

        self.create_features = silk_feature_factory(no_pitch_value,
                                                    acorr_radius,
                                                    pitch_hangover,
                                                    num_bands_clean_spec,
                                                    num_bands_noisy_spec,
                                                    noisy_spec_scale,
                                                    noisy_apply_dct,
                                                    add_offset,
                                                    add_double_lag_acorr)

        self.history_len = 700 if add_double_lag_acorr else 350
        # discard some frames to have enough signal history
        self.skip_frames = 4 * ((self.history_len + 319) // 320 + 2)

        # Samples available in front of the first item once the history has
        # been accounted for; `skip` must fit in there, otherwise the history
        # slice would get a negative start index and silently wrap around.
        lead_in = self.skip_frames * self.frame_size - self.history_len
        if not 0 <= skip <= lead_in:
            raise ValueError(
                f"skip must be in [0, {lead_in}], got {skip}")

        # Fixed: only the coded signal is loaded by this dataset, so the
        # frame count is derived from it (the original read the undefined
        # attribute `clean_signal`).
        num_frames = self.coded_signal.shape[0] // self.frame_size - self.skip_frames

        # Clamp so that __len__ never returns a negative value for
        # recordings shorter than the discarded lead-in.
        self.len = max(0, num_frames // frames_per_sample)

    def __len__(self):
        """Return the number of complete items in the dataset."""
        return self.len

    def __getitem__(self, index):
        """Return item ``index`` as a dict of numpy arrays.

        Keys:
            ``silk_features``: first return value of ``create_features``.
            ``periods``: second return value of ``create_features`` as int64.
            ``numbits``: float32 array with two columns (raw and smoothed
                bit counts), each quarter-rate value repeated 4 times.
            ``lpcnet_features``: first 20 columns of the half-rate LPCNet
                feature rows covering the item.
        """
        frame_start = self.frames_per_sample * index + self.skip_frames
        frame_stop = frame_start + self.frames_per_sample

        signal_start = frame_start * self.frame_size - self.skip
        signal_stop = frame_stop * self.frame_size - self.skip

        # int16 PCM -> float32 in [-1, 1)
        coded_signal = self.coded_signal[signal_start : signal_stop].astype(np.float32) / 2**15

        coded_signal_history = self.coded_signal[signal_start - self.history_len : signal_start].astype(np.float32) / 2**15

        features, periods = self.create_features(
            coded_signal,
            coded_signal_history,
            self.lpcs[frame_start : frame_stop],
            self.gains[frame_start : frame_stop],
            self.ltps[frame_start : frame_stop],
            self.periods[frame_start : frame_stop],
            self.offsets[frame_start : frame_stop]
        )

        # One LPCNet feature row per two frames; only the first 20 of the
        # 36 stored columns are returned.
        lpcnet_features = self.lpcnet_features[frame_start // 2 : frame_stop // 2, :20]

        # Bit counts are stored once per four frames; repeat to frame rate.
        num_bits = np.repeat(self.num_bits[frame_start // 4 : frame_stop // 4], 4).astype(np.float32).reshape(-1, 1)
        num_bits_smooth = np.repeat(self.num_bits_smooth[frame_start // 4 : frame_stop // 4], 4).astype(np.float32).reshape(-1, 1)

        numbits = np.concatenate((num_bits, num_bits_smooth), axis=-1)

        return {
            'silk_features'   : features,
            'periods'         : periods.astype(np.int64),
            'numbits'         : numbits.astype(np.float32),
            'lpcnet_features' : lpcnet_features
        }