From 17ff693ee5561b78c7ff62d85647f7e4eb360db3 Mon Sep 17 00:00:00 2001 From: Dominik Roth Date: Mon, 27 May 2024 17:00:11 +0200 Subject: [PATCH] Better unfuckify wavs --- data_processing.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/data_processing.py b/data_processing.py index b4b81d3..311cbff 100644 --- a/data_processing.py +++ b/data_processing.py @@ -24,7 +24,7 @@ def load_all_wavs(data_dir, cut_length=None): if cut_length is not None: print(cut_length) data = data[:cut_length] - all_data.append(unfuckify(data)) + all_data.append(data) return all_data def save_wav(file_path, data, sample_rate=19531): @@ -33,10 +33,9 @@ def save_wav(file_path, data, sample_rate=19531): def save_all_wavs(output_dir, all_data, input_filenames): for data, filename in zip(all_data, input_filenames): output_file_path = os.path.join(output_dir, filename) - save_wav(output_file_path, refuckify(data)) + save_wav(output_file_path, data) def compute_topology_metrics(data): - num_leads = len(data) min_length = min(len(d) for d in data) # Trim all leads to the minimum length @@ -58,7 +57,12 @@ def split_data_by_time(data, split_ratio=0.5): def unfuckify(nums): return np.round((nums + 33) / 64).astype(int) +def unfuckify_all(wavs): + return [unfuckify(wav) for wav in wavs] + # The released dataset is 10bit resolution encoded in a 16bit range with a completely fucked up mapping, which we have to replicate for lossless fml +# This func works for all samples contained in the provided dataset, but I don't guarantee it works for all possible data +# The solution would be to just never fuck up the data (operate on the true 10bit values) def refuckify(nums): n = np.round((nums * 64) - 32).astype(int) n[n >= 32] -= 1 @@ -73,4 +77,7 @@ def refuckify(nums): n[n <= -32770] -= -2 n[n <= -32832] -= -65599 - return n \ No newline at end of file + return n + +def refuckify_all(wavs): + return [refuckify(wav) for wav in wavs] \ No newline at end of file