2024-05-24 22:01:59 +02:00
|
|
|
import numpy as np
|
|
|
|
from scipy.io import wavfile
|
|
|
|
import urllib.request
|
|
|
|
import zipfile
|
2024-05-24 23:02:24 +02:00
|
|
|
import os
|
2024-05-24 22:01:59 +02:00
|
|
|
|
2024-05-25 17:44:12 +02:00
|
|
|
def download_and_extract_data(url):
    """Download a zip archive from ``url`` and unpack it into the current directory.

    Nothing happens when an entry named ``data`` already exists in the current
    working directory. Otherwise the archive is saved as ``./data.zip``,
    extracted into ``.`` and the temporary zip file is deleted again.

    NOTE(review): the existence check presumably means the archive contains a
    top-level ``data`` directory; the code itself does not verify this.

    Args:
        url: Location of the zip archive, passed unchanged to
            ``urllib.request.urlretrieve``.

    Raises:
        Any exception raised by the download or by ``zipfile`` propagates to
        the caller; the temporary ``data.zip`` is removed first.
    """
    if os.path.exists('data'):
        return

    zip_path = os.path.join('.', 'data.zip')
    try:
        urllib.request.urlretrieve(url, zip_path)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall('.')
    finally:
        # Clean up even when the download or extraction fails, so a partial
        # or corrupt archive is never left behind. The existence check keeps
        # a missing file from masking the original exception.
        if os.path.exists(zip_path):
            os.remove(zip_path)
|
|
|
|
|
|
|
|
def load_wav(file_path):
    """Read one WAV file via ``scipy.io.wavfile.read``.

    Args:
        file_path: Path of the WAV file to read.

    Returns:
        A ``(sample_rate, data)`` tuple exactly as produced by
        ``wavfile.read``.
    """
    parsed = wavfile.read(file_path)
    return parsed[0], parsed[1]
|
|
|
|
|
2024-05-25 17:31:08 +02:00
|
|
|
def load_all_wavs(data_dir, cut_length=None):
    """Load the samples of every ``.wav`` file directly inside ``data_dir``.

    Args:
        data_dir: Directory that is listed (non-recursively) for entries whose
            names end in ``.wav``.
        cut_length: Optional maximum number of leading samples kept per file.
            ``None`` keeps each recording in full.

    Returns:
        A list with one entry per file, holding the data returned by
        ``load_wav`` (sample rates are discarded), ordered by file name.
    """
    # os.listdir yields entries in arbitrary order; sorting makes the position
    # of each recording in the result reproducible across runs and machines.
    wav_names = sorted(
        name for name in os.listdir(data_dir) if name.endswith('.wav')
    )

    all_data = []
    for name in wav_names:
        _, samples = load_wav(os.path.join(data_dir, name))
        if cut_length is not None:
            samples = samples[:cut_length]
        all_data.append(samples)
    return all_data
|
|
|
|
|
2024-05-25 17:31:08 +02:00
|
|
|
def compute_correlation_matrix(data):
    """Compute the pairwise correlation coefficients between leads.

    Every lead is truncated to the length of the shortest one before the
    coefficients are computed with ``np.corrcoef`` (one row per lead).

    Args:
        data: Sized sequence of leads, each a sliceable sequence of samples.

    Returns:
        A square array with one row and column per lead. The diagonal is set
        to 0 rather than the usual 1.
        NOTE(review): presumably so self-correlation is ignored by whatever
        consumes the matrix; confirm against the caller.

    Raises:
        ValueError: If ``data`` contains no leads.
    """
    # len() rather than truthiness: ``data`` may be a NumPy array, whose
    # truth value is ambiguous.
    if len(data) == 0:
        raise ValueError("data must contain at least one lead")

    shortest = min(len(lead) for lead in data)

    # Trim all leads to the minimum length
    trimmed_data = [lead[:shortest] for lead in data]

    # With a single lead np.corrcoef collapses to a scalar, which
    # np.fill_diagonal rejects; promote it back to a 1x1 matrix.
    corr_matrix = np.atleast_2d(np.corrcoef(trimmed_data))
    np.fill_diagonal(corr_matrix, 0)
    return corr_matrix
|
2024-05-24 22:01:59 +02:00
|
|
|
|
2024-05-25 17:31:08 +02:00
|
|
|
def split_data_by_time(data, split_ratio=0.5):
    """Split every lead into a leading and a trailing segment.

    The cut position of each lead is ``int(len(lead) * split_ratio)``, so
    leads of different lengths are cut at different indices.

    Args:
        data: Iterable of leads, each supporting ``len()`` and slicing.
        split_ratio: Fraction of each lead assigned to the first segment.

    Returns:
        A ``(train_data, test_data)`` tuple of lists. ``train_data[i]`` holds
        the samples of lead ``i`` before its cut and ``test_data[i]`` the
        samples from the cut onwards.
    """
    def _cut(series):
        # Return the (head, tail) pair of one lead at its own cut index.
        boundary = int(len(series) * split_ratio)
        return series[:boundary], series[boundary:]

    segments = [_cut(series) for series in data]
    train_data = [head for head, _ in segments]
    test_data = [tail for _, tail in segments]
    return train_data, test_data
|