Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import os
import pickle
from glob import iglob
from shutil import rmtree
import numpy as np
from model_data import read_audio_from_filename
# --- Input -------------------------------------------------------------
# Root directory that convert_data() searches for .wav files.
DATA_AUDIO_DIR = './audio'

# --- Output ------------------------------------------------------------
# Base output directory and its train/test sub-directories.
OUTPUT_DIR = './output'
OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR, 'test')
OUTPUT_DIR_TRAIN = os.path.join(OUTPUT_DIR, 'train')

# --- Audio -------------------------------------------------------------
# Number of samples every clip is zero-padded or truncated to.
AUDIO_LENGTH = 10000
# Passed as target_sr to read_audio_from_filename and stored as 'sr' in each
# pickle (presumably the sample rate in Hz -- confirm against model_data).
TARGET_SR = 8000
def mkdir_p(path):
    """Create directory ``path`` and any missing parents, like ``mkdir -p``.

    Succeeds silently when ``path`` already exists as a directory.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, including when ``path``
            already exists but is not a directory.
    """
    # exist_ok=True tolerates only an already-existing *directory*; an
    # existing regular file still raises, matching the EEXIST/isdir check
    # this call replaces.
    os.makedirs(path, exist_ok=True)
def del_folder(path):
    """Recursively delete the directory tree at ``path``, best effort.

    Removal failures (for example ``path`` does not exist) are ignored, so
    this is safe to call before the directory has ever been created.

    Args:
        path: Directory to remove.
    """
    # ignore_errors=True keeps the best-effort behaviour without a bare
    # ``except:``, which would also swallow KeyboardInterrupt / SystemExit.
    rmtree(path, ignore_errors=True)
# Start from empty output folders: remove whatever is already there, then
# recreate both directories.
# NOTE: these are module-level statements, so they run on import as well as
# when the file is executed as a script.
del_folder(OUTPUT_DIR_TRAIN)
del_folder(OUTPUT_DIR_TEST)
mkdir_p(OUTPUT_DIR_TRAIN)
mkdir_p(OUTPUT_DIR_TEST)
# Integer id assigned to each label that can be recognised in a file path.
class_ids = {
    'normal': 0,
    'murmur': 1,
    'extrahls': 2,
    'artifact': 3,
    'unlabelled': 4,
}


def extract_class_id(wav_filename):
    """Return the integer class id inferred from a WAV file path.

    The whole path string (directories included) is searched for a known
    label as a case-sensitive substring. Labels are tried in a fixed
    priority order, so a path containing several labels gets the id of the
    first one in that order.

    Args:
        wav_filename: Path of the audio file.

    Returns:
        The matching id from ``class_ids``; the ``'unlabelled'`` id when no
        other label occurs in the path.
    """
    # Explicit tuple keeps the priority order independent of dict ordering.
    for label in ('normal', 'murmur', 'extrahls', 'artifact'):
        if label in wav_filename:
            return class_ids.get(label)
    # Covers both an explicit 'unlabelled' in the path and no match at all.
    return class_ids.get('unlabelled')
def convert_data():
    """Convert every WAV file under DATA_AUDIO_DIR into a fixed-length pickle.

    For each ``.wav`` file found recursively below ``DATA_AUDIO_DIR``:

    1. derive its class id from the path with ``extract_class_id``;
    2. load it with ``read_audio_from_filename(..., target_sr=TARGET_SR)``;
    3. normalise it to zero mean and unit variance;
    4. zero-pad or truncate it along the first axis to ``AUDIO_LENGTH``;
    5. pickle ``{'class_id', 'audio', 'sr'}`` to
       ``OUTPUT_DIR_TRAIN/<index>.pkl``.

    Every file is written to the train folder; nothing is written to
    ``OUTPUT_DIR_TEST`` here.
    """
    wav_pattern = os.path.join(DATA_AUDIO_DIR, '**/**.wav')
    # glob yields paths in arbitrary order; sorting makes the index used in
    # the output file name reproducible from run to run.
    wav_filenames = sorted(iglob(wav_pattern, recursive=True))

    for i, wav_filename in enumerate(wav_filenames):
        class_id = extract_class_id(wav_filename)
        audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)

        # normalize mean 0, variance 1
        mean = np.mean(audio_buf)
        std = np.std(audio_buf)
        audio_buf = audio_buf - mean
        # A constant (e.g. silent) clip has std == 0; dividing would fill the
        # buffer with NaN/inf, so such clips are only centred.
        if std > 0:
            audio_buf = audio_buf / std

        original_length = len(audio_buf)
        print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))

        if original_length < AUDIO_LENGTH:
            # Match the trailing dimensions of the buffer so that padding
            # works for both (N,) and (N, 1) shaped audio.
            pad_shape = (AUDIO_LENGTH - original_length,) + np.shape(audio_buf)[1:]
            audio_buf = np.concatenate((audio_buf, np.zeros(shape=pad_shape)))
            print('PAD New length =', len(audio_buf))
        elif original_length > AUDIO_LENGTH:
            audio_buf = audio_buf[0:AUDIO_LENGTH]
            print('CUT New length =', len(audio_buf))

        output_folder = OUTPUT_DIR_TRAIN
        output_filename = os.path.join(output_folder, str(i) + '.pkl')

        out = {
            'class_id': class_id,
            'audio': audio_buf,
            'sr': TARGET_SR,
        }
        with open(output_filename, 'wb') as w:
            pickle.dump(out, w)
if __name__ == '__main__':
    # Run the conversion only when this file is executed as a script.
    convert_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment