Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
import os
import pickle
from glob import iglob
import numpy as np
import librosa
DATA_AUDIO_DIR = './audio'
TARGET_SR = 8000
OUTPUT_DIR = './output'
OUTPUT_DIR_TRAIN = os.path.join(OUTPUT_DIR, 'train')
OUTPUT_DIR_TEST = os.path.join(OUTPUT_DIR, 'test')
AUDIO_LENGTH = 10000
class_ids = {
'normal': 0,
'murmur': 1,
'extrahls': 2,
'artifact': 3,
'unlabelled': 4,
}
def extract_class_id(wav_filename):
if 'normal' in wav_filename:
return class_ids.get('normal')
elif 'murmur' in wav_filename:
return class_ids.get('murmur')
elif 'extrahls' in wav_filename:
return class_ids.get('extrahls')
elif 'artifact' in wav_filename:
return class_ids.get('artifact')
elif 'unlabelled' in wav_filename:
return class_ids.get('unlabelled')
else:
return class_ids.get('unlabelled')
def read_audio_from_filename(filename, target_sr):
audio, _ = librosa.load(filename, sr=target_sr, mono=True)
audio = audio.reshape(-1, 1)
return audio
def convert_data():
for i, wav_filename in enumerate(iglob(os.path.join(DATA_AUDIO_DIR, '**/**.wav'), recursive=True)):
class_id = extract_class_id(wav_filename)
audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)
# normalize mean 0, variance 1
audio_buf = (audio_buf - np.mean(audio_buf)) / np.std(audio_buf)
original_length = len(audio_buf)
print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))
if original_length < AUDIO_LENGTH:
audio_buf = np.concatenate((audio_buf, np.zeros(shape=(AUDIO_LENGTH - original_length, 1))))
print('PAD New length =', len(audio_buf))
elif original_length > AUDIO_LENGTH:
audio_buf = audio_buf[0:AUDIO_LENGTH]
print('CUT New length =', len(audio_buf))
output_folder = OUTPUT_DIR_TRAIN
if i // 50 == 0:
output_folder = OUTPUT_DIR_TEST
output_filename = os.path.join(output_folder, str(i) + '.pkl')
out = {'class_id': class_id,
'audio': audio_buf,
'sr': TARGET_SR}
with open(output_filename, 'wb') as w:
pickle.dump(out, w)
if __name__ == '__main__':
convert_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.