Skip to content

Instantly share code, notes, and snippets.

@MechCoder
Created October 27, 2016 22:41
Show Gist options
  • Save MechCoder/ef30cc36c28c08d3f5a84ca6683f6205 to your computer and use it in GitHub Desktop.
Save MechCoder/ef30cc36c28c08d3f5a84ca6683f6205 to your computer and use it in GitHub Desktop.
import os
import subprocess

import numpy as np
import scipy.io.wavfile as wav
def convert_mp3_to_wav(folder, sampling_freq=44.1):
    """
    Converts every .mp3 file in a directory to a mono .wav file.

    For each ``<name>.mp3`` the external ``lame`` program is invoked twice:
    once to downmix the file to ``<name>mono.mp3`` and once to decode that
    mono file into ``<name>.wav``. Both outputs are written next to the
    source file inside ``folder``.

    Arguments
    ---------
    folder - A folder containing the .mp3 files to convert.
    sampling_freq - Value handed to lame's ``--resample`` option
        (the default of 44.1 suggests kHz; check the lame manual).

    Notes
    -----
    ``lame`` must be available on PATH, otherwise ``subprocess.run`` raises
    ``FileNotFoundError``. The intermediate ``*mono.mp3`` files are left in
    ``folder``; because they also end in ".mp3", calling this function a
    second time on the same folder will convert them as well.
    """
    for filename in os.listdir(folder):
        if not filename.endswith(".mp3"):
            continue

        from_path = os.path.join(folder, filename)
        stem = from_path[:-4]
        mono_path = stem + "mono.mp3"
        # from_path already includes ``folder``; joining ``folder`` onto it
        # again would yield folder/folder/<name>.wav for relative folders.
        wav_path = stem + ".wav"

        # Commands are passed as argument lists (no shell), so file names
        # containing spaces or shell metacharacters are handled verbatim.

        # Converts stereo to mono
        downmix = subprocess.run(
            ["lame", "-a", "-m", "m", from_path, mono_path])
        if downmix.returncode != 0:
            # No mono file was produced, so there is nothing to decode.
            continue

        # Converts mono to wav with the given sampling frequency.
        subprocess.run(
            ["lame", "--decode", mono_path, wav_path,
             "--resample", str(sampling_freq)])
def time_blocks_to_fft_blocks(time_blocks):
    """
    Transforms a sequence of time-domain blocks into frequency-domain rows.

    Each block is passed through ``np.fft.fft``; the resulting complex
    spectrum is flattened into one real-valued vector made of all the real
    parts followed by all the imaginary parts. The vectors are returned
    stacked in a single array, one row per input block.
    """
    spectra = (np.fft.fft(samples) for samples in time_blocks)
    rows = [
        np.concatenate((spectrum.real, spectrum.imag))
        for spectrum in spectra
    ]
    return np.array(rows)
def convert_wav_to_freq_domain(wavfile, block_size=None):
    """
    Converts a wave file to frequency-domain blocks that can be fed to a RNN.

    The samples are scaled, cut into consecutive blocks of ``block_size``
    samples (the last block is zero-padded to full length) and every block
    is transformed with ``time_blocks_to_fft_blocks``.

    Arguments
    ---------
    wavfile - A file with extension .wav
        NOTE: the scaling and padding below assume one-dimensional (mono)
        16-bit PCM data; other layouts are not handled here.
    block_size - Number of samples per block.
        If not provided, set to the sampling rate // 4.

    Returns
    -------
    x_freq - shape=(num_blocks, 2*block_size)
        One row per block: real FFT parts followed by imaginary parts.
    y_freq - shape=(num_blocks, 2*block_size)
        x_freq shifted by one block, i.e. row i holds the transform of
        block i + 1; the final row is the transform of an all-zero block.

    Raises
    ------
    ValueError if the effective block_size is not positive.
    """
    freq, amplitude = wav.read(wavfile)
    amplitude = np.asarray(amplitude, dtype=np.float32)

    # Normalize to [-1, 1): 16-bit samples span [-32768, 32767], so the
    # divisor is 2**15 = 32768.
    amplitude /= 32768.0

    if block_size is None:
        block_size = freq // 4
    if block_size <= 0:
        raise ValueError(
            "block_size must be a positive integer, got %r" % (block_size,))

    # Split into sub-arrays of size block_size:
    split_indices = np.arange(block_size, amplitude.shape[0], block_size)
    block_arrays = np.split(amplitude, split_indices)

    # The trailing block may be short; zero-pad it so all blocks have the
    # same length.
    tail = block_arrays[-1]
    padded = np.zeros(block_size)
    padded[:len(tail)] = tail
    block_arrays[-1] = padded

    # Targets are the inputs advanced by one block, closed with silence.
    shifted_arrays = block_arrays[1:] + [np.zeros(block_size)]

    x_freq = time_blocks_to_fft_blocks(block_arrays)
    y_freq = time_blocks_to_fft_blocks(shifted_arrays)
    return x_freq, y_freq
def generate_tensors(directory, max_seq_len=40, block_size=None):
    """
    Generate input/target tensors from all .wav files in a directory.

    Every .wav file is converted with ``convert_wav_to_freq_domain`` and its
    blocks are grouped into consecutive sequences of ``max_seq_len`` blocks.
    A trailing group with fewer than ``max_seq_len`` blocks is discarded.

    Arguments
    ---------
    directory - A directory that has .wav files in it
    max_seq_len - Number of blocks (time steps) per example. If you want a
        clip of x seconds set this to
        (x * sample_frequency / block_size)
    block_size - Number of samples per block, forwarded to
        ``convert_wav_to_freq_domain``. If None, that function picks its
        own default for each file.

    Returns
    -------
    X, y - arrays of shape (num_examples, max_seq_len, 2*block_size),
        provided every file yields blocks of the same width. When no full
        sequence could be built both arrays are empty with shape (0,).

    Raises
    ------
    ValueError if max_seq_len is not positive.
    """
    if max_seq_len <= 0:
        raise ValueError(
            "max_seq_len must be a positive integer, got %r" % (max_seq_len,))

    X_chunks = []
    y_chunks = []
    # Sorted so the order of examples does not depend on the file system.
    for wav_file in sorted(os.listdir(directory)):
        if not wav_file.endswith(".wav"):
            continue

        x_freq, y_freq = convert_wav_to_freq_domain(
            os.path.join(directory, wav_file), block_size=block_size)

        # Only whole sequences are kept; a sequence that ends exactly on the
        # last block is complete and therefore retained.
        usable = (x_freq.shape[0] // max_seq_len) * max_seq_len
        for start in range(0, usable, max_seq_len):
            stop = start + max_seq_len
            X_chunks.append(x_freq[start:stop])
            y_chunks.append(y_freq[start:stop])

    return np.asarray(X_chunks), np.asarray(y_chunks)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment