Created
October 27, 2016 22:41
-
-
Save MechCoder/ef30cc36c28c08d3f5a84ca6683f6205 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import subprocess

import numpy as np
import scipy.io.wavfile as wav
def convert_mp3_to_wav(folder, sampling_freq=44.1):
    """
    Converts every .mp3 file in a directory to a mono .wav file.

    For each ``<name>.mp3`` two files are written next to it:
    ``<name>mono.mp3`` (the mono downmix) and ``<name>.wav`` (the decoded
    downmix). The work is done by the external ``lame`` command; its exit
    status is not checked.

    Arguments
    ---------
    folder - A folder containing the .mp3 files to convert.
    sampling_freq - Value handed to lame's ``--resample`` option.
        lame's usage text gives this in kHz, hence the default of 44.1.

    Raises
    ------
    OSError - If the ``lame`` executable cannot be launched
        (for example because it is not installed).
    """
    # NOTE: the intermediate "<name>mono.mp3" files also end in ".mp3", so
    # running this twice on the same folder converts them again.
    for filename in os.listdir(folder):
        if not filename.endswith(".mp3"):
            continue
        from_path = os.path.join(folder, filename)
        stem = from_path[:-len(".mp3")]
        mono_path = stem + "mono.mp3"
        # from_path already contains ``folder``; joining ``folder`` a second
        # time would duplicate the directory for relative folders.
        wav_path = stem + ".wav"
        # Argument lists (no shell) keep file names with spaces or shell
        # metacharacters intact.
        # Converts stereo to mono
        subprocess.call(["lame", "-a", "-m", "m", from_path, mono_path])
        # Converts mono to wav with the given sampling frequency.
        subprocess.call(
            ["lame", "--decode", mono_path, wav_path,
             "--resample", str(sampling_freq)])
def time_blocks_to_fft_blocks(time_blocks):
    """
    Maps each time-domain block to its FFT, stored as plain real numbers.

    Every block is transformed with ``np.fft.fft``; the real parts of the
    spectrum are followed by its imaginary parts, so a 1-D block of
    length n becomes a row of length 2 * n. The rows are returned stacked
    in one numpy array.
    """
    def _as_real_row(samples):
        # Complex spectrum -> [real parts..., imaginary parts...]
        spectrum = np.fft.fft(samples)
        return np.concatenate((spectrum.real, spectrum.imag))

    return np.array([_as_real_row(samples) for samples in time_blocks])
def convert_wav_to_freq_domain(wavfile, block_size=None):
    """
    Converts a wave file to frequency-domain blocks that can be input to a RNN.

    The samples are scaled by 1 / 32768 (which maps 16-bit PCM to [-1, 1)),
    cut into consecutive blocks of ``block_size`` samples (the last block is
    zero-padded to full length) and each block is transformed by
    ``time_blocks_to_fft_blocks``.

    Arguments
    ---------
    wavfile - A file with extension .wav
        Multi-channel audio is averaged down to a single channel.
    block_size - Number of samples per block.
        If not provided, set to sampling rate // 4.

    Returns
    -------
    x_freq - shape=(n_blocks, 2*block_size)
        Row i holds the real parts followed by the imaginary parts of the
        FFT of block i.
    y_freq - shape=(n_blocks, 2*block_size)
        x_freq shifted by one block: row i equals row i + 1 of x_freq and
        the last row is all zeros.
    """
    freq, amplitude = wav.read(wavfile)
    amplitude = np.asarray(amplitude, dtype=np.float32)
    if amplitude.ndim > 1:
        # wav.read returns (n_samples, n_channels) for multi-channel files;
        # the block splitting below needs a 1-D signal.
        amplitude = amplitude.mean(axis=1)
    # Normalize to [-1, 1): 16-bit PCM samples span [-32768, 32767].
    amplitude /= 32768.0
    if block_size is None:
        block_size = freq // 4

    # Split into sub-arrays of size block_size:
    split_indices = np.arange(block_size, amplitude.shape[0], block_size)
    block_arrays = np.split(amplitude, split_indices)

    # Only the last block can be short; zero-pad it to block_size.
    last_block = block_arrays[-1]
    padded = np.zeros(block_size)
    padded[:len(last_block)] = last_block
    block_arrays[-1] = padded

    x_freq = time_blocks_to_fft_blocks(block_arrays)
    # The target sequence is the input advanced by one block, closed with a
    # silent block. The FFT of an all-zero block is all zeros, so the shifted
    # rows can be reused instead of transforming every block a second time.
    y_freq = np.concatenate(
        (x_freq[1:], np.zeros((1, x_freq.shape[1]))), axis=0)
    return x_freq, y_freq
def generate_tensors(directory, max_seq_len=40, block_size=None):
    """
    Generate tensors of shape (num_examples, max_seq_len, 2 * block_size)
    where max_seq_len is the number of time steps (blocks) per example.

    Every .wav file in ``directory`` is converted with
    ``convert_wav_to_freq_domain`` and cut into consecutive runs of
    ``max_seq_len`` blocks. A trailing run with fewer than ``max_seq_len``
    blocks is discarded. The shape of each file's chunks is printed.

    Arguments
    ---------
    directory - A directory that has .wav files in it
    max_seq_len - If you want a clip of x seconds set this to
        (sample_frequency / block_size * x), i.e. the number of
        blocks that span x seconds.
    block_size - Number of samples per block, forwarded to
        ``convert_wav_to_freq_domain``.

    Returns
    -------
    X, y - Inputs and the matching one-block-shifted targets.
        Both are empty arrays of shape (0,) when no example was produced.
    """
    X_chunks = []
    y_chunks = []
    for wav_file in os.listdir(directory):
        if not wav_file.endswith(".wav"):
            continue
        # block_size must be forwarded, otherwise the caller's value is
        # silently replaced by the per-file default.
        x_freq, y_freq = convert_wav_to_freq_domain(
            os.path.join(directory, wav_file), block_size=block_size)
        splits = np.arange(max_seq_len, x_freq.shape[0], max_seq_len)

        # Split x_freq into 2-D arrays of size (max_seq_len, 2 * block_size).
        # Only the final piece can be short, so filter on length rather than
        # always dropping the last piece (which would throw away a complete
        # chunk when the block count is an exact multiple of max_seq_len).
        x_freq_chunks = [
            chunk for chunk in np.split(x_freq, splits, axis=0)
            if chunk.shape[0] == max_seq_len]
        print(np.asarray(x_freq_chunks).shape)
        X_chunks.extend(x_freq_chunks)

        y_freq_chunks = [
            chunk for chunk in np.split(y_freq, splits, axis=0)
            if chunk.shape[0] == max_seq_len]
        print(np.asarray(y_freq_chunks).shape)
        y_chunks.extend(y_freq_chunks)
    return np.asarray(X_chunks), np.asarray(y_chunks)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment