Skip to content

Instantly share code, notes, and snippets.

@iCorv
Last active January 15, 2020 17:31
Show Gist options
  • Save iCorv/1c3f526f05eb86522359ea86de6bacc4 to your computer and use it in GitHub Desktop.
Save iCorv/1c3f526f05eb86522359ea86de6bacc4 to your computer and use it in GitHub Desktop.
# Let's build an audio dataset of random sine-wave chunks!
def sinus_dataset_generator(num_examples, fs, samples, frequency_range):
    """Build a dataset of noisy sine-wave chunks.

    The first example is always a clean 440 Hz sine with unit gain and zero
    phase. Every further example is a sine with a random integer frequency,
    a random phase shift in [0, pi), a random gain in [0.5, 1.0) and
    additive Gaussian noise (mu = 0, sigma = 0.1).

    Args:
        num_examples: number of examples to generate (non-negative int).
        fs: sample rate of the sine waves.
        samples: number of samples per example (int).
        frequency_range: two values [lower, upper] bounding the random
            frequency. Frequencies are drawn with np.random.randint, so
            the upper bound itself is never produced.

    Returns:
        A float32 numpy array of shape (num_examples, 1, samples, 1).

    Raises:
        ValueError: if num_examples is negative.
    """
    if num_examples < 0:
        raise ValueError("num_examples must be non-negative")

    # Allocate the result once instead of growing it with np.append, which
    # copies the whole dataset on every iteration (quadratic cost). A fixed
    # float32 buffer also keeps the dtype consistent: adding the float64
    # noise would otherwise silently upcast the data to float64.
    sinus_data = np.empty((num_examples, 1, samples, 1), dtype=np.float32)
    if num_examples == 0:
        return sinus_data

    # Loop-invariant part of the sine argument: 2 * pi * n for each sample n.
    angular_steps = 2 * np.pi * np.arange(samples)

    # first example: clean 440 Hz reference tone, no phase shift, no noise
    sinus_data[0, 0, :, 0] = np.sin(angular_steps * 440.0 / fs)

    for idx in range(1, num_examples):
        # random frequency
        f = np.random.randint(frequency_range[0], frequency_range[1])
        # random phase shift
        phase = np.random.random() * np.pi
        # random gain
        gain = np.random.uniform(0.5, 1.0)
        sinus = (np.sin(angular_steps * f / fs + phase) * gain).astype(np.float32)
        # add some noise, mu = 0, sigma = 0.1
        noise = np.random.normal(0, 0.1, samples)
        # writing into the preallocated slot casts the sum back to float32
        sinus_data[idx, 0, :, 0] = sinus + noise
    return sinus_data
# Generate 4000 examples; RATE and AUDIO_CHUNK_SIZE must already be defined.
sinus_data = sinus_dataset_generator(4000, RATE, AUDIO_CHUNK_SIZE, [30, 8000])
# split into train and eval dataset, roughly a 70/30 split
# The split index is derived from the generated data itself, because
# `num_examples` only exists as a parameter inside the generator function.
split = int(len(sinus_data) * 0.7)
# Each dataset pairs every example with itself (input and target are the same slice).
train_dataset = tf.data.Dataset.from_tensor_slices((sinus_data[:split], sinus_data[:split]))
eval_dataset = tf.data.Dataset.from_tensor_slices((sinus_data[split:], sinus_data[split:]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment