This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Number of samples per audio chunk; used as the width of the model input.
AUDIO_CHUNK_SIZE = 1024
# NOTE(review): presumably the sample rate in Hz -- it is not referenced by
# the statements below, confirm against the rest of the file.
RATE = 44100

# build the model
# The same 1x5 kernel is shared by every convolution below.
kernel_size = (1, 5)
input_audio = Input(shape=(1, AUDIO_CHUNK_SIZE, 1))
net = Convolution2D(
    filters=64,
    kernel_size=kernel_size,
    activation='relu',
    padding='same',
)(input_audio)
# Average-pool with a (1, 2) window.
net = AveragePooling2D((1, 2))(net)
net = Convolution2D(
    filters=32,
    kernel_size=kernel_size,
    activation='relu',
    padding='same',
)(net)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# let's build an audio dataset of random sine-wave chunks!
def sinus_dataset_generator(num_examples, fs, samples, frequency_range): | |
"""Builds a dataset of sinus. | |
Args: | |
num_examples: number of examples to generate (int) | |
fs: sample rate of the sinus | |
samples: number of samples to generate (int) | |
frequency_range: a list of two values defining [lower, upper] frequency range (int) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# compile the model with the adam optimizer and mean squared error as its
# loss function; mse is additionally reported as a metric
autoencoder.compile(optimizer='adam', loss='mse', metrics=['mse'])

# some constants for training
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 100

# shuffle and batch the training examples; the evaluation examples are only
# batched, not shuffled
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
eval_dataset = eval_dataset.batch(BATCH_SIZE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import google.auth
# NOTE: this binds the name `requests` to google.auth's transport module,
# not to the standalone `requests` package.
from google.auth.transport import requests

# check the scopes you need at https://developers.google.com/oauthplayground in the api list
SCOPES = ['https://www.googleapis.com/auth/devstorage.full_control']

# env variable GOOGLE_APPLICATION_CREDENTIALS has to be set with service account key.json!
credentials, project_id = google.auth.default(scopes=SCOPES)

# Request object from google.auth.transport.requests.
# NOTE(review): presumably passed to credentials.refresh() later -- confirm.
http_request = requests.Request()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Subpixel1D(tf.keras.layers.Layer): | |
def __init__(self, | |
r, | |
**kwargs): | |
super(Subpixel1D, self).__init__(**kwargs) | |
self.r = r | |
def build(self, input_shape): | |
# check if channels are evenly divisible for subpixel1d to work! | |
input_shape = tf.TensorShape(input_shape).as_list() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# down- and up-sampling by a factor of 4
strides = 4
inputs = tf.keras.Input(shape=(16384, 1))

# Three strided convolutions: the filter count doubles (16 -> 32 -> 64) while
# the kernel size halves (64 -> 32 -> 16). With padding='same' and a stride
# of 4, each layer divides the time axis by 4 (16384 -> 4096 -> 1024 -> 256).
d = tf.keras.layers.Conv1D(16, kernel_size=64, strides=strides,
                           padding='same', activation='elu',
                           kernel_initializer='he_normal')(inputs)
d = tf.keras.layers.Conv1D(32, kernel_size=32, strides=strides,
                           padding='same', activation='elu',
                           kernel_initializer='he_normal')(d)
d = tf.keras.layers.Conv1D(64, kernel_size=16, strides=strides,
                           padding='same', activation='elu',
                           kernel_initializer='he_normal')(d)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the 'train' split of the 'ljspeech' dataset together with its info
# object, downloading it if necessary.
dataset, info = tfds.load(
    'ljspeech', split='train',
    download=True, with_info=True)

# transform int16 audio to float32 in [-1, 1]
# Only the 'speech' entry of each example dict is kept; every other key is
# dropped by this map.
# NOTE(review): if the samples are int16, dividing by 32767 maps a value of
# -32768 to marginally below -1.0 -- confirm whether that matters downstream.
dataset = dataset.map(
    lambda example_dict: tf.cast(
        example_dict['speech'], tf.float32
    ) / 32767,
    num_parallel_calls=AUTOTUNE)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mapping from effect name to a dict of settings for that effect; an empty
# dict means no settings are specified here.
# NOTE(review): presumably these dicts are forwarded as the effect's
# parameters and 'frequency' is in Hz -- confirm against get_sox_effect.
sox_effects = {
    'compand': {},
    'chorus': {},
    'highpass': {'frequency': 100},
    'lowpass': {'frequency': 8000},
    'phaser': {},
    'reverb': {},
}
def get_sox_effect( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_pb_effect(
    effect_type: str
) -> Callable[[tf.Tensor], np.ndarray]:
    """Build a function that applies one effect from ``pb`` to a tensor.

    Args:
        effect_type: name of an attribute of ``pb``. The attribute is looked
            up and called with no arguments every time the returned
            function runs.

    Returns:
        A callable that converts its tensor argument with ``.numpy()``, runs
        the effect on the result with ``sample_rate=SR`` and returns the
        effect's output.

    Raises:
        AttributeError: raised by the returned callable (not by this
            factory) when ``pb`` has no attribute named ``effect_type``.
    """
    def pb_effect(y: tf.Tensor) -> np.ndarray:
        # The effect operates on NumPy data, so leave the tensor world first.
        y = y.numpy()
        # A fresh effect instance with default settings is created per call.
        effect = getattr(pb, effect_type)()
        y_out = effect(y, sample_rate=SR)
        return y_out

    return pb_effect
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Apply the 'reverb' effect (with an empty settings dict) to every element.
# get_sox_effect returns a plain Python function, so it is wrapped in
# tf.py_function; the wrapped call is declared to return tf.float32.
sox_dataset = dataset.map(
    lambda speech: tf.py_function(
        get_sox_effect('reverb', {}),
        [speech],
        tf.float32)
    # num_parallel_calls=AUTOTUNE
)
OlderNewer