corv iCorv

## simple_raw_audio_autoencoder.py
# Number of samples per audio chunk; also the width of the model input below.
AUDIO_CHUNK_SIZE = 1024
# Presumably the audio sample rate in Hz -- it is not used in the lines shown here.
RATE = 44100

# Build the model. The input is laid out as a 1 x AUDIO_CHUNK_SIZE grid with a
# single channel, and the convolution kernels have a height of 1.
kernel_size = (1, 5)
input_audio = Input(shape=(1, AUDIO_CHUNK_SIZE, 1))

# First layers: 64-filter convolution, (1, 2) average pooling, 32-filter convolution.
net = Convolution2D(
    filters=64,
    kernel_size=kernel_size,
    activation='relu',
    padding='same',
)(input_audio)
net = AveragePooling2D(pool_size=(1, 2))(net)
net = Convolution2D(
    filters=32,
    kernel_size=kernel_size,
    activation='relu',
    padding='same',
)(net)

## sinus_dataset.py
# let's build an audio dataset of random sine-wave chunks!

def sinus_dataset_generator(num_examples, fs, samples, frequency_range):
    """Builds a dataset of sinus.

    Args:
        num_examples: number of examples to generate (int)
        fs: sample rate of the sinus
        samples: number of samples to generate (int)
        frequency_range: a list of two values defining [lower, upper] frequency range (int)

## train_simple_autoencoder.py
# Configure training: Adam optimizer, mean squared error as the loss, and the
# same quantity reported as a metric.
autoencoder.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mse'],
)

# Constants used for training.
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 100

# Only the training examples are shuffled; both datasets are batched.
train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE)
train_dataset = train_dataset.batch(BATCH_SIZE)
eval_dataset = eval_dataset.batch(BATCH_SIZE)

## google_access_token.py
import google.auth
from google.auth.transport import requests

# Scopes to request. The scopes each API needs can be looked up in the API
# list at https://developers.google.com/oauthplayground
SCOPES = ["https://www.googleapis.com/auth/devstorage.full_control"]

# The GOOGLE_APPLICATION_CREDENTIALS environment variable has to be set to the
# service account key.json before this call.
credentials, project_id = google.auth.default(scopes=SCOPES)

# `requests` is the google.auth.transport.requests module imported above.
http_request = requests.Request()

## Subpixel1D.py
class Subpixel1D(tf.keras.layers.Layer):
    """Keras layer configured with a factor ``r``.

    Only the constructor and the beginning of ``build`` are visible in this
    excerpt. ``r`` is presumably the sub-pixel upsampling factor -- TODO
    confirm against the rest of the layer.
    """

    def __init__(self, r, **kwargs):
        # The base layer is initialised first, then the factor is stored.
        super().__init__(**kwargs)
        self.r = r

    def build(self, input_shape):
        # check if channels are evenly divisible for subpixel1d to work!
        # NOTE(review): the visible code only converts the shape to a plain
        # list; the divisibility check itself is not shown here.
        input_shape = tf.TensorShape(input_shape).as_list()

## subpixel1d_cnn.py
# down- and up-sampling by a factor of 4
strides = 4
inputs = tf.keras.Input(shape=(16384, 1))

# Three strided Conv1D layers: the filter count doubles (16 -> 32 -> 64)
# while the kernel size halves (64 -> 32 -> 16).
d = tf.keras.layers.Conv1D(
    filters=16,
    kernel_size=64,
    strides=strides,
    padding='same',
    activation='elu',
    kernel_initializer='he_normal',
)(inputs)
d = tf.keras.layers.Conv1D(
    filters=32,
    kernel_size=32,
    strides=strides,
    padding='same',
    activation='elu',
    kernel_initializer='he_normal',
)(d)
d = tf.keras.layers.Conv1D(
    filters=64,
    kernel_size=16,
    strides=strides,
    padding='same',
    activation='elu',
    kernel_initializer='he_normal',
)(d)

## load_ljspeech.py
# Load the 'train' split of LJSpeech (downloading it when necessary) together
# with its info object.
dataset, info = tfds.load(
    'ljspeech',
    split='train',
    download=True,
    with_info=True,
)

# transform int16 audio to float32 in [-1, 1]
# Only the 'speech' entry of each example is kept. Because the divisor is
# 32767, an int16 sample of -32768 ends up marginally below -1.
dataset = dataset.map(
    lambda example: tf.cast(example['speech'], tf.float32) / 32767,
    num_parallel_calls=AUTOTUNE,
)

## mappable_sox_fn.py
# Effect name -> options dict for that effect. An empty dict means no options
# are given here (presumably the effect's defaults apply -- verify against the
# code that consumes this table).
sox_effects = {
    "compand": {},
    "chorus": {},
    "highpass": {"frequency": 100},
    "lowpass": {"frequency": 8000},
    "phaser": {},
    "reverb": {},
}

def get_sox_effect(

## mappable_pb_fn.py
def get_pb_effect(effect_type: str) -> Callable[[tf.Tensor], np.ndarray]:
    """Return a function that applies the ``pb`` effect named ``effect_type``.

    Args:
        effect_type: attribute name looked up on ``pb``; the attribute is
            called with no arguments to create the effect.

    Returns:
        A callable taking a tensor and returning the effect's output for it.
    """
    def pb_effect(y: tf.Tensor) -> np.ndarray:
        # The tensor is converted with .numpy() before it reaches the effect.
        samples = y.numpy()
        # A new effect object is created on every call, with default settings.
        plugin = getattr(pb, effect_type)()
        return plugin(samples, sample_rate=SR)

    return pb_effect

## soxbindings_dataset_map.py
# Apply the 'reverb' sox effect (with an empty options dict) to every element
# by wrapping the Python callable in tf.py_function; the declared output
# dtype is float32.
sox_dataset = dataset.map(
    lambda speech: tf.py_function(
        func=get_sox_effect('reverb', {}),
        inp=[speech],
        Tout=tf.float32,
    )
    # num_parallel_calls=AUTOTUNE
)
	# Number of samples per audio chunk; also the width of the model input below.
	AUDIO_CHUNK_SIZE = 1024
	# Presumably the audio sample rate in Hz -- it is not used in the lines shown here.
	RATE = 44100

	# Build the model. The input is laid out as a 1 x AUDIO_CHUNK_SIZE grid with a
	# single channel, and the convolution kernels have a height of 1.
	kernel_size = (1, 5)
	input_audio = Input(shape=(1, AUDIO_CHUNK_SIZE, 1))

	# First layers: 64-filter convolution, (1, 2) average pooling, 32-filter convolution.
	net = Convolution2D(
	    filters=64,
	    kernel_size=kernel_size,
	    activation='relu',
	    padding='same',
	)(input_audio)
	net = AveragePooling2D(pool_size=(1, 2))(net)
	net = Convolution2D(
	    filters=32,
	    kernel_size=kernel_size,
	    activation='relu',
	    padding='same',
	)(net)
	# let's build an audio dataset of random sine-wave chunks!

	def sinus_dataset_generator(num_examples, fs, samples, frequency_range):
	"""Builds a dataset of sinus.

	Args:
	num_examples: number of examples to generate (int)
	fs: sample rate of the sinus
	samples: number of samples to generate (int)
	frequency_range: a list of two values defining [lower, upper] frequency range (int)
	# Configure training: Adam optimizer, mean squared error as the loss, and the
	# same quantity reported as a metric.
	autoencoder.compile(
	    optimizer='adam',
	    loss='mse',
	    metrics=['mse'],
	)

	# Constants used for training.
	BATCH_SIZE = 32
	SHUFFLE_BUFFER_SIZE = 100

	# Only the training examples are shuffled; both datasets are batched.
	train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE)
	train_dataset = train_dataset.batch(BATCH_SIZE)
	eval_dataset = eval_dataset.batch(BATCH_SIZE)
	import google.auth
	from google.auth.transport import requests

	# Scopes to request. The scopes each API needs can be looked up in the API
	# list at https://developers.google.com/oauthplayground
	SCOPES = ["https://www.googleapis.com/auth/devstorage.full_control"]

	# The GOOGLE_APPLICATION_CREDENTIALS environment variable has to be set to the
	# service account key.json before this call.
	credentials, project_id = google.auth.default(scopes=SCOPES)

	# `requests` is the google.auth.transport.requests module imported above.
	http_request = requests.Request()
	class Subpixel1D(tf.keras.layers.Layer):
	    """Keras layer configured with a factor ``r``.

	    The original lines had lost their nesting (class header, method headers
	    and method bodies all at one indent level), which is not valid Python;
	    the structure is restored here. ``r`` is presumably the sub-pixel
	    upsampling factor -- TODO confirm against the rest of the layer.
	    """

	    def __init__(self,
	                 r,
	                 **kwargs):
	        # The base layer is initialised first, then the factor is stored.
	        super(Subpixel1D, self).__init__(**kwargs)
	        self.r = r

	    def build(self, input_shape):
	        # check if channels are evenly divisible for subpixel1d to work!
	        # NOTE(review): the visible code only converts the shape to a plain
	        # list; the divisibility check itself is not shown here.
	        input_shape = tf.TensorShape(input_shape).as_list()
	# down- and up-sampling by a factor of 4
	strides = 4
	inputs = tf.keras.Input(shape=(16384, 1))

	# Three strided Conv1D layers: the filter count doubles (16 -> 32 -> 64)
	# while the kernel size halves (64 -> 32 -> 16).
	d = tf.keras.layers.Conv1D(
	    filters=16,
	    kernel_size=64,
	    strides=strides,
	    padding='same',
	    activation='elu',
	    kernel_initializer='he_normal',
	)(inputs)
	d = tf.keras.layers.Conv1D(
	    filters=32,
	    kernel_size=32,
	    strides=strides,
	    padding='same',
	    activation='elu',
	    kernel_initializer='he_normal',
	)(d)
	d = tf.keras.layers.Conv1D(
	    filters=64,
	    kernel_size=16,
	    strides=strides,
	    padding='same',
	    activation='elu',
	    kernel_initializer='he_normal',
	)(d)
	# Load the 'train' split of LJSpeech (downloading it when necessary) together
	# with its info object.
	dataset, info = tfds.load(
	    'ljspeech',
	    split='train',
	    download=True,
	    with_info=True,
	)

	# transform int16 audio to float32 in [-1, 1]
	# Only the 'speech' entry of each example is kept. Because the divisor is
	# 32767, an int16 sample of -32768 ends up marginally below -1.
	dataset = dataset.map(
	    lambda example: tf.cast(example['speech'], tf.float32) / 32767,
	    num_parallel_calls=AUTOTUNE,
	)
	# Effect name -> options dict for that effect. An empty dict means no options
	# are given here (presumably the effect's defaults apply -- verify against the
	# code that consumes this table).
	sox_effects = {
	    "compand": {},
	    "chorus": {},
	    "highpass": {"frequency": 100},
	    "lowpass": {"frequency": 8000},
	    "phaser": {},
	    "reverb": {},
	}

	def get_sox_effect(
	def get_pb_effect(
	    effect_type: str
	    ) -> Callable[[tf.Tensor], np.ndarray]:
	    """Return a function that applies the ``pb`` effect named ``effect_type``.

	    The original lines had lost their nesting (outer and inner function
	    bodies at the same indent as their headers), which is not valid
	    Python; the structure is restored here.

	    Args:
	        effect_type: attribute name looked up on ``pb``; the attribute is
	            called with no arguments to create the effect.

	    Returns:
	        A callable taking a tensor and returning the effect's output for it.
	    """
	    def pb_effect(y: tf.Tensor) -> np.ndarray:
	        # The tensor is converted with .numpy() before it reaches the effect.
	        y = y.numpy()
	        # A new effect object is created on every call, with default settings.
	        effect = getattr(pb, effect_type)()
	        y_out = effect(y, sample_rate=SR)
	        return y_out
	    return pb_effect
	# Apply the 'reverb' sox effect (with an empty options dict) to every element
	# by wrapping the Python callable in tf.py_function; the declared output
	# dtype is float32.
	sox_dataset = dataset.map(
	    lambda speech: tf.py_function(
	        func=get_sox_effect('reverb', {}),
	        inp=[speech],
	        Tout=tf.float32,
	    )
	    # num_parallel_calls=AUTOTUNE
	)