Skip to content

Instantly share code, notes, and snippets.

@dschwertfeger
Created March 21, 2020 05:29
Show Gist options
  • Save dschwertfeger/427f9dcc7058147507a85dfbc5bb14bf to your computer and use it in GitHub Desktop.
An example of a neural network that uses a custom LogMelSpectrogram layer
def ConvModel(n_classes, sample_rate=16000, duration=4,
              fft_size=_FFT_SIZE, hop_size=_HOP_SIZE, n_mels=_N_MEL_BINS):
    """Build a Keras CNN classifier that consumes raw audio waveforms.

    The model converts the waveform to a log-mel spectrogram via the
    custom ``LogMelSpectrogram`` layer, then applies two conv/BN/pool
    stages and a small dense head ending in a softmax.

    Args:
        n_classes: Number of output classes for the softmax layer.
        sample_rate: Audio sample rate in Hz (default 16000).
        duration: Clip length in seconds; input size is
            ``sample_rate * duration`` samples.
        fft_size: FFT window size forwarded to ``LogMelSpectrogram``.
        hop_size: STFT hop size forwarded to ``LogMelSpectrogram``.
        n_mels: Number of mel frequency bins.

    Returns:
        A compiled-ready ``Model`` mapping raw audio to class probabilities.
    """
    n_samples = sample_rate * duration
    # Raw audio samples come in directly; no preprocessing required upstream.
    waveform = Input(shape=(n_samples,), name='input', dtype='float32')
    # Custom layer: waveform -> log-mel spectrogram "image".
    net = LogMelSpectrogram(sample_rate, fft_size, hop_size, n_mels)(waveform)
    # Normalize along the frequency axis (axis=2 of the spectrogram).
    net = BatchNormalization(axis=2)(net)
    # Full-height kernel collapses the mel-frequency dimension to 1.
    net = Conv2D(32, (3, n_mels), activation='relu')(net)
    net = BatchNormalization()(net)
    net = MaxPool2D((1, net.shape[2]))(net)
    net = Conv2D(32, (3, 1), activation='relu')(net)
    net = BatchNormalization()(net)
    # Halve the time dimension before the dense head.
    net = MaxPool2D(pool_size=(2, 1))(net)
    net = Flatten()(net)
    net = Dense(64, activation='relu')(net)
    net = Dropout(0.25)(net)
    net = Dense(n_classes, activation='softmax')(net)
    return Model(inputs=waveform, outputs=net)
# Instantiate for an 11-class task; sparse (integer) labels are expected,
# hence the sparse categorical loss and accuracy metric.
model = ConvModel(11)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)
model.summary()
# Model: "model"
# _________________________________________________________________
# Layer (type) Output Shape Param #
# =================================================================
# input (InputLayer) [(None, 64000)] 0
# _________________________________________________________________
# log_mel_spectrogram_4 (LogMe (None, 124, 64, 1) 0
# _________________________________________________________________
# batch_normalization (BatchNo (None, 124, 64, 1) 256
# _________________________________________________________________
# conv2d (Conv2D) (None, 122, 1, 32) 6176
# _________________________________________________________________
# batch_normalization_1 (Batch (None, 122, 1, 32) 128
# _________________________________________________________________
# max_pooling2d (MaxPooling2D) (None, 122, 1, 32) 0
# _________________________________________________________________
# conv2d_1 (Conv2D) (None, 120, 1, 32) 3104
# _________________________________________________________________
# batch_normalization_2 (Batch (None, 120, 1, 32) 128
# _________________________________________________________________
# max_pooling2d_1 (MaxPooling2 (None, 60, 1, 32) 0
# _________________________________________________________________
# flatten (Flatten) (None, 1920) 0
# _________________________________________________________________
# dense (Dense) (None, 64) 122944
# _________________________________________________________________
# dropout (Dropout) (None, 64) 0
# _________________________________________________________________
# dense_1 (Dense) (None, 11) 715
# =================================================================
# Total params: 133,451
# Trainable params: 133,195
# Non-trainable params: 256
# _________________________________________________________________
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment