Created
February 2, 2017 21:46
-
-
Save keunwoochoi/ac51b916337e8d4ad8d060bf6bd4bf7b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: using kapre audio layers at the front of a Keras model so that the
# network can be trained directly on raw audio samples.
from keras.models import Sequential
from kapre.time_frequency import Melspectrogram
from kapre.utils import Normalization2D
from kapre.augmentation import AdditiveNoise

# 6 channels (!) of 44100 samples each, i.e. 1 second of audio at `sr`.
input_shape = (6, 44100)
sr = 44100

model = Sequential()

# A mel-spectrogram layer with
#  - no decibel conversion (return_decibel=False)
#  - amplitude, not power (power=1.0)
#  - fixed (non-trainable) mel filterbank and STFT kernels
model.add(
    Melspectrogram(
        n_dft=512,
        n_hop=256,
        input_shape=input_shape,  # was an undefined name (`src_shape`)
        border_mode='same',
        sr=sr,
        n_mels=128,
        fmin=0.0,
        fmax=sr/2,
        power=1.0,
        return_decibel=False,
        trainable_fb=False,
        trainable_kernel=False,  # the comma here was missing
        name='trainable_stft',
    )
)

# Maybe some additive white noise.
model.add(AdditiveNoise(power=0.2))

# If you want to normalise it per frequency band.
# Other options: 'channel', 'time', 'batch', 'data_sample'.
model.add(Normalization2D(str_axis='freq'))

# After this, it's just a usual Keras workflow. For example:
# add some layers, e.g., model.add(some convolution layers..)

# Compile the model (categorical cross-entropy for single-label classification).
model.compile('adam', 'categorical_crossentropy')

# Train it with raw audio sample inputs.
# NOTE: `load_x` and `load_y` are placeholders that are not defined in this
# snippet; supply your own data loaders.
x = load_x()  # e.g., x.shape = (10000, 6, 44100)
y = load_y()  # e.g., y.shape = (10000, 10) if it's 10-class classification

# ...and train it.
model.fit(x, y)

# Write a paper and graduate or get paid. Profit!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment