Skip to content

Instantly share code, notes, and snippets.

@magnuskahr
Last active December 6, 2019 14:56
Show Gist options
  • Save magnuskahr/6b63cc0365f8f3253eb0ae3cd3e1f11d to your computer and use it in GitHub Desktop.
Save magnuskahr/6b63cc0365f8f3253eb0ae3cd3e1f11d to your computer and use it in GitHub Desktop.
Untitled8.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled8.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/magnuskahr/6b63cc0365f8f3253eb0ae3cd3e1f11d/untitled8.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "Iqy2srmDL1Ax",
"colab_type": "code",
"colab": {}
},
"source": [
"%tensorflow_version 1.x\n",
"\n",
"# Use %pip (not !pip) so the packages are installed into the\n",
"# environment of the running kernel, not a possibly different shell env.\n",
"%pip install SoundFile\n",
"%pip install scikit-learn\n",
"%pip install sounddevice\n",
"%pip install pydub"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rCMPe9mLPql1",
"colab_type": "code",
"colab": {}
},
"source": [
"# Optional: mount Google Drive for persistent storage (disabled by default).\n",
"#from google.colab import drive\n",
"#drive.mount('/content/gdrive')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "k1eEK7z1Ps5V",
"colab_type": "code",
"colab": {}
},
"source": [
"# Fetch the ESC-50 environmental sound dataset (audio files + metadata CSV).\n",
"!git clone https://github.com/karolpiczak/ESC-50"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "HCwR7sKW2XwD",
"colab_type": "code",
"colab": {}
},
"source": [
"import json\n",
"import os\n",
"import math\n",
"\n",
"import keras\n",
"\n",
"import librosa\n",
"import numpy as np\n",
"import pandas as pd\n",
"from pydub import AudioSegment\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from tqdm import tqdm\n",
"\n",
"from librosa.feature import melspectrogram\n",
"from librosa.core import perceptual_weighting, mel_frequencies\n",
"\n",
"\n",
"SAMPLING_RATE = 44100  # Hz\n",
"CHUNK_SIZE = 882       # STFT hop length: 20 ms at 44.1 kHz\n",
"FFT_SIZE = 2 * CHUNK_SIZE\n",
"SEGMENT_LENGTH = 100   # spectrogram frames per training segment (2 s)\n",
"\n",
"MEL_BANDS = 80\n",
"MEL_FREQS = mel_frequencies(n_mels=MEL_BANDS)\n",
"\n",
"# Rough dataset statistics used to standardize the dB-scaled spectrograms.\n",
"AUDIO_MEAN = 20.0\n",
"AUDIO_STD = 20.0\n",
"\n",
"def to_one_hot(targets, class_count):\n",
"    \"\"\"Return a (len(targets), class_count) one-hot matrix for integer labels.\"\"\"\n",
"    one_hot_enc = np.zeros((len(targets), class_count))\n",
"    # Vectorized: set column targets[r] of row r to 1 in one shot.\n",
"    one_hot_enc[np.arange(len(targets)), targets] = 1\n",
"    return one_hot_enc\n",
"\n",
"def load_segment(filename):\n",
"    \"\"\"Load a precomputed spectrogram and take a random 2 s temporal crop.\n",
"\n",
"    The random offset acts as data augmentation: each epoch sees a\n",
"    different window of the same clip.\n",
"    \"\"\"\n",
"    spec = np.load('/content/ESC-50/spec/' + filename + '.npy').astype('float32')\n",
"    offset = np.random.randint(0, np.shape(spec)[1] - SEGMENT_LENGTH + 1)\n",
"    spec = spec[:, offset:offset + SEGMENT_LENGTH]\n",
"\n",
"    # Add a leading channel axis: (1, MEL_BANDS, SEGMENT_LENGTH).\n",
"    return np.stack([spec])\n",
"\n",
"class DataSequence(keras.utils.Sequence):\n",
"    \"\"\"Keras Sequence yielding standardized spectrogram batches.\n",
"\n",
"    NOTE(review): relies on the module-level `le` (LabelEncoder) and\n",
"    `labels` being defined before batches are requested.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, batch_size, dataframe):\n",
"        self.batch_size = batch_size\n",
"        # Shuffle once up front so batches are drawn in random order.\n",
"        self.data = list(dataframe.iloc[np.random.permutation(len(dataframe))].itertuples())\n",
"\n",
"    def __len__(self):\n",
"        return math.ceil(len(self.data) / self.batch_size)\n",
"\n",
"    def __getitem__(self, idx):\n",
"        batch = self.data[idx * self.batch_size : (idx + 1) * self.batch_size]\n",
"\n",
"        X = []\n",
"        y = []\n",
"        for b in batch:\n",
"            X.append(load_segment(b.filename))\n",
"            y.append(le.transform([b.category])[0])\n",
"\n",
"        # Standardize with the precomputed dataset statistics.\n",
"        X = (np.stack(X) - AUDIO_MEAN) / AUDIO_STD\n",
"\n",
"        y = to_one_hot(np.array(y), len(labels))\n",
"\n",
"        return np.array(X), np.array(y)\n"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "oP2PrJDiZOEq",
"colab_type": "code",
"colab": {}
},
"source": [
"# Fix the RNG so every downstream shuffle is reproducible.\n",
"np.random.seed(1)\n",
"\n",
"data = pd.read_csv('/content/ESC-50/meta/esc50.csv')\n",
"data = data[data.esc10 == True]  # keep only the 10-class ESC-10 subset\n",
"\n",
"# Hold out the last 20% of rows for validation.\n",
"split_point = int(len(data) * (1 - 0.2))\n",
"trainingGen = data.iloc[:split_point]\n",
"validationGen = data.iloc[split_point:]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "K-BkA350Y5Oi",
"colab_type": "code",
"colab": {}
},
"source": [
"# Alphabetically ordered class names, mapped to integer ids by the encoder.\n",
"labels = np.sort(data['category'].unique())\n",
"le = LabelEncoder()\n",
"le.fit(labels)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "3cUmsX-gNjug",
"colab_type": "code",
"colab": {}
},
"source": [
"# Generate log-mel spectrograms for every clip and cache them as .npy files.\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Plot the intermediate representations for the first clip only.\n",
"shown = False\n",
"\n",
"if not os.path.exists('/content/ESC-50/spec/'):\n",
"    os.mkdir('/content/ESC-50/spec/')\n",
"\n",
"for row in tqdm(data.itertuples(), total=len(data)):\n",
"\n",
"    audio_file = '/content/ESC-50/audio/' + row.filename\n",
"\n",
"    # Decode, resample to 44.1 kHz mono.\n",
"    audio = AudioSegment.from_file(audio_file)\n",
"    audio = audio.set_frame_rate(SAMPLING_RATE)\n",
"    audio = audio.set_channels(1)\n",
"    # np.frombuffer replaces the deprecated np.fromstring for binary data;\n",
"    # the + 0.5 also promotes the int16 samples to a writable float array.\n",
"    audio = np.frombuffer(audio._data, dtype = np.int16) + 0.5\n",
"\n",
"    if not shown:\n",
"        plt.plot(audio)\n",
"        plt.show()\n",
"\n",
"    audio /= 32767  # normalize to roughly [-1, 1]\n",
"\n",
"    if not shown:\n",
"        plt.plot(audio)\n",
"        plt.show()\n",
"\n",
"    spec = melspectrogram(\n",
"        audio,\n",
"        sr = SAMPLING_RATE,\n",
"        n_fft = FFT_SIZE,\n",
"        hop_length = CHUNK_SIZE,\n",
"        n_mels = MEL_BANDS\n",
"    )\n",
"\n",
"    if not shown:\n",
"        plt.plot(spec)\n",
"        plt.show()\n",
"\n",
"    # Convert power to perceptually weighted dB (A-weighting per mel band).\n",
"    spec = perceptual_weighting(\n",
"        S = spec,\n",
"        frequencies = MEL_FREQS,\n",
"        amin = 1e-5,\n",
"        ref = 1e-5,\n",
"        top_db = None\n",
"    )\n",
"\n",
"    if not shown:\n",
"        plt.plot(spec)\n",
"        plt.show()\n",
"\n",
"    # Clamp the dB range to [0, 100] before storing compactly as float16.\n",
"    spec = np.clip(spec, 0, 100)\n",
"\n",
"    if not shown:\n",
"        plt.plot(spec)\n",
"        plt.show()\n",
"\n",
"    shown = True\n",
"\n",
"    np.save(\n",
"        '/content/ESC-50/spec/' + row.filename + '.npy',\n",
"        spec.astype(np.float16),\n",
"        allow_pickle = False\n",
"    )"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "PzZZYVVfY6jH",
"colab_type": "code",
"colab": {}
},
"source": [
"import keras\n",
"from keras.layers.advanced_activations import LeakyReLU\n",
"from keras.layers.core import Activation, Dense, Dropout, Flatten\n",
"from keras.layers import Input, Conv2D, MaxPooling2D\n",
"from keras.models import Sequential\n",
"from keras.regularizers import l2\n",
"from keras.optimizers import SGD, Adam\n",
"\n",
"# Input is a single-channel log-mel spectrogram: (channels, mels, frames).\n",
"input_shape = 1, MEL_BANDS, SEGMENT_LENGTH\n",
"\n",
"# BUGFIX: every conv/pool layer must use channels_first. Previously only\n",
"# the first Conv2D set data_format, so the remaining layers silently fell\n",
"# back to the keras.json default (normally channels_last) and convolved/\n",
"# pooled over the wrong axes.\n",
"model = Sequential()\n",
"\n",
"model.add(Conv2D(80, (3, 3), kernel_regularizer = l2(0.001), kernel_initializer='he_uniform',\n",
"                 input_shape = input_shape, data_format='channels_first'))\n",
"model.add(LeakyReLU())\n",
"model.add(MaxPooling2D((3, 3), (3, 3), data_format='channels_first'))\n",
"\n",
"model.add(Conv2D(160, (3, 3), kernel_regularizer = l2(0.001), kernel_initializer='he_uniform',\n",
"                 data_format='channels_first'))\n",
"model.add(LeakyReLU())\n",
"model.add(MaxPooling2D((3, 3), (3, 3), data_format='channels_first'))\n",
"\n",
"model.add(Conv2D(240, (3, 3), kernel_regularizer = l2(0.001), kernel_initializer='he_uniform',\n",
"                 data_format='channels_first'))\n",
"model.add(LeakyReLU())\n",
"model.add(MaxPooling2D((3, 3), (3, 3), data_format='channels_first'))\n",
"\n",
"model.add(Flatten())\n",
"model.add(Dropout(0.5))\n",
"\n",
"model.add(Dense(len(labels), kernel_regularizer = l2(0.001), kernel_initializer='he_uniform'))\n",
"model.add(Activation('softmax'))\n",
"\n",
"# Snapshot the random initialization so each LR trial can restart from it.\n",
"model.save_weights('init.h5')"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "EeR-DMkzu81p",
"colab_type": "code",
"colab": {}
},
"source": [
"class MeasureCallback(keras.callbacks.Callback):\n",
"    \"\"\"Records (learning rate, epoch, val_loss, val_acc) after every epoch.\"\"\"\n",
"\n",
"    def __init__(self):\n",
"        self.data = []\n",
"        self.learning_rate = 0.1\n",
"\n",
"    def get_learning_rate(self):\n",
"        return self.learning_rate\n",
"\n",
"    def save(self, name):\n",
"        # Dump everything recorded so far to out<name>.csv.\n",
"        frame = pd.DataFrame(self.data, columns=['LR', 'epoch', 'val_loss', 'val_acc'])\n",
"        frame.to_csv(f\"out{str(name)}.csv\")\n",
"\n",
"    def get_data(self):\n",
"        return self.data\n",
"\n",
"    def step(self):\n",
"        # Decay the learning rate by 20% per trial.\n",
"        self.learning_rate -= self.learning_rate / 5\n",
"\n",
"    def on_epoch_end(self, epoch, logs=None):\n",
"        row = [self.learning_rate, epoch, logs['val_loss'], logs['val_acc']]\n",
"        self.data.append(row)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "HQz2bs9oxPqR",
"colab_type": "code",
"colab": {}
},
"source": [
"measure = MeasureCallback()\n",
"\n",
"# Learning-rate sweep: retrain from the same initial weights at a\n",
"# geometrically decaying LR (x0.8 per trial) until it drops below 1e-6.\n",
"trial = 0\n",
"while measure.get_learning_rate() > 1e-6:\n",
"    trial += 1\n",
"    # Each iteration trains for 100 epochs, so this counts trials, not epochs\n",
"    # (the original message mislabeled it as an epoch).\n",
"    print(f\"using {measure.get_learning_rate()} in trial {trial}\")\n",
"    model.load_weights('init.h5')\n",
"    optimizer = Adam(lr = measure.get_learning_rate())\n",
"    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])\n",
"\n",
"    model.fit_generator(generator = DataSequence(32, trainingGen),\n",
"                        validation_data = DataSequence(32, validationGen),\n",
"                        epochs = 100,\n",
"                        callbacks = [measure],\n",
"                        verbose = 0)\n",
"\n",
"    measure.step()\n",
"\n",
"    # Checkpoint the measurements every 10 trials.\n",
"    if trial % 10 == 0:\n",
"        measure.save(trial)\n",
"\n",
"print(measure.get_data())"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yUYOvoc5c8yn",
"colab_type": "code",
"colab": {}
},
"source": [
"# Persist the complete learning-rate sweep results.\n",
"measure.save(\"final\")"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "yN_PyycLXrkP",
"colab_type": "code",
"colab": {}
},
"source": [
"# Final training run at a fixed, hand-picked learning rate\n",
"# (presumably chosen from the sweep above -- TODO confirm).\n",
"optimizer = SGD(lr=0.000032451, momentum=0.9, nesterov=True)\n",
"model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])\n",
"\n",
"measure = MeasureCallback()\n",
"\n",
"model.fit_generator(generator = DataSequence(32, trainingGen),\n",
"                    validation_data = DataSequence(32, validationGen),\n",
"                    epochs = 100,\n",
"                    callbacks = [measure])\n",
"\n",
"# Export the trained model: architecture, class labels, then weights.\n",
"with open('model.json', 'w') as file:\n",
"    file.write(model.to_json())\n",
"\n",
"with open('labels.json', 'w') as file:\n",
"    json.dump(le.classes_.tolist(), file)\n",
"\n",
"model.save_weights('model.h5')"
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment