Voice Recognition SageMaker Script (Part 1)
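Part 1 of a SageMaker MXNet training script: it installs a few extra dependencies and a static ffmpeg build at runtime, loads pickled training and validation datasets from the SageMaker input channel, and trains a small Gluon CNN that classifies samples into one of eight voices (the `voices` list below).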
import base64
import glob
import json
import logging
import subprocess
import sys
import tarfile
import traceback
import uuid
import wave
from os import unlink, environ, makedirs
from os.path import basename
from pickle import load
from random import randint
from shutil import copy2, rmtree
from urllib.request import urlretrieve

import mxnet as mx
import numpy as np
from mxnet import autograd, nd, gluon
from mxnet.gluon import Trainer
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss
from mxnet.gluon.nn import Conv2D, MaxPool2D, Dropout, Flatten, Dense, Sequential
from mxnet.initializer import Xavier
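
# Packages that are assumed not to be preinstalled in the SageMaker MXNet
# training container are installed at runtime with pip.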
def install(package):
    subprocess.call([sys.executable, "-m", "pip", "install", package])


install("opencv-python")
install("pydub")
install("matplotlib")

import cv2
import matplotlib

# Use the non-interactive Agg backend so figures can be rendered without a display.
matplotlib.use("agg")

import matplotlib.pyplot as plt
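
# pydub shells out to the ffmpeg and ffprobe binaries, so a static ffmpeg build
# is downloaded and the two executables are copied next to the script
# (presumably /tmp, which is appended to PATH below).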
environ["PATH"] += ":/tmp" | |
rmtree("ffmpeg-tmp", True) | |
makedirs("ffmpeg-tmp") | |
urlretrieve("https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-amd64-static.tar.xz", | |
"ffmpeg-tmp/ffmpeg-git-64bit-static.tar.xz") | |
tar = tarfile.open("ffmpeg-tmp/ffmpeg-git-64bit-static.tar.xz") | |
tar.extractall("ffmpeg-tmp") | |
tar.close() | |
for file in [src for src in glob.glob("ffmpeg-tmp/*/**") if basename(src) in ["ffmpeg", "ffprobe"]]: | |
copy2(file, ".") | |
rmtree("ffmpeg-tmp", True) | |
from pydub import AudioSegment | |
logging.basicConfig(level=logging.INFO)

# The eight target classes; the names correspond to Amazon Polly voices.
voices = ["Ivy", "Joanna", "Joey", "Justin", "Kendra", "Kimberly", "Matthew", "Salli"]
def train(hyperparameters, channel_input_dirs, num_gpus, hosts):
    batch_size = hyperparameters.get("batch_size", 64)
    epochs = hyperparameters.get("epochs", 3)

    mx.random.seed(42)

    training_dir = channel_input_dirs['training']

    # The training and validation sets arrive as pickled datasets in the "training" input channel.
    with open("{}/train/data.p".format(training_dir), "rb") as pickle:
        train_nd = load(pickle)
    with open("{}/validation/data.p".format(training_dir), "rb") as pickle:
        validation_nd = load(pickle)

    train_data = gluon.data.DataLoader(train_nd, batch_size, shuffle=True)
    validation_data = gluon.data.DataLoader(validation_nd, batch_size, shuffle=True)
    # A small CNN: two convolutional layers, max pooling, dropout, and a dense
    # output layer with one unit per voice.
    net = Sequential()
    with net.name_scope():
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0, activation="relu"))
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0, activation="relu"))
        net.add(MaxPool2D(pool_size=(2, 2)))
        net.add(Dropout(.25))
        net.add(Flatten())
        net.add(Dense(8))
    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()

    net.collect_params().initialize(Xavier(magnitude=2.24), ctx=ctx)
    loss = SoftmaxCrossEntropyLoss()

    # Pick the kvstore that matches the cluster layout: single host vs. distributed.
    if len(hosts) == 1:
        kvstore = "device" if num_gpus > 0 else "local"
    else:
        kvstore = "dist_device_sync" if num_gpus > 0 else "dist_sync"

    trainer = Trainer(net.collect_params(), optimizer="adam", kvstore=kvstore)
    smoothing_constant = .01

    for e in range(epochs):
        moving_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_result = loss(output, label)
            loss_result.backward()
            trainer.step(batch_size)

            # Exponentially weighted moving average of the batch loss.
            curr_loss = nd.mean(loss_result).asscalar()
            moving_loss = (curr_loss if ((i == 0) and (e == 0))
                           else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)

        validation_accuracy = measure_performance(net, ctx, validation_data)
        train_accuracy = measure_performance(net, ctx, train_data)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, validation_accuracy))

    return net
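
# In the classic SageMaker MXNet container (pre script mode), the object
# returned by train() is handed to save() below once training finishes.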
def measure_performance(model, ctx, data_iter):
    acc = mx.metric.Accuracy()
    for _, (data, labels) in enumerate(data_iter):
        data = data.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        output = model(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=labels)
    return acc.get()[1]
def save(net, model_dir):
    # Serialize the symbolic graph and the trained parameters to the SageMaker model directory.
    y = net(mx.sym.var("data"))
    y.save("{}/model.json".format(model_dir))
    net.collect_params().save("{}/model.params".format(model_dir))
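
For context, this is roughly how the script above could be launched as a training job with the SageMaker Python SDK (v1-style API, as used with the classic MXNet container). It is a minimal sketch: the script filename, role ARN, bucket path, instance type, and framework version are placeholders, not part of the original gist.

from sagemaker.mxnet import MXNet

# All values below are placeholders for illustration only.
estimator = MXNet(
    entry_point="voice-recognition-sagemaker-script.py",  # the gist above, saved locally
    role="arn:aws:iam::123456789012:role/SageMakerRole",   # hypothetical execution role
    train_instance_count=1,
    train_instance_type="ml.p2.xlarge",
    framework_version="1.2.1",
    py_version="py3",
    hyperparameters={"batch_size": 64, "epochs": 3},
)

# The "training" channel maps to channel_input_dirs["training"] inside train().
estimator.fit({"training": "s3://my-bucket/voice-recognition/data"})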