Skip to content

Instantly share code, notes, and snippets.

Last active February 9, 2020 19:37
Show Gist options
  • Save cosmincatalin/ea5486a0c9363d2ae44d6f0e2270c3d9 to your computer and use it in GitHub Desktop.
Save cosmincatalin/ea5486a0c9363d2ae44d6f0e2270c3d9 to your computer and use it in GitHub Desktop.
Voice Recognition SageMaker Script (Part 1)
import base64
import glob
import json
import logging
import subprocess
import sys
import tarfile
import traceback
import uuid
import wave
from os import unlink, environ, makedirs
from os.path import basename
from pickle import load
from random import randint
from shutil import copy2, rmtree
from urllib.request import urlretrieve
import mxnet as mx
import numpy as np
from mxnet import autograd, nd, gluon
from mxnet.gluon import Trainer
from mxnet.gluon.loss import SoftmaxCrossEntropyLoss
from mxnet.gluon.nn import Conv2D, MaxPool2D, Dropout, Flatten, Dense, Sequential
from mxnet.initializer import Xavier
def install(package):
    """Install ``package`` into the running interpreter's environment with pip.

    pip is invoked as ``<sys.executable> -m pip install <package>`` so the
    package lands in the same environment that is executing this script.

    Args:
        package: Requirement string handed to ``pip install``.

    NOTE(review): the call name was missing from this line in the reviewed
    copy of the file; ``subprocess.call`` is a reconstruction (``subprocess``
    is imported and otherwise unused) - confirm against the original script.
    """
    # Argument list, no shell: the package name is never shell-interpreted.
    exit_code = subprocess.call([sys.executable, "-m", "pip", "install", package])
    if exit_code != 0:
        # Best effort: the package may already be importable, so do not abort
        # here - but leave a trace instead of ignoring the failure silently.
        logging.warning("pip install %s exited with status %s", package, exit_code)
import cv2
import matplotlib
import matplotlib.pyplot as plt
# Fetch a static ffmpeg build and copy the `ffmpeg` and `ffprobe` binaries into
# the current working directory (presumably for pydub, imported right below).
# NOTE(review): PATH is extended with /tmp while the binaries are copied to
# "." - this only lines up if the working directory is /tmp; confirm.
environ["PATH"] += ":/tmp"

# Start from a clean scratch directory; ignore_errors=True tolerates its absence.
rmtree("ffmpeg-tmp", True)
makedirs("ffmpeg-tmp")

# NOTE(review): the download step was missing from the reviewed copy of this
# file. The URL below is an assumption derived from the archive file name -
# confirm it (and that it is still live) before relying on it.
urlretrieve("https://johnvansickle.com/ffmpeg/builds/ffmpeg-git-64bit-static.tar.xz",
            "ffmpeg-tmp/ffmpeg-git-64bit-static.tar.xz")

# NOTE(security): extractall() trusts the member paths inside the downloaded
# archive; only use an archive source you trust.
with tarfile.open("ffmpeg-tmp/ffmpeg-git-64bit-static.tar.xz") as tar:
    tar.extractall("ffmpeg-tmp")

# Without recursive=True, "**" behaves like "*", so this matches entries
# exactly one directory below ffmpeg-tmp.
for binary in [src for src in glob.glob("ffmpeg-tmp/*/**") if basename(src) in ["ffmpeg", "ffprobe"]]:
    copy2(binary, ".")

# The scratch directory is no longer needed once the binaries are copied out.
rmtree("ffmpeg-tmp", True)
from pydub import AudioSegment
# Names of the voices (speakers) this script works with.
# NOTE(review): presumably the classification labels, in label-index order, and
# apparently Amazon Polly voice ids - confirm against the data-preparation code.
voices = ["Ivy", "Joanna", "Joey", "Justin", "Kendra", "Kimberly", "Matthew", "Salli"]
def train(hyperparameters, channel_input_dirs, num_gpus, hosts):
    """Train the convolutional classifier and return the fitted network.

    Args:
        hyperparameters: Mapping of tunables. ``batch_size`` (default 64) and
            ``epochs`` (default 3) are read.
        channel_input_dirs: Mapping of channel name to directory. Only the
            ``"training"`` channel is used; it must contain ``train/data.p``
            and ``validation/data.p``.
        num_gpus: GPU count on this host; any positive value selects
            ``mx.gpu()``, otherwise ``mx.cpu()``.
        hosts: Sequence of participating hosts; a single host selects a local
            kvstore, more than one a distributed one.

    Returns:
        The trained ``Sequential`` network.

    NOTE(review): the reviewed copy of this function was damaged (truncated
    and missing statements). The reconstructed parts are marked below and
    should be confirmed against the original script.
    """
    batch_size = hyperparameters.get("batch_size", 64)
    epochs = hyperparameters.get("epochs", 3)
    training_dir = channel_input_dirs['training']

    # NOTE(security): pickle can execute arbitrary code while loading; only
    # point the training channel at data you produced yourself.
    with open("{}/train/data.p".format(training_dir), "rb") as handle:
        train_nd = load(handle)
    with open("{}/validation/data.p".format(training_dir), "rb") as handle:
        validation_nd = load(handle)

    # NOTE(review): the loader construction was truncated in the reviewed copy.
    # Reconstructed as a Gluon DataLoader over the unpickled object, which is
    # assumed to be a dataset of (data, label) samples - confirm.
    train_data = gluon.data.DataLoader(train_nd, batch_size, shuffle=True)
    validation_data = gluon.data.DataLoader(validation_nd, batch_size, shuffle=True)

    net = Sequential()
    with net.name_scope():
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0, activation="relu"))
        net.add(Conv2D(channels=32, kernel_size=(3, 3), padding=0, activation="relu"))
        net.add(MaxPool2D(pool_size=(2, 2)))
        # NOTE(review): the layers after the pooling step were missing from the
        # reviewed copy. The loss and the argmax(axis=1) accuracy check both
        # need a (batch, classes) output, so a minimal head is restored here;
        # the class count is assumed to be len(voices). Any dropout layer the
        # original had (Dropout is imported) is not recoverable - confirm.
        net.add(Flatten())
        net.add(Dense(len(voices)))

    ctx = mx.gpu() if num_gpus > 0 else mx.cpu()
    net.collect_params().initialize(Xavier(magnitude=2.24), ctx=ctx)
    loss = SoftmaxCrossEntropyLoss()

    if len(hosts) == 1:
        kvstore = "device" if num_gpus > 0 else "local"
    else:
        # The reviewed copy had a stray apostrophe inside "dist_device_sync",
        # which is not a valid kvstore type.
        kvstore = "dist_device_sync" if num_gpus > 0 else "dist_sync"
    trainer = Trainer(net.collect_params(), optimizer="adam", kvstore=kvstore)

    smoothing_constant = .01
    for e in range(epochs):
        moving_loss = 0
        for i, (data, label) in enumerate(train_data):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_result = loss(output, label)
            # Without these two calls the parameters are never updated.
            loss_result.backward()
            # Normalise by the actual batch length so a short final batch is
            # not under-weighted.
            trainer.step(data.shape[0])

            curr_loss = nd.mean(loss_result).asscalar()
            # moving_loss restarts every epoch, so seed it with the first batch
            # of *each* epoch; seeding only in epoch 0 made later epochs start
            # the average from 0 and report an artificially low loss.
            moving_loss = (curr_loss if i == 0
                           else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)

        validation_accuracy = measure_performance(net, ctx, validation_data)
        train_accuracy = measure_performance(net, ctx, train_data)
        print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, validation_accuracy))

    return net
def measure_performance(model, ctx, data_iter):
    """Return the accuracy of ``model`` over every batch in ``data_iter``.

    Args:
        model: Callable network; applied to each data batch.
        ctx: MXNet context the batches are moved to before the forward pass.
        data_iter: Iterable yielding ``(data, labels)`` batches.

    Returns:
        The value part of ``mx.metric.Accuracy().get()`` after all batches
        have been accumulated.
    """
    acc = mx.metric.Accuracy()
    for data, labels in data_iter:
        data = data.as_in_context(ctx)
        labels = labels.as_in_context(ctx)
        output = model(data)
        # Predicted class = index of the largest score along axis 1.
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=labels)
    return acc.get()[1]
def save(net, model_dir):
    """Write the network's symbolic graph to ``<model_dir>/model.json``.

    The network is applied to a symbolic placeholder named ``"data"`` and the
    resulting symbol is serialised.

    Args:
        net: Network to export; it is called on ``mx.sym.var("data")``.
        model_dir: Directory that receives ``model.json``.

    NOTE(review): the reviewed copy of this function had two statements fused
    together with the method call missing; ``y.save(...)`` is a
    reconstruction. Only the graph is written by the code visible here - no
    parameter values are persisted; confirm whether the original script also
    saved them.
    """
    y = net(mx.sym.var("data"))
    y.save("{}/model.json".format(model_dir))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment