Last active
June 11, 2017 10:45
-
-
Save navin-mohan/bba826c5f7062c9753a845f40296fece to your computer and use it in GitHub Desktop.
Helper Functions for ML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Python 3 | |
import numpy as np | |
from PIL import Image | |
from io import BytesIO | |
from IPython import display | |
def split_test_train(dataset, percent):
    """Randomly partition ``dataset`` row-wise into a train and a test part.

    One uniform random number in [0, 1) is drawn per row along the first
    axis. Rows whose draw is below ``percent`` go to the train part; all
    other rows go to the test part. ``percent`` is therefore a fraction
    (e.g. ``0.8``), and the split sizes are only approximately proportional
    to it.

    Returns:
        A ``(train, test)`` tuple.
    """
    row_count = dataset.shape[0]
    goes_to_train = np.random.rand(row_count) < percent
    return dataset[goes_to_train], dataset[~goes_to_train]
def dense_to_one_hot(dense_labels, num_classes=10):
    """Convert integer class labels into one-hot encoded rows.

    Args:
        dense_labels: Array-like of class indices. It is flattened before
            use, so shapes ``(n,)`` and ``(n, 1)`` give the same result.
            Values are cast to int (floats are truncated).
        num_classes: Width of each one-hot row; valid labels are
            ``0 .. num_classes - 1``.

    Returns:
        An int array of shape ``(num_labels, num_classes)`` holding a single
        1 per row, in the column given by that row's label.

    Raises:
        ValueError: If any label lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(dense_labels).reshape(-1).astype('int')
    # Count the flattened labels so the row count always matches the number
    # of labels, whatever the input's original shape was.
    num_labels = labels.size

    # Reject out-of-range labels explicitly: negative indices would wrap
    # around to the end of the row, and a label >= num_classes must not be
    # allowed to mark a cell belonging to a different row.
    if num_labels and (labels.min() < 0 or labels.max() >= num_classes):
        raise ValueError(
            'labels must lie in [0, {}); got values in [{}, {}]'.format(
                num_classes, labels.min(), labels.max()))

    oh_labels = np.zeros((num_labels, num_classes), dtype='int')
    # Row i gets its 1 in column labels[i].
    oh_labels[np.arange(num_labels), labels] = 1
    return oh_labels
def preproc(x):
    """Scale ``x`` by its maximum so that the largest value becomes 1.

    For non-negative data this maps the values into the range 0-1.

    Args:
        x: Numeric NumPy array. It must be non-empty, since ``x.max()``
            fails on an empty array.

    Returns:
        ``x / x.max()``. When the maximum is exactly 0 the data is returned
        as floats without scaling, instead of the NaN/inf values a division
        by zero would produce.
    """
    peak = x.max()
    # Guard only the scalar case; dividing by 1 keeps the same float result
    # dtype that the regular true division would give.
    if np.ndim(peak) == 0 and peak == 0:
        return x / 1
    return x / peak
def generate_batch(dataset, batch_size, num_classes=10):
    """Draw a random batch of normalized features and one-hot targets.

    Args:
        dataset: Object indexed with the keys ``'features'`` and
            ``'target'``. Both entries are indexed with an integer index
            array; presumably NumPy arrays sharing their first dimension
            (confirm against the caller).
        batch_size: Number of distinct rows to sample. Sampling is without
            replacement, so it must not exceed the number of feature rows.
        num_classes: Number of classes passed on to ``dense_to_one_hot``.
            The default of 10 reproduces the previously fixed behaviour.

    Returns:
        A ``(batch_x, batch_y)`` tuple: the selected features run through
        ``preproc`` and the matching targets one-hot encoded.
    """
    seq = dataset['features']
    # Sample row indices without replacement so no row repeats in a batch.
    batch_idx = np.random.choice(seq.shape[0], batch_size, replace=False)
    # Note: normalization is applied to the selected rows only, i.e. it is
    # relative to this batch's maximum rather than the whole dataset's.
    batch_x = preproc(seq[batch_idx])
    batch_y = dense_to_one_hot(dataset['target'][batch_idx], num_classes)
    return batch_x, batch_y
def show_array(a, size, fmt='png'):
    """Render an array as an inline image via IPython's display machinery.

    ``a`` is cast to ``uint8``, encoded by PIL in the format ``fmt`` into an
    in-memory buffer, and the encoded bytes are displayed with width
    ``size[0]`` and height ``size[1]``.
    """
    pixels = np.uint8(a)
    buffer = BytesIO()
    picture = Image.fromarray(pixels)
    picture.save(buffer, fmt)
    encoded = buffer.getvalue()
    height = size[1]
    width = size[0]
    display.display(display.Image(data=encoded, height=height, width=width))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment