Skip to content

Instantly share code, notes, and snippets.

@wangyu-
Created December 7, 2020 02:15
Show Gist options
  • Save wangyu-/659731947271d25b2db4383be777cd64 to your computer and use it in GitHub Desktop.
Save wangyu-/659731947271d25b2db4383be777cd64 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 07:13:29 2020
@author: wangyu
"""
import random
import gzip
import numpy as np
import matplotlib.pyplot as plt
def parse_image(name):
    """Load a gzip-compressed image file as a matrix of scaled pixels.

    The first 16 bytes of the decompressed stream (the header) are skipped;
    everything after it is interpreted as unsigned bytes, 28*28 per image.

    Args:
        name: path of the gzip-compressed image file.

    Returns:
        A float32 array of shape (num_images, 784) whose values are the raw
        bytes divided by 255.0.

    Raises:
        AssertionError: if the payload length is not a multiple of 784.
    """
    dim = 28 * 28
    # The context manager guarantees the handle is closed, even when the
    # read raises.
    with gzip.open(name, 'rb') as fh:
        data = fh.read()[16:]
    assert len(data) % dim == 0, "file len is not a multiple of dimension %s" % (name)
    result = np.frombuffer(data, dtype=np.uint8).reshape(len(data) // dim, dim)
    return np.float32(result) / 255.0
def parse_label(name):
    """Load a gzip-compressed label file as a vector of unsigned bytes.

    The first 8 bytes of the decompressed stream (the header) are skipped;
    each remaining byte is one label.

    Args:
        name: path of the gzip-compressed label file.

    Returns:
        A 1-D uint8 array with one entry per label.
    """
    # The context manager guarantees the handle is closed, even when the
    # read raises.
    with gzip.open(name, 'rb') as fh:
        data = fh.read()[8:]
    return np.frombuffer(data, dtype=np.uint8)
def turn_into_vector(value, num_classes=10):
    """Encode a class index as a one-hot column vector.

    Args:
        value: class index, expected in the range [0, num_classes).
        num_classes: length of the resulting vector; defaults to 10.

    Returns:
        A float array of shape (num_classes, 1) that is 1.0 at row `value`
        and 0.0 everywhere else.

    Raises:
        AssertionError: if `value` is outside [0, num_classes).
    """
    assert 0 <= value < num_classes, "error range %d" % (value,)
    vec = np.zeros((num_classes, 1))
    vec[value] = 1.0
    return vec
def conv(images,labels):
first = [np.reshape(x, (784, 1)) for x in images]
second= [turn_into_vector(y) for y in labels]
return list(zip(first,second))
# Read the four gzip-compressed archives from the current directory:
# training images and labels first, then the 10k test images and labels.
train_images, train_labels = (
    parse_image('./train-images-idx3-ubyte.gz'),
    parse_label('./train-labels-idx1-ubyte.gz'),
)
test_images, test_labels = (
    parse_image('./t10k-images-idx3-ubyte.gz'),
    parse_label('./t10k-labels-idx1-ubyte.gz'),
)
# Turn the raw arrays into lists of (input column, label vector) samples.
train_set, test_set = conv(train_images, train_labels), conv(test_images, test_labels)
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    Args:
        z: a number or a NumPy array.

    Returns:
        The sigmoid of `z`, with the same shape as `z`.
    """
    # For very negative z, exp(-z) overflows to inf and 1/(1+inf) is 0.0,
    # which is the correct limit. Silence the overflow warning so the call
    # stays quiet (and does not fail when warnings are treated as errors).
    with np.errstate(over='ignore'):
        return 1.0 / (1.0 + np.exp(-z))
def d_sigmoid(z):
    """Return the derivative of the sigmoid at z, i.e. s * (1 - s) with s = sigmoid(z)."""
    activation = sigmoid(z)
    return activation * (1 - activation)
def copy_as_zero(arr):
    """Build a list of zero-filled arrays shaped like the arrays in arr.

    Args:
        arr: iterable of objects exposing a `.shape` attribute.

    Returns:
        A new list with one np.zeros array per element of `arr`, in order.
    """
    zeroed = []
    for template in arr:
        zeroed.append(np.zeros(template.shape))
    return zeroed
class NN(object):
    """Fully connected feed-forward network with sigmoid activations.

    Parameters are stored as per-layer lists: ``weights[i]`` has shape
    ``(shape[i+1], shape[i])`` and ``biases[i]`` has shape ``(shape[i+1], 1)``.
    Training uses mini-batch stochastic gradient descent with gradients
    obtained by back-propagation of the error ``(output - target)``.
    """

    def __init__(self, shape):
        """Create a network with normally distributed random parameters.

        Args:
            shape: layer sizes, input layer first (e.g. ``[784, 50, 10]``).
        """
        self.shape = shape
        self.depth = len(shape) - 1
        self.in_dim = shape[0]
        self.out_dim = shape[-1]
        self.biases = []
        self.weights = []
        # (iteration, error rate) pairs recorded by train().
        self.train_history = []
        # For each layer the biases are drawn before the weights; keeping
        # this order keeps seeded runs reproducible.
        for i in range(self.depth):
            self.biases.append(np.random.randn(shape[i + 1], 1))
            self.weights.append(np.random.randn(shape[i + 1], shape[i]))

    def train(self, train_set, verify_set, iterations, report_period, batch_size, alpha):
        """Run mini-batch gradient descent and record the error rate.

        Args:
            train_set: sequence of (x, y) samples that batches are drawn from.
            verify_set: (x, y) samples used to measure accuracy for reports.
            iterations: number of mini-batch updates to perform.
            report_period: accuracy is measured every `report_period`
                iterations and after the final one.
            batch_size: number of samples per mini-batch.
            alpha: learning rate.

        A KeyboardInterrupt during the loop stops training early and keeps
        the parameters learned so far.
        """
        self.train_history = []
        acc = self.accuracy(verify_set)
        self.train_history.append((0, 1 - acc))
        print("iteration %d, accuracy: %.2f%%" % (0, 100 * acc))
        try:
            for i in range(iterations):
                self.stochastic_gradient_descent(train_set, batch_size, alpha)
                if (i + 1) % report_period == 0 or i + 1 == iterations:
                    acc = self.accuracy(verify_set)
                    self.train_history.append((i + 1, 1 - acc))
                    print("iteration %d, accuracy: %.2f%%" % (i + 1, 100 * acc))
        except KeyboardInterrupt:
            print("")

    def plot(self):
        """Scatter-plot the recorded error rate against the iteration count."""
        x_axis = [iteration for iteration, _ in self.train_history]
        y_axis = [error_rate for _, error_rate in self.train_history]
        plt.scatter(x_axis, y_axis, s=10, marker="o", color="blue")
        plt.xlabel("Number of Iterations of Gradient Descent")
        plt.ylabel("Error_rate")
        plt.title("Iterations vs Error_rate")
        plt.show()

    def inference(self, a):
        """Feed the column vector `a` through every layer and return the output."""
        for weight, bias in zip(self.weights, self.biases):
            a = sigmoid(np.dot(weight, a) + bias)
        return a

    def stochastic_gradient_descent(self, train_set, batch_size, alpha):
        """Apply one parameter update from a random mini-batch.

        `batch_size` distinct samples are drawn with random.sample, their
        gradients are summed, and the mean gradient scaled by `alpha` is
        subtracted from the parameters in place.
        """
        random_batch = random.sample(train_set, batch_size)
        grad_b = copy_as_zero(self.biases)
        grad_w = copy_as_zero(self.weights)
        for (x, y) in random_batch:
            grad_b_per_sample, grad_w_per_sample = self.back_propagation(x, y)
            for i in range(self.depth):
                grad_b[i] += grad_b_per_sample[i]
                grad_w[i] += grad_w_per_sample[i]
        for i in range(self.depth):
            self.biases[i] -= alpha * grad_b[i] / batch_size
            self.weights[i] -= alpha * grad_w[i] / batch_size

    def back_propagation(self, x, y):
        """Compute the gradients for a single sample.

        Args:
            x: input column vector.
            y: target column vector.

        Returns:
            A tuple ``(grad_b, grad_w)`` of per-layer lists shaped like
            ``self.biases`` and ``self.weights``.
        """
        # Forward pass, remembering every pre-activation z and activation a.
        a_record = [x]
        z_record = []
        a = x
        for weight, bias in zip(self.weights, self.biases):
            z = np.dot(weight, a) + bias
            a = sigmoid(z)
            z_record.append(z)
            a_record.append(a)

        # Every slot is assigned below, so no zero arrays are needed.
        grad_b = [None] * self.depth
        grad_w = [None] * self.depth

        # Output layer: error = (output - target) * sigmoid'(z).
        error = (a_record[-1] - y) * d_sigmoid(z_record[-1])
        grad_b[-1] = error
        grad_w[-1] = np.dot(error, a_record[-2].transpose())

        # Hidden layers, walking backwards; each error is derived from the
        # error of the layer after it.
        for layer in range(self.depth - 2, -1, -1):
            error = np.dot(self.weights[layer + 1].transpose(), error) * \
                d_sigmoid(z_record[layer])
            grad_b[layer] = error
            grad_w[layer] = np.dot(error, a_record[layer].transpose())
        return (grad_b, grad_w)

    def accuracy(self, test_set):
        """Return the fraction of samples whose arg-max prediction is correct.

        Args:
            test_set: sized collection of (x, y) samples.

        Returns:
            A value in [0, 1]; 0.0 when `test_set` is empty.
        """
        total = len(test_set)
        if total == 0:
            # Avoid dividing by zero; there is nothing to be right about.
            return 0.0
        correct = 0
        for (x, y) in test_set:
            if np.argmax(self.inference(x)) == np.argmax(y):
                correct += 1
        return correct / total

    def macroF1(self, test_set):
        """Print per-class confusion counts and return the macro-averaged F1.

        Each class is scored one-vs-rest from the arg-max prediction, and the
        per-class F1 values are averaged over ``self.out_dim`` classes.
        """
        classes = range(self.out_dim)
        TP = [0] * self.out_dim
        TN = [0] * self.out_dim
        FP = [0] * self.out_dim
        FN = [0] * self.out_dim
        F1 = [0] * self.out_dim
        for (x, y) in test_set:
            predicted = np.argmax(self.inference(x))
            actual = np.argmax(y)
            for i in classes:
                if actual == i and predicted == i:
                    TP[i] += 1
                elif actual == i:
                    FN[i] += 1
                elif predicted == i:
                    FP[i] += 1
                else:
                    TN[i] += 1
        # A tiny epsilon keeps every division defined when a class has no
        # true positives; such a class ends up with an F1 of about 1e-30.
        zero = 1e-30
        for i in classes:
            precision = TP[i] / (zero + TP[i] + FP[i]) + zero
            recall = TP[i] / (zero + TP[i] + FN[i]) + zero
            F1[i] = 2 / (1 / precision + 1 / recall)
        print("TP:", TP)
        print("TN:", TN)
        print("FP:", FP)
        print("FN:", FN)
        print("F1 for each class:", F1)
        return sum(F1) / self.out_dim
# Seed both random sources: NumPy draws the initial parameters, the random
# module picks the mini-batches.
np.random.seed(123456)
random.seed(123456)

# 784 inputs, one hidden layer of 50 units, 10 outputs.
nn = NN(shape=[784, 50, 10])
# Progress is reported on the first 2000 training samples.
nn.train(
    train_set=train_set,
    verify_set=train_set[:2000],
    iterations=40000,
    report_period=200,
    batch_size=20,
    alpha=1.5,
)
print("accuracy=", nn.accuracy(test_set))
print("macroF1=", nn.macroF1(test_set))
nn.plot()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment