wangyu-/nn.py

## nn.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct  5 07:13:29 2020

@author: wangyu
"""

import random
import gzip
import numpy as np
import matplotlib.pyplot as plt

def parse_image(name):
    """Load a gzip-compressed image file as a matrix of scaled pixels.

    The first 16 bytes are skipped as header; the remaining bytes are read
    as unsigned 8-bit values and split into rows of 28*28 entries.

    Args:
        name: Path of the gzip-compressed image file.

    Returns:
        A float32 array of shape (count, 784) with values in [0, 1].

    Raises:
        AssertionError: If the payload length is not a multiple of 784.
    """
    dim = 28 * 28
    # The context manager guarantees the handle is closed, even on error.
    with gzip.open(name, 'rb') as file:
        data = file.read()[16:]
    assert len(data) % dim == 0, "file len is not a multiple of dimension %s"%(name)
    result = np.frombuffer(data, dtype=np.uint8).reshape(len(data) // dim, dim)
    # Dividing by 255 maps the byte range 0..255 onto 0..1.
    return np.float32(result) / 255.0

def parse_label(name):
    """Load a gzip-compressed label file as a flat array of bytes.

    The first 8 bytes are skipped as header; every remaining byte is one
    label.

    Args:
        name: Path of the gzip-compressed label file.

    Returns:
        A one-dimensional uint8 array with one entry per label.
    """
    # The context manager guarantees the handle is closed, even on error.
    with gzip.open(name, 'rb') as file:
        data = file.read()[8:]
    return np.frombuffer(data, dtype=np.uint8)

def turn_into_vector(value):
    """Return the one-hot column vector for a label in 0..9.

    Args:
        value: Integer label; must satisfy 0 <= value < 10.

    Returns:
        A (10, 1) float array that is 1.0 at row ``value`` and 0 elsewhere.

    Raises:
        AssertionError: If ``value`` is outside 0..9.
    """
    in_range = 0 <= value < 10
    assert in_range, "error range %d"%(value,)
    one_hot = np.zeros((10, 1))
    one_hot[value, 0] = 1.0
    return one_hot

def conv(images, labels):
    """Pair every image with the one-hot encoding of its label.

    Args:
        images: Iterable of images, each holding 784 values.
        labels: Iterable of integer labels accepted by turn_into_vector.

    Returns:
        A list of (input, target) tuples where input is a (784, 1) column
        and target is a (10, 1) one-hot column. Pairing stops at the
        shorter of the two inputs.
    """
    columns = []
    for image in images:
        columns.append(np.reshape(image, (784, 1)))
    targets = list(map(turn_into_vector, labels))
    return [pair for pair in zip(columns, targets)]

# Training split: pixel matrix, raw labels, then (input, target) pairs.
train_images = parse_image('./train-images-idx3-ubyte.gz')
train_labels = parse_label('./train-labels-idx1-ubyte.gz')
train_set = conv(train_images, train_labels)

# Test split, prepared the same way.
test_images = parse_image('./t10k-images-idx3-ubyte.gz')
test_labels = parse_label('./t10k-labels-idx1-ubyte.gz')
test_set = conv(test_images, test_labels)

def sigmoid(z):
    """Apply the logistic function 1 / (1 + exp(-z)) elementwise.

    Args:
        z: A scalar or NumPy array.

    Returns:
        The logistic of ``z``, with the same shape as ``z``.
    """
    denominator = 1.0 + np.exp(-z)
    return 1.0 / denominator

def d_sigmoid(z):
    """Return the derivative of the logistic function at ``z``.

    Uses the identity sigmoid'(z) = s * (1 - s) with s = sigmoid(z).
    """
    s = sigmoid(z)
    return s * (1 - s)

def copy_as_zero(arr):
    """Return a list of zero arrays matching the shapes of those in ``arr``.

    Args:
        arr: Iterable of objects exposing a ``shape`` attribute.

    Returns:
        A new list with one ``np.zeros`` array per input, in the same order.
    """
    zeros = []
    for item in arr:
        zeros.append(np.zeros(item.shape))
    return zeros

class NN(object):
    """Fully connected feed-forward network with sigmoid activations.

    Parameters are stored as per-layer lists: ``weights[i]`` has shape
    (shape[i+1], shape[i]) and ``biases[i]`` has shape (shape[i+1], 1).
    Inputs and targets are column vectors.
    """

    def __init__(self, shape):
        """Allocate standard-normal weights and biases.

        Args:
            shape: Layer sizes from input to output, e.g. [784, 50, 10].
        """
        self.shape = shape
        self.depth = len(shape) - 1  # number of weight layers
        self.in_dim = shape[0]
        self.out_dim = shape[-1]
        self.biases = []
        self.weights = []
        self.train_history = []  # (iteration, error rate) pairs
        # The bias is drawn before the weight for each layer; keeping this
        # order keeps seeded runs reproducible.
        for fan_in, fan_out in zip(shape[:-1], shape[1:]):
            self.biases.append(np.random.randn(fan_out, 1))
            self.weights.append(np.random.randn(fan_out, fan_in))

    def _report(self, iteration, verify_set):
        """Measure accuracy on ``verify_set``, record it and print it."""
        acc = self.accuracy(verify_set)
        self.train_history.append((iteration, 1 - acc))
        print("iteration %d, accuracy: %.2f%%" % (iteration, 100 * acc))

    def train(self, train_set, verify_set, iterations, report_period, batch_size, alpha):
        """Run mini-batch gradient descent, reporting progress periodically.

        Args:
            train_set: List of (input, target) column-vector pairs.
            verify_set: Pairs used only to measure accuracy for reports.
            iterations: Number of mini-batch updates to perform.
            report_period: Report every this many iterations (and after
                the last one).
            batch_size: Number of samples drawn per update.
            alpha: Learning rate.

        Ctrl-C stops training early; history recorded so far is kept.
        """
        self.train_history = []
        self._report(0, verify_set)

        try:
            for i in range(iterations):
                self.stochastic_gradient_descent(train_set, batch_size, alpha)
                done = i + 1
                if done % report_period == 0 or done == iterations:
                    self._report(done, verify_set)
        except KeyboardInterrupt:
            print("")

    def plot(self):
        """Scatter-plot the recorded error rate against the iteration count."""
        x_axis = [iteration for iteration, _ in self.train_history]
        y_axis = [error_rate for _, error_rate in self.train_history]
        plt.scatter(x_axis, y_axis, s=10, marker="o", color="blue")
        plt.xlabel("Number of Iterations of Gradient Decent")
        plt.ylabel("Error_rate")
        plt.title("Iterations vs Error_rate")
        plt.show()

    def inference(self, a):
        """Propagate the column vector ``a`` through every layer.

        Returns:
            The output-layer activation, a column of length ``out_dim``.
        """
        for weight, bias in zip(self.weights, self.biases):
            a = sigmoid(np.dot(weight, a) + bias)
        return a

    def stochastic_gradient_descent(self, train_set, batch_size, alpha):
        """Apply one update using the mean gradient of a random mini-batch.

        Args:
            train_set: List of (input, target) pairs to sample from.
            batch_size: Number of distinct samples to draw.
            alpha: Learning rate.

        Raises:
            ValueError: From ``random.sample`` if ``batch_size`` exceeds
                the size of ``train_set``.
        """
        random_batch = random.sample(train_set, batch_size)
        grad_b = copy_as_zero(self.biases)
        grad_w = copy_as_zero(self.weights)
        for x, y in random_batch:
            sample_b, sample_w = self.back_propagation(x, y)
            for i in range(self.depth):
                grad_b[i] += sample_b[i]
                grad_w[i] += sample_w[i]

        for i in range(self.depth):
            self.biases[i] -= alpha * grad_b[i] / batch_size
            self.weights[i] -= alpha * grad_w[i] / batch_size

    def back_propagation(self, x, y):
        """Compute the squared-error gradient for a single sample.

        Args:
            x: Input column vector.
            y: Target column vector.

        Returns:
            A tuple (grad_b, grad_w) of per-layer lists shaped like
            ``biases`` and ``weights``.
        """
        # Forward pass: activations[i] is the input of layer i.
        activations = [x]
        a = x
        for weight, bias in zip(self.weights, self.biases):
            a = sigmoid(np.dot(weight, a) + bias)
            activations.append(a)

        grad_b = [None] * self.depth
        grad_w = [None] * self.depth
        error = None
        last = self.depth - 1
        # Backward pass. sigmoid'(z) equals a * (1 - a), so the recorded
        # activations are reused instead of re-evaluating the sigmoid.
        for layer in range(last, -1, -1):
            out = activations[layer + 1]
            slope = out * (1 - out)
            if layer == last:
                error = (out - y) * slope
            else:
                error = np.dot(self.weights[layer + 1].transpose(), error) * slope
            grad_b[layer] = error
            grad_w[layer] = np.dot(error, activations[layer].transpose())
        return (grad_b, grad_w)

    def accuracy(self, test_set):
        """Return the fraction of samples whose arg-max prediction is right.

        An empty ``test_set`` yields 0.0 rather than dividing by zero.
        """
        total = len(test_set)
        if total == 0:
            return 0.0
        correct = 0
        for x, y in test_set:
            if np.argmax(self.inference(x)) == np.argmax(y):
                correct += 1
        return correct / total

    def macroF1(self, test_set):
        """Print per-class confusion counts and return the macro-averaged F1.

        Every class is scored one-vs-rest. A class without any true
        positive gets an F1 of exactly 0.

        Args:
            test_set: Iterable of (input, target) column-vector pairs.

        Returns:
            The unweighted mean of the per-class F1 scores.
        """
        classes = range(self.out_dim)
        TP = [0] * self.out_dim
        TN = [0] * self.out_dim
        FP = [0] * self.out_dim
        FN = [0] * self.out_dim
        for x, y in test_set:
            predicted = np.argmax(self.inference(x))
            actual = np.argmax(y)
            for i in classes:
                if actual == i:
                    if predicted == i:
                        TP[i] += 1
                    else:
                        FN[i] += 1
                elif predicted == i:
                    FP[i] += 1
                else:
                    TN[i] += 1

        F1 = []
        for i in classes:
            if TP[i] == 0:
                # Precision and recall are both zero (or undefined).
                F1.append(0.0)
                continue
            precision = TP[i] / (TP[i] + FP[i])
            recall = TP[i] / (TP[i] + FN[i])
            F1.append(2 / (1 / precision + 1 / recall))
        print("TP:", TP)
        print("TN:", TN)
        print("FP:", FP)
        print("FN:", FN)
        print("F1 for each class:", F1)
        return sum(F1) / self.out_dim

# Seed both generators: `random` picks the mini-batches, NumPy draws the
# initial parameters.
random.seed(123456)
np.random.seed(123456)

nn = NN(shape=[784, 50, 10])
# Progress is measured on the first 2000 training samples.
nn.train(
    train_set,
    verify_set=train_set[:2000],
    iterations=40000,
    report_period=200,
    batch_size=20,
    alpha=1.5,
)
print("accuracy=", nn.accuracy(test_set))
print("macroF1=", nn.macroF1(test_set))
nn.plot()
	#!/usr/bin/env python3
	# -*- coding: utf-8 -*-
	"""
	Created on Mon Oct 5 07:13:29 2020

	@author: wangyu
	"""

	import random
	import gzip
	import numpy as np
	import matplotlib.pyplot as plt

	def parse_image(name):
	    """Load a gzip-compressed image file as a matrix of scaled pixels.

	    The first 16 bytes are skipped as header; the remaining bytes are
	    read as unsigned 8-bit values and split into rows of 28*28 entries.

	    Args:
	        name: Path of the gzip-compressed image file.

	    Returns:
	        A float32 array of shape (count, 784) with values in [0, 1].

	    Raises:
	        AssertionError: If the payload length is not a multiple of 784.
	    """
	    dim = 28 * 28
	    # The context manager guarantees the handle is closed, even on error.
	    with gzip.open(name, 'rb') as file:
	        data = file.read()[16:]
	    assert len(data) % dim == 0, "file len is not a multiple of dimension %s"%(name)
	    result = np.frombuffer(data, dtype=np.uint8).reshape(len(data) // dim, dim)
	    # Dividing by 255 maps the byte range 0..255 onto 0..1.
	    return np.float32(result) / 255.0

	def parse_label(name):
	    """Load a gzip-compressed label file as a flat array of bytes.

	    The first 8 bytes are skipped as header; every remaining byte is one
	    label.

	    Args:
	        name: Path of the gzip-compressed label file.

	    Returns:
	        A one-dimensional uint8 array with one entry per label.
	    """
	    # The context manager guarantees the handle is closed, even on error.
	    with gzip.open(name, 'rb') as file:
	        data = file.read()[8:]
	    return np.frombuffer(data, dtype=np.uint8)

	def turn_into_vector(value):
	    """Return the one-hot column vector for a label in 0..9.

	    Args:
	        value: Integer label; must satisfy 0 <= value < 10.

	    Returns:
	        A (10, 1) float array that is 1.0 at row ``value`` and 0 elsewhere.

	    Raises:
	        AssertionError: If ``value`` is outside 0..9.
	    """
	    assert value>=0 and value<10, "error range %d"%(value,)
	    vec = np.zeros((10, 1))
	    vec[value] = 1.0
	    return vec

	def conv(images, labels):
	    """Pair every image with the one-hot encoding of its label.

	    Args:
	        images: Iterable of images, each holding 784 values.
	        labels: Iterable of integer labels accepted by turn_into_vector.

	    Returns:
	        A list of (input, target) tuples where input is a (784, 1) column
	        and target is a (10, 1) one-hot column. Pairing stops at the
	        shorter of the two inputs.
	    """
	    first = [np.reshape(x, (784, 1)) for x in images]
	    second = [turn_into_vector(y) for y in labels]
	    return list(zip(first, second))

	# Training split: pixel matrix, raw labels, then (input, target) pairs.
	train_images = parse_image('./train-images-idx3-ubyte.gz')
	train_labels = parse_label('./train-labels-idx1-ubyte.gz')
	train_set = conv(train_images, train_labels)

	# Test split, prepared the same way.
	test_images = parse_image('./t10k-images-idx3-ubyte.gz')
	test_labels = parse_label('./t10k-labels-idx1-ubyte.gz')
	test_set = conv(test_images, test_labels)

	def sigmoid(z):
	    """Apply the logistic function 1 / (1 + exp(-z)) elementwise.

	    Args:
	        z: A scalar or NumPy array.

	    Returns:
	        The logistic of ``z``, with the same shape as ``z``.
	    """
	    return 1.0 / (1.0 + np.exp(-z))

	def d_sigmoid(z):
	    """Return the derivative of the logistic function at ``z``.

	    Uses the identity sigmoid'(z) = s * (1 - s) with s = sigmoid(z).
	    """
	    # Evaluate the sigmoid once and reuse it for both factors.
	    s = sigmoid(z)
	    return s * (1 - s)

	def copy_as_zero(arr):
	    """Return a list of zero arrays matching the shapes of those in ``arr``.

	    Args:
	        arr: Iterable of objects exposing a ``shape`` attribute.

	    Returns:
	        A new list with one ``np.zeros`` array per input, in the same order.
	    """
	    return [np.zeros(x.shape) for x in arr]

	class NN(object):
	    """Fully connected feed-forward network with sigmoid activations.

	    Parameters are stored as per-layer lists: ``weights[i]`` has shape
	    (shape[i+1], shape[i]) and ``biases[i]`` has shape (shape[i+1], 1).
	    Inputs and targets are column vectors.
	    """

	    def __init__(self, shape):
	        """Allocate standard-normal weights and biases.

	        Args:
	            shape: Layer sizes from input to output, e.g. [784, 50, 10].
	        """
	        self.shape = shape
	        self.depth = len(shape) - 1  # number of weight layers
	        self.in_dim = shape[0]
	        self.out_dim = shape[-1]
	        self.biases = []
	        self.weights = []
	        self.train_history = []  # (iteration, error rate) pairs
	        # The bias is drawn before the weight for each layer; keeping
	        # this order keeps seeded runs reproducible.
	        for i in range(0, self.depth):
	            self.biases.append(np.random.randn(shape[i+1], 1))
	            self.weights.append(np.random.randn(shape[i+1], shape[i]))

	    def train(self, train_set, verify_set, iterations, report_period, batch_size, alpha):
	        """Run mini-batch gradient descent, reporting progress periodically.

	        Args:
	            train_set: List of (input, target) column-vector pairs.
	            verify_set: Pairs used only to measure accuracy for reports.
	            iterations: Number of mini-batch updates to perform.
	            report_period: Report every this many iterations (and after
	                the last one).
	            batch_size: Number of samples drawn per update.
	            alpha: Learning rate.

	        Ctrl-C stops training early; history recorded so far is kept.
	        """
	        self.train_history = []
	        acc = self.accuracy(verify_set)
	        self.train_history.append((0, 1 - acc))
	        print("iteration %d, accuracy: %.2f%%" % (0, 100 * acc))

	        try:
	            for i in range(iterations):
	                self.stochastic_gradient_descent(train_set, batch_size, alpha)
	                if (i + 1) % report_period == 0 or i + 1 == iterations:
	                    acc = self.accuracy(verify_set)
	                    self.train_history.append((i + 1, 1 - acc))
	                    print("iteration %d, accuracy: %.2f%%" % (i + 1, 100 * acc))
	        except KeyboardInterrupt:
	            print("")

	    def plot(self):
	        """Scatter-plot the recorded error rate against the iteration count."""
	        x_axis = []
	        y_axis = []
	        for (x, y) in self.train_history:
	            x_axis.append(x)
	            y_axis.append(y)
	        plt.scatter(x_axis, y_axis, s=10, marker="o", color="blue")
	        plt.xlabel("Number of Iterations of Gradient Decent")
	        plt.ylabel("Error_rate")
	        plt.title("Iterations vs Error_rate")
	        plt.show()

	    def inference(self, a):
	        """Propagate the column vector ``a`` through every layer.

	        Returns:
	            The output-layer activation, a column of length ``out_dim``.
	        """
	        for i in range(0, self.depth):
	            a = sigmoid(np.dot(self.weights[i], a) + self.biases[i])
	        return a

	    def stochastic_gradient_descent(self, train_set, batch_size, alpha):
	        """Apply one update using the mean gradient of a random mini-batch.

	        Args:
	            train_set: List of (input, target) pairs to sample from.
	            batch_size: Number of distinct samples to draw.
	            alpha: Learning rate.

	        Raises:
	            ValueError: From ``random.sample`` if ``batch_size`` exceeds
	                the size of ``train_set``.
	        """
	        random_batch = random.sample(train_set, batch_size)
	        grad_b = copy_as_zero(self.biases)
	        grad_w = copy_as_zero(self.weights)
	        for (x, y) in random_batch:
	            grad_b_per_sample, grad_w_per_sample = self.back_propagation(x, y)
	            for i in range(0, self.depth):
	                grad_b[i] += grad_b_per_sample[i]
	                grad_w[i] += grad_w_per_sample[i]

	        for i in range(0, self.depth):
	            self.biases[i] -= alpha * grad_b[i] / batch_size
	            self.weights[i] -= alpha * grad_w[i] / batch_size

	    def back_propagation(self, x, y):
	        """Compute the squared-error gradient for a single sample.

	        Args:
	            x: Input column vector.
	            y: Target column vector.

	        Returns:
	            A tuple (grad_b, grad_w) of per-layer lists shaped like
	            ``biases`` and ``weights``.
	        """
	        # Forward pass: a_record[i] is the input of layer i.
	        a_record = [x]
	        a = x
	        for i in range(0, self.depth):
	            a = sigmoid(np.dot(self.weights[i], a) + self.biases[i])
	            a_record.append(a)

	        grad_b = [None] * self.depth
	        grad_w = [None] * self.depth
	        error = None
	        last = self.depth - 1
	        # Backward pass. sigmoid'(z) equals a * (1 - a), so the recorded
	        # activations are reused instead of re-evaluating the sigmoid.
	        for current_layer in range(last, -1, -1):
	            out = a_record[current_layer + 1]
	            slope = out * (1 - out)
	            if current_layer == last:
	                error = (out - y) * slope
	            else:
	                error = np.dot(self.weights[current_layer + 1].transpose(), error) * slope
	            grad_b[current_layer] = error
	            grad_w[current_layer] = np.dot(error, a_record[current_layer].transpose())
	        return (grad_b, grad_w)

	    def accuracy(self, test_set):
	        """Return the fraction of samples whose arg-max prediction is right.

	        An empty ``test_set`` yields 0.0 rather than dividing by zero.
	        """
	        if len(test_set) == 0:
	            return 0.0
	        correct = 0
	        for (x, y) in test_set:
	            predicted = np.argmax(self.inference(x))
	            actual = np.argmax(y)
	            if predicted == actual:
	                correct += 1
	        return correct / len(test_set)

	    def macroF1(self, test_set):
	        """Print per-class confusion counts and return the macro-averaged F1.

	        Every class is scored one-vs-rest. A class without any true
	        positive gets an F1 of exactly 0.

	        Args:
	            test_set: Iterable of (input, target) column-vector pairs.

	        Returns:
	            The unweighted mean of the per-class F1 scores.
	        """
	        TP = [0] * self.out_dim
	        TN = [0] * self.out_dim
	        FP = [0] * self.out_dim
	        FN = [0] * self.out_dim
	        F1 = [0.0] * self.out_dim
	        for (x, y) in test_set:
	            predicted = np.argmax(self.inference(x))
	            actual = np.argmax(y)
	            for i in range(0, self.out_dim):
	                if actual == i:
	                    if predicted == i:
	                        TP[i] += 1
	                    else:
	                        FN[i] += 1
	                else:
	                    if predicted != i:
	                        TN[i] += 1
	                    else:
	                        FP[i] += 1
	        for i in range(0, self.out_dim):
	            if TP[i] == 0:
	                # Precision and recall are both zero (or undefined).
	                continue
	            precision = TP[i] / (TP[i] + FP[i])
	            recall = TP[i] / (TP[i] + FN[i])
	            F1[i] = 2 / (1 / precision + 1 / recall)
	        print("TP:", TP)
	        print("TN:", TN)
	        print("FP:", FP)
	        print("FN:", FN)
	        print("F1 for each class:", F1)
	        return sum(F1) / self.out_dim

	# Seed both generators: `random` picks the mini-batches, NumPy draws the
	# initial parameters.
	random.seed(123456)
	np.random.seed(123456)

	nn = NN(shape=[784, 50, 10])
	# Progress is measured on the first 2000 training samples.
	nn.train(
	    train_set,
	    verify_set=train_set[:2000],
	    iterations=40000,
	    report_period=200,
	    batch_size=20,
	    alpha=1.5,
	)
	print("accuracy=", nn.accuracy(test_set))
	print("macroF1=", nn.macroF1(test_set))
	nn.plot()