# azizkayumov/grad_percentron.py

## grad_percentron.py
import numpy as np
import matplotlib.pyplot as plt
import time
import math


class GradientDescent:
    """Binary logistic-regression model fitted by batch gradient descent.

    Training inputs are stored feature-major: ``x[j][i]`` is the value of
    feature ``j`` for sample ``i``.  No bias term is added by this class, so
    a caller that wants one passes a constant feature row itself.
    """

    # Probabilities are clamped to [_EPS, 1 - _EPS] before taking logarithms
    # so that a saturated sigmoid cannot trigger ``math.log(0)``.
    _EPS = 1e-15

    def __init__(self, x, y, a):
        """Store the training set and start with every weight at zero.

        Args:
            x: 2-D array-like indexed as ``x[feature][sample]``.
            y: Sequence of targets, indexed by sample.
            a: Learning rate applied to every gradient step.
        """
        # Columns are sliced with ``x[:, i]`` below, which needs an ndarray;
        # ``asarray`` hands back an existing ndarray unchanged.
        self.x = np.asarray(x)
        self.y = y
        # learning rate
        self.a = a
        # number of features
        self.n = len(self.x)
        # thetas: one weight per feature (not a fixed three)
        self.w = [0] * self.n
        # number of training data
        self.m = len(self.x[0])

        print("Initialized with training data:")
        print("n = " + str(self.n) + ", m = " + str(self.m))
        print("w = " + str(self.w))

    def hypothesis(self, x):
        """Return the sigmoid of the weighted sum of the features in ``x``.

        Args:
            x: Feature values of one sample, indexed ``0 .. n - 1``.

        Returns:
            A probability in ``[0, 1]``.
        """
        z = 0
        for i in range(self.n):
            z += self.w[i] * x[i]
        # Only ever exponentiate a non-positive number so ``math.exp`` cannot
        # overflow for strongly negative ``z``.
        if z >= 0:
            return 1 / (1 + math.exp(-z))
        e = math.exp(z)
        return e / (1 + e)

    def decision(self, probability):
        """Map a probability to a class label: 1 if it is >= 0.5, else 0."""
        if probability >= 0.5:
            return 1
        return 0

    def cost(self):
        """Return the mean cross-entropy of the current weights.

        Computes ``-(1/m) * sum(y*log(h) + (1 - y)*log(1 - h))`` over the
        training set, which is the cost whose gradient ``train`` follows.
        """
        total = 0
        for i in range(self.m):
            p = self.hypothesis(self.x[:, i])
            # Keep both logarithms finite when the sigmoid saturates.
            p = min(max(p, self._EPS), 1 - self._EPS)
            total -= self.y[i] * math.log(p) + (1 - self.y[i]) * math.log(1 - p)
        return total / self.m

    def train(self, number_of_iterations):
        """Run up to ``number_of_iterations`` batch gradient-descent steps.

        All weights are updated simultaneously from the same prediction
        errors.  Training stops early, printing a message, as soon as a step
        leaves every weight unchanged.
        """
        iteration = 0
        while iteration < number_of_iterations:
            # The errors depend only on the current weights, so compute them
            # once per step instead of once per feature.
            errors = self._errors()
            temp_w = []
            for j in range(self.n):
                gradient = 0
                for i in range(self.m):
                    gradient += errors[i] * self.x[j][i]
                temp_w.append(self.w[j] - self.a * gradient / self.m)

            # The step no longer changes any weight: stop.
            if self.w == temp_w:
                print("Global optimum is found, stopping training")
                return

            # Update in place so existing references to ``self.w`` stay valid.
            self.w[:] = temp_w
            iteration += 1

    def sum(self, feature_x):
        """Return the sum over samples of ``(hypothesis - y) * feature_x``.

        Args:
            feature_x: Values of one feature, indexed by sample.
        """
        errors = self._errors()
        s = 0
        for i in range(self.m):
            s += errors[i] * feature_x[i]
        return s

    def _errors(self):
        """Return ``hypothesis(sample) - target`` for every training sample."""
        return [self.hypothesis(self.x[:, i]) - self.y[i] for i in range(self.m)]


# Prepare the training data, stored feature-major as [bias, x1, x2].
# Each non-blank line of data.txt is parsed as three comma-separated numbers:
# the two features followed by the target.
x = [[], [], []]
y = []
# The context manager closes the file even if a row fails to parse.
with open("data.txt", "r") as dataFile:
    for line in dataFile:
        if not line.strip():
            # Skip blank lines, e.g. a trailing newline at the end of the file.
            continue
        split = line.split(",")
        x[0].append(1)
        x[1].append(float(split[0]))
        x[2].append(float(split[1]))
        y.append(float(split[2]))
print(len(x))
x = np.array(x)

# Fit the model on the loaded samples and report the wall-clock training time.
np_x = np.array(x)
gd = GradientDescent(np_x, y, 0.0001)

print("Training on x and y ...")
# Millisecond timestamp taken right before training starts.
current_time = int(round(time.time() * 1000))
gd.train(1000)
# From here on the same name holds the elapsed time in milliseconds.
current_time = int(round(time.time() * 1000)) - current_time

print("Training duration: ", current_time / 1000, " seconds", sep="")
print("Trained w = ", gd.w, ", cost = ", gd.cost(), sep="")


# Split the samples by target for plotting: target 0 goes to the "Class 1"
# lists, every other target to the "Class 2" lists.
plot_x1, plot_y1 = [], []
plot_x2, plot_y2 = [], []
for i in range(len(x[0])):
    xs, ys = (plot_x1, plot_y1) if y[i] == 0 else (plot_x2, plot_y2)
    xs.append(x[1][i])
    ys.append(x[2][i])

scatter_class1 = plt.scatter(plot_x1, plot_y1, color='r')
scatter_class2 = plt.scatter(plot_x2, plot_y2, color='b')

plt.legend(
    (scatter_class1, scatter_class2),
    ("Class 1", "Class 2"),
    scatterpoints=1,
    loc='lower right',
    ncol=3,
    fontsize=8,
)

# Decision boundary: the line where w0 + w1 * x1 + w2 * x2 == 0, solved for x2.
X = np.linspace(-5, 10, 100)
H = -(gd.w[0] + gd.w[1] * X) / gd.w[2]
plt.plot(X, H, '-g')

plt.title('Assignment #2. Task 2.b')
plt.grid()
plt.show()
	import numpy as np
	import matplotlib.pyplot as plt
	import time
	import math


	class GradientDescent:
		"""Binary logistic-regression model fitted by batch gradient descent.

		Training inputs are stored feature-major: ``x[j][i]`` is the value of
		feature ``j`` for sample ``i``.  No bias term is added by this class, so
		a caller that wants one passes a constant feature row itself.
		"""

		# Probabilities are clamped to [_EPS, 1 - _EPS] before taking logarithms
		# so that a saturated sigmoid cannot trigger ``math.log(0)``.
		_EPS = 1e-15

		def __init__(self, x, y, a):
			"""Store the training set and start with every weight at zero.

			Args:
				x: 2-D array-like indexed as ``x[feature][sample]``.
				y: Sequence of targets, indexed by sample.
				a: Learning rate applied to every gradient step.
			"""
			# Columns are sliced with ``x[:, i]`` below, which needs an ndarray;
			# ``asarray`` hands back an existing ndarray unchanged.
			self.x = np.asarray(x)
			self.y = y
			# learning rate
			self.a = a
			# number of features
			self.n = len(self.x)
			# thetas: one weight per feature (not a fixed three)
			self.w = [0] * self.n
			# number of training data
			self.m = len(self.x[0])

			print("Initialized with training data:")
			print("n = " + str(self.n) + ", m = " + str(self.m))
			print("w = " + str(self.w))

		def hypothesis(self, x):
			"""Return the sigmoid of the weighted sum of the features in ``x``.

			Args:
				x: Feature values of one sample, indexed ``0 .. n - 1``.

			Returns:
				A probability in ``[0, 1]``.
			"""
			z = 0
			for i in range(self.n):
				z += self.w[i] * x[i]
			# Only ever exponentiate a non-positive number so ``math.exp`` cannot
			# overflow for strongly negative ``z``.
			if z >= 0:
				return 1 / (1 + math.exp(-z))
			e = math.exp(z)
			return e / (1 + e)

		def decision(self, probability):
			"""Map a probability to a class label: 1 if it is >= 0.5, else 0."""
			if probability >= 0.5:
				return 1
			return 0

		def cost(self):
			"""Return the mean cross-entropy of the current weights.

			Computes ``-(1/m) * sum(y*log(h) + (1 - y)*log(1 - h))`` over the
			training set, which is the cost whose gradient ``train`` follows.
			"""
			total = 0
			for i in range(self.m):
				p = self.hypothesis(self.x[:, i])
				# Keep both logarithms finite when the sigmoid saturates.
				p = min(max(p, self._EPS), 1 - self._EPS)
				total -= self.y[i] * math.log(p) + (1 - self.y[i]) * math.log(1 - p)
			return total / self.m

		def train(self, number_of_iterations):
			"""Run up to ``number_of_iterations`` batch gradient-descent steps.

			All weights are updated simultaneously from the same prediction
			errors.  Training stops early, printing a message, as soon as a step
			leaves every weight unchanged.
			"""
			iteration = 0
			while iteration < number_of_iterations:
				# The errors depend only on the current weights, so compute them
				# once per step instead of once per feature.
				errors = self._errors()
				temp_w = []
				for j in range(self.n):
					gradient = 0
					for i in range(self.m):
						gradient += errors[i] * self.x[j][i]
					temp_w.append(self.w[j] - self.a * gradient / self.m)

				# The step no longer changes any weight: stop.
				if self.w == temp_w:
					print("Global optimum is found, stopping training")
					return

				# Update in place so existing references to ``self.w`` stay valid.
				self.w[:] = temp_w
				iteration += 1

		def sum(self, feature_x):
			"""Return the sum over samples of ``(hypothesis - y) * feature_x``.

			Args:
				feature_x: Values of one feature, indexed by sample.
			"""
			errors = self._errors()
			s = 0
			for i in range(self.m):
				s += errors[i] * feature_x[i]
			return s

		def _errors(self):
			"""Return ``hypothesis(sample) - target`` for every training sample."""
			return [self.hypothesis(self.x[:, i]) - self.y[i] for i in range(self.m)]


	# Prepare the training data, stored feature-major as [bias, x1, x2].
	# Each non-blank line of data.txt is parsed as three comma-separated numbers:
	# the two features followed by the target.
	x = [[], [], []]
	y = []
	# The context manager closes the file even if a row fails to parse.
	with open("data.txt", "r") as dataFile:
		for line in dataFile:
			if not line.strip():
				# Skip blank lines, e.g. a trailing newline at the end of the file.
				continue
			split = line.split(",")
			x[0].append(1)
			x[1].append(float(split[0]))
			x[2].append(float(split[1]))
			y.append(float(split[2]))
	print(len(x))
	x = np.array(x)

	# Fit the model on the loaded samples and report the wall-clock training time.
	np_x = np.array(x)
	gd = GradientDescent(np_x, y, 0.0001)

	print("Training on x and y ...")
	# Millisecond timestamp taken right before training starts.
	current_time = int(round(time.time() * 1000))
	gd.train(1000)
	# From here on the same name holds the elapsed time in milliseconds.
	current_time = int(round(time.time() * 1000)) - current_time

	print("Training duration: ", current_time / 1000, " seconds", sep="")
	print("Trained w = ", gd.w, ", cost = ", gd.cost(), sep="")


	# Split the samples by target for plotting: target 0 goes to the "Class 1"
	# lists, every other target to the "Class 2" lists.
	plot_x1, plot_y1 = [], []
	plot_x2, plot_y2 = [], []
	for i in range(len(x[0])):
		if y[i] == 0:
			plot_x1.append(x[1][i])
			plot_y1.append(x[2][i])
		else:
			plot_x2.append(x[1][i])
			plot_y2.append(x[2][i])

	scatter_class1 = plt.scatter(plot_x1, plot_y1, color='r')
	scatter_class2 = plt.scatter(plot_x2, plot_y2, color='b')

	plt.legend((scatter_class1, scatter_class2), ("Class 1", "Class 2"), scatterpoints=1, loc='lower right', ncol=3, fontsize=8)

	# Decision boundary: the line where w0 + w1 * x1 + w2 * x2 == 0, solved for x2.
	X = np.linspace(-5, 10, 100)
	H = -(gd.w[0] + gd.w[1] * X) / gd.w[2]
	plt.plot(X, H, '-g')

	plt.title('Assignment #2. Task 2.b')
	plt.grid()
	plt.show()