GitHub Gists by Tarlan Ahad (tarlanahad)
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Generate a toy binary-classification dataset and hold out 20% for testing
data_x, data_y = datasets.make_classification(n_samples=10000, n_features=5, n_classes=2, random_state=123)
x_tr, x_ts, y_tr, y_ts = train_test_split(data_x, data_y, test_size=0.2, random_state=123)

# NaiveBayes is the from-scratch classifier defined further down this page
nb = NaiveBayes(x_tr, y_tr)
nb.fit()
preds = nb.predict(x_ts)
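A quick sanity check (an addition, not part of the original gist): compare the predictions against the held-out labels. Taking the mean of the boolean match vector gives the test accuracy, assuming numpy is imported as np as in the class definition below.

import numpy as np

accuracy = np.mean(np.array(preds) == y_ts)  # fraction of correctly classified test samples
print(f'Test accuracy: {accuracy:.3f}')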
def predict(self, X):
    y_pred = [self.__predict__(x) for x in X]
    return y_pred

def __predict__(self, x):  # Helper function for the function 'predict'
    posteriors = []
    for idx, c in enumerate(self.classes):
        prior = self.priors[idx]  # P(yi)
        # Gaussian likelihood P(x | yi), built from the per-class means and stds computed in fit()
        likelihood = np.prod(np.exp(-(x - self.means[idx]) ** 2 / (2 * self.stds[idx] ** 2))
                             / (np.sqrt(2 * np.pi) * self.stds[idx]))
        posteriors.append(prior * likelihood)  # P(yi) * P(x | yi), proportional to the posterior
    return self.classes[np.argmax(posteriors)]  # class with the highest posterior
def fit(self):
    for c in self.classes:
        x_c = self.X[c == self.y]  # rows of X that belong to class c
        self.means.append(x_c.mean(axis=0))  # column-wise mean of the class
        self.stds.append(x_c.std(axis=0))  # column-wise standard deviation of the class
        self.priors.append(len(x_c) / len(self.X))  # relative frequency of the class, P(c)
import numpy as np

class NaiveBayes:
    def __init__(self, X, y):
        self.X = X  # training features (fit() and __predict__ rely on these)
        self.y = y  # training labels
        self.priors = []  # list of priors, P(y) for each class
        self.stds = []  # list of per-class column-wise STDs
        self.means = []  # list of per-class column-wise means
        self.classes = np.unique(y)  # unique class labels
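As a cross-check (an addition, not part of the gist), the same split can be run through scikit-learn's GaussianNB, which implements the same Gaussian naive Bayes model; the two classifiers should agree on nearly all test samples, with small differences possible because GaussianNB adds a variance-smoothing term.

from sklearn.naive_bayes import GaussianNB

sk_nb = GaussianNB()
sk_nb.fit(x_tr, y_tr)
sk_preds = sk_nb.predict(x_ts)
print('Agreement with sklearn:', np.mean(np.array(preds) == sk_preds))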
x1         x2         y
 0.304224  -0.093895  0.000000
-0.160162   0.148133  1.000000
 0.810025  -0.644255  1.000000
 0.783948  -0.689502  1.000000
-0.332728   0.375302  0.000000
 1.321665  -1.077594  0.000000
 0.321454  -0.215809  0.000000
epoch | cost
0     | 18832.536
1000  | 166.03684065670916
2000  | 161.0905355989399
3000  | 159.3269270573553
4000  | 158.62216516642283
5000  | 158.43322392805746
6000  | 158.30231625610742
7000  | 158.32575013233475
8000  | 158.48673031470614
9000  | 158.2576187419476
import numpy as np

# Load the dataset: the first column is the label, the remaining columns are the features
data = np.genfromtxt('files/data.csv', dtype=float, delimiter=',')
np.random.shuffle(data)
train_y = data[:, 0]
train_x = data[:, 1:]

# SVM is the from-scratch linear soft-margin classifier defined below
clf = SVM(train_x, train_y, num_of_epochs=10000, lr=1e-3, C=30)
clf.fit()
p = np.sign(clf.predict(train_x))  # predicted labels in {-1, +1}
def fit(self):
    # Plain (sub)gradient descent on the soft-margin SVM cost
    for i in range(self.num_of_epochs):
        L, dw = self.get_cost_grads(self.X, self.w, self.y)
        self.w = self.w - self.lr * dw
        if i % 1000 == 0:
            print(i, ' | ', L)
def predict(self, X):
    X = np.column_stack((np.ones(len(X)), X))  # prepend a bias column of ones
    return np.dot(X, self.w)  # raw decision values; the caller applies np.sign
def get_cost_grads(self, X, w, y):
    distances = self.distances(w)
    # Current cost: 1/2 * ||w||^2 plus C times the total hinge loss
    # (distances are clipped to be <= 0, so the minus sign makes the term positive)
    L = 1 / 2 * np.dot(w, w) - self.C * np.sum(distances)
    # Subgradient of the cost with respect to w
    dw = np.zeros(len(w))
    for ind, d in enumerate(distances):
        if d == 0:  # if sample is not on the support vector: only the regularizer contributes
            di = w
        else:  # margin violator: the hinge term also contributes
            di = w - self.C * y[ind] * X[ind]
        dw += di
    dw = dw / len(y)  # average over the training samples
    return L, dw
def distances(self, w, with_lagrange=True):
    # Signed distance of each sample from the current decision boundary,
    # measured against a margin of width 1
    distances = self.y * (np.dot(self.X, w)) - 1
    if with_lagrange:  # if the Lagrange multiplier is considered
        # if the distance is more than 0, the sample is not on the support vector,
        # so its Lagrange multiplier will be 0
        distances[distances > 0] = 0
    return distances
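The constructor of the SVM class is not included in these previews. Below is a minimal sketch, assuming the attribute names used by fit(), predict(), get_cost_grads() and distances() above (self.X augmented with a bias column, self.y in {-1, +1}, self.w, self.num_of_epochs, self.lr, self.C); the actual gist may initialize things differently.

import numpy as np

class SVM:
    def __init__(self, X, y, num_of_epochs=10000, lr=1e-3, C=30):
        self.X = np.column_stack((np.ones(len(X)), X))  # prepend a bias column, as predict() does
        self.y = y  # labels, expected to be in {-1, +1} for the hinge-loss formulation
        self.num_of_epochs = num_of_epochs
        self.lr = lr  # learning rate for gradient descent
        self.C = C  # penalty weight on the hinge loss
        self.w = np.zeros(self.X.shape[1])  # weight vector, one entry per column incl. bias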