GitHub Gists by Tarlan Ahad (tarlanahad)
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Generate a toy binary-classification dataset and hold out 20% for testing
data_x, data_y = datasets.make_classification(n_samples=10000, n_features=5, n_classes=2, random_state=123)
x_tr, x_ts, y_tr, y_ts = train_test_split(data_x, data_y, test_size=0.2, random_state=123)

# NaiveBayes is the from-scratch classifier defined further down this page
nb = NaiveBayes(x_tr, y_tr)
nb.fit()
preds = nb.predict(x_ts)
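A quick sanity check (an addition, not part of the original gist): compare the predictions against the held-out labels. Taking the mean of the boolean match vector gives the test accuracy, assuming numpy is imported as np as in the class definition below.

import numpy as np

accuracy = np.mean(np.array(preds) == y_ts)  # fraction of correctly classified test samples
print(f'Test accuracy: {accuracy:.3f}')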
def predict(self, X):
    y_pred = [self.__predict__(x) for x in X]
    return y_pred

def __predict__(self, x):  # Helper function for the function 'predict'
    posteriors = []
    for idx, c in enumerate(self.classes):
        prior = self.priors[idx]  # P(yi)
        # Gaussian likelihood P(x | yi), built from the per-class means and stds computed in fit()
        likelihood = np.prod(np.exp(-(x - self.means[idx]) ** 2 / (2 * self.stds[idx] ** 2))
                             / (np.sqrt(2 * np.pi) * self.stds[idx]))
        posteriors.append(prior * likelihood)  # P(yi) * P(x | yi), proportional to the posterior
    return self.classes[np.argmax(posteriors)]  # class with the highest posterior
def fit(self):
    for c in self.classes:
        x_c = self.X[c == self.y]  # rows of X that belong to class c
        self.means.append(x_c.mean(axis=0))  # column-wise mean of the class
        self.stds.append(x_c.std(axis=0))  # column-wise standard deviation of the class
        self.priors.append(len(x_c) / len(self.X))  # relative frequency of the class, P(c)
import numpy as np

class NaiveBayes:
    def __init__(self, X, y):
        self.X = X  # training features (fit() and __predict__ rely on these)
        self.y = y  # training labels
        self.priors = []  # list of priors, P(y) for each class
        self.stds = []  # list of per-class column-wise STDs
        self.means = []  # list of per-class column-wise means
        self.classes = np.unique(y)  # unique class labels
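As a cross-check (an addition, not part of the gist), the same split can be run through scikit-learn's GaussianNB, which implements the same Gaussian naive Bayes model; the two classifiers should agree on nearly all test samples, with small differences possible because GaussianNB adds a variance-smoothing term.

from sklearn.naive_bayes import GaussianNB

sk_nb = GaussianNB()
sk_nb.fit(x_tr, y_tr)
sk_preds = sk_nb.predict(x_ts)
print('Agreement with sklearn:', np.mean(np.array(preds) == sk_preds))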
x1         x2         y
 0.304224  -0.093895  0.000000
-0.160162   0.148133  1.000000
 0.810025  -0.644255  1.000000
 0.783948  -0.689502  1.000000
-0.332728   0.375302  0.000000
 1.321665  -1.077594  0.000000
 0.321454  -0.215809  0.000000
epoch | cost
0     | 18832.536
1000  | 166.03684065670916
2000  | 161.0905355989399
3000  | 159.3269270573553
4000  | 158.62216516642283
5000  | 158.43322392805746
6000  | 158.30231625610742
7000  | 158.32575013233475
8000  | 158.48673031470614
9000  | 158.2576187419476
import numpy as np

# Load the dataset: the first column is the label, the remaining columns are the features
data = np.genfromtxt('files/data.csv', dtype=float, delimiter=',')
np.random.shuffle(data)
train_y = data[:, 0]
train_x = data[:, 1:]

# SVM is the from-scratch linear soft-margin classifier defined below
clf = SVM(train_x, train_y, num_of_epochs=10000, lr=1e-3, C=30)
clf.fit()
p = np.sign(clf.predict(train_x))  # predicted labels in {-1, +1}
def fit(self):
    # Plain (sub)gradient descent on the soft-margin SVM cost
    for i in range(self.num_of_epochs):
        L, dw = self.get_cost_grads(self.X, self.w, self.y)
        self.w = self.w - self.lr * dw
        if i % 1000 == 0:
            print(i, ' | ', L)
def predict(self, X):
    X = np.column_stack((np.ones(len(X)), X))  # prepend a bias column of ones
    return np.dot(X, self.w)  # raw decision values; the caller applies np.sign
def get_cost_grads(self, X, w, y):
    distances = self.distances(w)
    # Current cost: 1/2 * ||w||^2 plus C times the total hinge loss
    # (distances are clipped to be <= 0, so the minus sign makes the term positive)
    L = 1 / 2 * np.dot(w, w) - self.C * np.sum(distances)
    # Subgradient of the cost with respect to w
    dw = np.zeros(len(w))
    for ind, d in enumerate(distances):
        if d == 0:  # if sample is not on the support vector: only the regularizer contributes
            di = w
        else:  # margin violator: the hinge term also contributes
            di = w - self.C * y[ind] * X[ind]
        dw += di
    dw = dw / len(y)  # average over the training samples
    return L, dw
def distances(self, w, with_lagrange=True):
    # Signed distance of each sample from the current decision boundary,
    # measured against a margin of width 1
    distances = self.y * (np.dot(self.X, w)) - 1
    if with_lagrange:  # if the Lagrange multiplier is considered
        # if the distance is more than 0, the sample is not on the support vector,
        # so its Lagrange multiplier will be 0
        distances[distances > 0] = 0
    return distances
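The constructor of the SVM class is not included in these previews. Below is a minimal sketch, assuming the attribute names used by fit(), predict(), get_cost_grads() and distances() above (self.X augmented with a bias column, self.y in {-1, +1}, self.w, self.num_of_epochs, self.lr, self.C); the actual gist may initialize things differently.

import numpy as np

class SVM:
    def __init__(self, X, y, num_of_epochs=10000, lr=1e-3, C=30):
        self.X = np.column_stack((np.ones(len(X)), X))  # prepend a bias column, as predict() does
        self.y = y  # labels, expected to be in {-1, +1} for the hinge-loss formulation
        self.num_of_epochs = num_of_epochs
        self.lr = lr  # learning rate for gradient descent
        self.C = C  # penalty weight on the hinge loss
        self.w = np.zeros(self.X.shape[1])  # weight vector, one entry per column incl. bias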