This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split
from sklearn import datasets

# Build a synthetic binary-classification problem: 10,000 samples, 5 features.
data_x, data_y = datasets.make_classification(
    n_samples=10000, n_features=5, n_classes=2, random_state=123
)

# Reserve 20% of the samples as a held-out test set (seeded for reproducibility).
x_tr, x_ts, y_tr, y_ts = train_test_split(
    data_x, data_y, test_size=0.2, random_state=123
)

# Train the from-scratch classifier and predict on the held-out samples.
nb = NaiveBayes(x_tr, y_tr)
nb.fit()
preds = nb.predict(x_ts)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def predict(self, X):
    """Return a predicted class label for each sample (row) of X."""
    # Delegate the per-sample posterior computation to the private helper.
    return [self.__predict__(sample) for sample in X]
def __predict__(self, x): # Helper Function for the function 'predict' | |
posteriors = [] | |
for idx, c in enumerate(self.classes): | |
prior = self.priors[idx] # P(yi) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fit(self):
    """Estimate per-class Gaussian parameters and priors from the training data.

    For every unique class label, records the column-wise mean and standard
    deviation of its member rows plus the class's relative frequency, appending
    to self.means / self.stds / self.priors in class order.
    """
    n_samples = len(self.X)
    for label in self.classes:
        members = self.X[self.y == label]  # rows of X belonging to this class
        self.means.append(members.mean(axis=0))  # feature-wise mean
        self.stds.append(members.std(axis=0))    # feature-wise std
        self.priors.append(len(members) / n_samples)  # P(label) as a frequency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np

class NaiveBayes:
    """Gaussian Naive Bayes classifier; class statistics are computed in fit()."""

    def __init__(self, X, y):
        # NOTE(review): the remainder of __init__ (presumably storing X and y
        # on self, since fit() reads self.X and self.y) is truncated in this
        # view — confirm against the full file.
        self.priors = []  # List of priors - P(y) for each class
        self.stds = []  # List of STDs of feature columns
        self.means = []  # List of Mean of feature columns
        self.classes = np.unique(y)  # unique class labels
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| x1 | x2 | y |
| --- | --- | --- |
| 0.304224 | -0.093895 | 0.000000 |
| -0.160162 | 0.148133 | 1.000000 |
| 0.810025 | -0.644255 | 1.000000 |
| 0.783948 | -0.689502 | 1.000000 |
| -0.332728 | 0.375302 | 0.000000 |
| 1.321665 | -1.077594 | 0.000000 |
| 0.321454 | -0.215809 | 0.000000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0 | 18832.536 | |
1000 | 166.03684065670916 | |
2000 | 161.0905355989399 | |
3000 | 159.3269270573553 | |
4000 | 158.62216516642283 | |
5000 | 158.43322392805746 | |
6000 | 158.30231625610742 | |
7000 | 158.32575013233475 | |
8000 | 158.48673031470614 | |
9000 | 158.2576187419476 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the dataset: column 0 holds the label, the remaining columns the features.
data = np.genfromtxt('files/data.csv', dtype=float, delimiter=',')
np.random.shuffle(data)  # in-place row shuffle

train_x = data[:, 1:]
train_y = data[:, 0]

# Fit a linear SVM by gradient descent with regularization strength C.
clf = SVM(train_x, train_y, num_of_epochs=10000, lr=1e-3, C=30)
clf.fit()

# Collapse the raw decision values onto the {-1, +1} label set.
scores = clf.predict(train_x)
p = np.sign(scores)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def fit(self):
    """Optimize the weight vector by full-batch gradient descent.

    Runs self.num_of_epochs update steps, printing the current cost every
    1000 epochs so training progress is visible.
    """
    for epoch in range(self.num_of_epochs):
        cost, grad = self.get_cost_grads(self.X, self.w, self.y)
        # Plain gradient step; rebinds self.w rather than mutating in place.
        self.w = self.w - self.lr * grad
        if epoch % 1000 == 0:
            print(epoch, ' | ', cost)
def predict(self, X): | |
X = np.column_stack((np.ones(len(X)), X)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_cost_grads(self, X, w, y): | |
distances = self.distances(w) | |
# Get current cost | |
L = 1 / 2 * np.dot(w, w) - self.C * np.sum(distances) | |
dw = np.zeros(len(w)) | |
for ind, d in enumerate(distances): | |
if d == 0: # if sample is not on the support vector |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def distances(self, w, with_lagrange=True): | |
distances = self.y * (np.dot(self.X, w)) - 1 | |
# get distance from the current decision boundary | |
# by considering 1 width of margin | |
if with_lagrange: # if lagrange multiplier considered | |
# if distance is more than 0 | |
# sample is not on the support vector | |
# Lagrange multiplier will be 0 |
NewerOlder