1. Find-S

import pandas as pd
import numpy as np

def train(X, y):
    # Start with the most specific hypothesis: every attribute is "null"
    hypothesis = ["null"] * len(X[0])
    # Iterate over the training examples
    for i in range(len(X)):
        # Only positive examples can generalize the hypothesis
        if y[i] == "yes":
            # Compare every attribute of the example against the hypothesis
            for j in range(len(X[i])):
                feature = X[i][j]
                if hypothesis[j] == "null":
                    hypothesis[j] = feature
                elif hypothesis[j] != feature:
                    hypothesis[j] = "general"
    return hypothesis

# Read the csv and split it into the feature matrix X and the label vector y
df = pd.read_csv('./a1.csv')
X = np.array(df.iloc[:, :-1])
y = np.array(df.iloc[:, -1])
hypothesis = train(X, y)
print(hypothesis)
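The contents of a1.csv are not shown in the gist; as a quick check, train can also be exercised directly on the classic EnjoySport examples from Mitchell (a hypothetical inline substitute for the CSV):

# Hypothetical inline data standing in for a1.csv (Mitchell's EnjoySport set)
X_demo = [
    ["sunny", "warm", "normal", "strong", "warm", "same"],
    ["sunny", "warm", "high", "strong", "warm", "same"],
    ["rainy", "cold", "high", "strong", "warm", "change"],
    ["sunny", "warm", "high", "strong", "cool", "change"],
]
y_demo = ["yes", "yes", "no", "yes"]
print(train(X_demo, y_demo))
# -> ['sunny', 'warm', 'general', 'strong', 'general', 'general']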
2. Candidate Elimination

import pandas as pd
import numpy as np

def train(X, y):
    # Start the general boundary fully general: one hypothesis of all "?" per attribute
    general_h = np.full((len(X[0]), len(X[0])), fill_value="?", dtype=object)
    # Start the specific boundary at the first training example
    specific_h = X[0].copy()
    for i in range(len(X)):
        # A positive example generalizes the specific hypothesis
        if y[i] == "yes":
            for j in range(len(X[i])):
                if X[i][j] != specific_h[j]:
                    specific_h[j] = '?'
                    general_h[j][j] = '?'
        # A negative example specializes the general hypotheses
        elif y[i] == "no":
            for j in range(len(X[i])):
                if X[i][j] != specific_h[j]:
                    general_h[j][j] = specific_h[j]
                else:
                    general_h[j][j] = '?'
    # Drop the hypotheses that are still maximally general
    indices = []
    ref = np.full(len(X[0]), fill_value="?", dtype=object)
    for i in range(len(general_h)):
        if np.array_equal(general_h[i], ref):
            indices.append(i)
    general_h = np.delete(general_h, indices, axis=0)
    return [general_h, specific_h]

df = pd.read_csv('./a1.csv')
X = np.array(df.iloc[:, :-1])
y = np.array(df.iloc[:, -1])
general_h, specific_h = train(X, y)
print(general_h)
print(specific_h)
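The same hypothetical EnjoySport rows used above show both boundaries this train returns:

# Reusing the hypothetical EnjoySport rows from the Find-S demo
X_demo = np.array([
    ["sunny", "warm", "normal", "strong", "warm", "same"],
    ["sunny", "warm", "high", "strong", "warm", "same"],
    ["rainy", "cold", "high", "strong", "warm", "change"],
    ["sunny", "warm", "high", "strong", "cool", "change"],
], dtype=object)
y_demo = np.array(["yes", "yes", "no", "yes"])
g, s = train(X_demo, y_demo)
print(g)  # two surviving general hypotheses: one keeping 'sunny', one keeping 'warm'
print(s)  # -> ['sunny' 'warm' '?' 'strong' '?' '?']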
3. KNN

from math import sqrt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score

k = 7

def find_distance(x, y):
    # Mixed distance: Hamming on the first (categorical) attribute,
    # squared difference on the remaining numeric attributes
    distance = 0
    for i in range(0, len(x)):
        if i == 0:
            if x[i] != y[i]:
                distance += 1.00
        else:
            distance += pow(x[i] - y[i], 2)
    return round(sqrt(distance), 2)

def k_nearest_neighbours(X, x):
    # Distance from x to every training point, keyed by training index
    distances = {}
    for i in range(0, len(X)):
        distances[i] = find_distance(X[i], x)
    nearest_neighbours = sorted(distances.items(), key=lambda item: item[1])
    return dict(nearest_neighbours[0:k])

def predict(X_train, y_train, x):
    # Majority vote among the k nearest neighbours
    neighbours = k_nearest_neighbours(X_train, x)
    classes = {}
    max_val = 0
    max_class = ""
    for key in neighbours.keys():
        item_class = y_train[key]
        classes[item_class] = classes.get(item_class, 0) + 1
        if classes[item_class] > max_val:
            max_val = classes[item_class]
            max_class = item_class
    return max_class

def main():
    df = pd.read_csv("./Climate_Data.csv")
    df = df.dropna(axis=0, subset=["AUST"])
    X = np.array(df.iloc[:, :-1])
    y = np.array(df.iloc[:, -1])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)
    pred_df = {"Actual": [], "Predicted": []}
    for i in range(0, len(X_test)):
        pred_df["Actual"].append(y_test[i])
        pred_df["Predicted"].append(predict(X_train, y_train, X_test[i]))
    pred_df = pd.DataFrame.from_dict(pred_df)
    print(pred_df.head())
    cf_matrix = confusion_matrix(pred_df["Actual"], pred_df["Predicted"])
    print(cf_matrix)
    print("Accuracy:", accuracy_score(pred_df["Actual"], pred_df["Predicted"]))

main()
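As a sanity check, the same split can be scored with scikit-learn's built-in classifier by appending these lines at the end of main() (this assumes the feature columns are numeric; a categorical first column would need encoding first):

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)
print("sklearn accuracy:", knn.score(X_test, y_test))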
4. K-Means

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')

class K_Means:
    def __init__(self, k=3, tol=0.001, max_iter=300):
        self.k = k
        self.tol = tol
        self.max_iter = max_iter

    def fit(self, data):
        # Seed the centroids with the first k points
        self.centroids = {}
        for i in range(self.k):
            self.centroids[i] = data[i]
        for _ in range(self.max_iter):
            self.classifications = {}
            for i in range(self.k):
                self.classifications[i] = []
            # Assign each point to its nearest centroid
            for featureset in data:
                distances = [np.linalg.norm(
                    featureset - self.centroids[centroid]) for centroid in self.centroids]
                classification = distances.index(min(distances))
                self.classifications[classification].append(featureset)
            prev_centroids = dict(self.centroids)
            # Move each centroid to the mean of its assigned points
            for classification in self.classifications:
                self.centroids[classification] = np.average(
                    self.classifications[classification], axis=0)
            # Stop once every centroid moves less than the tolerance (percent change)
            optimized = True
            for c in self.centroids:
                original_centroid = prev_centroids[c]
                current_centroid = self.centroids[c]
                movement = np.sum(
                    np.abs((current_centroid - original_centroid) / original_centroid * 100.0))
                if movement > self.tol:
                    print(movement)
                    optimized = False
            if optimized:
                break

    def predict(self, data):
        distances = [np.linalg.norm(data - self.centroids[centroid])
                     for centroid in self.centroids]
        classification = distances.index(min(distances))
        return classification

# Three well-separated random blobs of 20 points each
X = 1 + np.random.rand(20, 2)
X1 = -1 + np.random.rand(20, 2)
X2 = 5 + np.random.rand(20, 2)
X = np.concatenate((X, X1, X2))
plt.scatter(X[:, 0], X[:, 1])
plt.show()

colors = 10 * ["g", "r", "c", "b", "k"]
clf = K_Means()
clf.fit(X)

for centroid in clf.centroids:
    plt.scatter(clf.centroids[centroid][0], clf.centroids[centroid][1],
                marker="o", color="k", s=150, linewidths=5)
for classification in clf.classifications:
    color = colors[classification]
    for featureset in clf.classifications[classification]:
        plt.scatter(featureset[0], featureset[1],
                    marker="x", color=color, s=150, linewidths=5)
plt.show()
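The resulting centroids can be cross-checked against scikit-learn's implementation on the same blobs (cluster numbering will generally differ):

from sklearn.cluster import KMeans

km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
print("from-scratch centroids:", list(clf.centroids.values()))
print("sklearn centroids:", km.cluster_centers_)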
5. K-Means (Using KNN)

import numpy as np
import pandas as pd
from math import sqrt

df = pd.read_csv('kmean.csv')
print(df)
X = df['x'].values
print(X)
Y = df['y'].values
print(Y)

n = len(X)
pairs = []
for i in range(0, n):
    pairs.append((X[i], Y[i]))

# Distance threshold for starting a new cluster
T = 4
clusters = []
clusters.append([])
clusters[0].append(pairs[0])

def euc_dist(C, X):
    x, y = C[0], C[1]
    x1, y1 = X[0], X[1]
    return sqrt(((x - x1) ** 2) + ((y - y1) ** 2))

# Threshold-based clustering: assign each remaining point to the cluster whose
# closest member is within T; otherwise start a new cluster
n = 0
for i in range(1, len(df)):
    temp_clus = clusters[0]
    min_d = float('inf')
    count = 0
    for j in clusters:
        for k in j:
            dist = euc_dist(pairs[i], k)
            if dist >= T:
                continue
            elif dist < min_d:
                count = 1
                min_d = dist
                temp_clus = j
    if count == 0:
        n = n + 1
        clusters.append([])
        clusters[n].append(pairs[i])
    else:
        temp_clus.append(pairs[i])

for cluster in clusters:
    print(cluster)
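kmean.csv is not included with the gist; a hypothetical stand-in DataFrame such as the one below, substituted for the pd.read_csv line, exercises the loop. With T = 4 the two groups (roughly 10 units apart) land in separate clusters:

# Hypothetical stand-in for kmean.csv: two tight groups far apart
df = pd.DataFrame({'x': [1, 2, 1.5, 8, 9, 8.5],
                   'y': [1, 1.5, 2, 8, 8.5, 9]})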
6. Gradient Descent

import numpy as np
import pandas as pd

data = pd.read_csv("Iris.csv")

def gradient_descent(x1, x2, x3, x4, y):
    # Fit y = m1*x1 + m2*x2 + m3*x3 + m4*x4 + b by batch gradient descent
    m1 = m2 = m3 = m4 = b = 0
    n = len(x1)
    alpha = 0.01
    for i in range(100):
        y_pred = m1 * x1 + m2 * x2 + m3 * x3 + m4 * x4 + b
        # Partial derivatives of the mean squared error
        md1 = -(2 / n) * sum(x1 * (y - y_pred))
        md2 = -(2 / n) * sum(x2 * (y - y_pred))
        md3 = -(2 / n) * sum(x3 * (y - y_pred))
        md4 = -(2 / n) * sum(x4 * (y - y_pred))
        bd = -(2 / n) * sum(y - y_pred)
        m1 = m1 - md1 * alpha
        m2 = m2 - md2 * alpha
        m3 = m3 - md3 * alpha
        m4 = m4 - md4 * alpha
        b = b - bd * alpha
    print(m1, 'x1 + ', m2, 'x2 + ', m3, 'x3 + ', m4, 'x4 + ', b)

x1 = data['SepalLengthCm']
x2 = data['SepalWidthCm']
x3 = data['PetalLengthCm']
x4 = data['PetalWidthCm']
# Encode the categorical species labels as integer codes so the arithmetic works
y = data['Species'].astype('category').cat.codes
gradient_descent(x1, x2, x3, x4, y)
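For reference, the learned coefficients can be compared against the closed-form least-squares fit from scikit-learn (100 iterations of gradient descent typically lands near, but not exactly on, these values):

from sklearn.linear_model import LinearRegression

features = data[['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']]
reg = LinearRegression().fit(features, y)
print("sklearn coefficients:", reg.coef_, "intercept:", reg.intercept_)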
7. Linear Regression

import pandas as pd
import numpy as np

def train(X, y):
    # Fit simple linear regression with the Ordinary Least Squares formulas
    X_mean = np.mean(X)
    y_mean = np.mean(y)
    # Running sums for the OLS numerator (covariance) and denominator (variance)
    sum_xy = 0
    sum_xx = 0
    for i in range(len(X)):
        x_diff = X[i] - X_mean
        y_diff = y[i] - y_mean
        sum_xy = sum_xy + (x_diff * y_diff)
        sum_xx = sum_xx + pow(X[i] - X_mean, 2)
    # Slope and intercept
    b1 = sum_xy / sum_xx
    b0 = y_mean - (X_mean * b1)
    return [b0[0], b1[0]]

def main():
    df = pd.read_csv('./LR.csv')
    print(df.head())
    X = np.array(df.iloc[:, :-1])
    y = np.array(df.iloc[:, -1])
    print(X)
    print(y)
    b0, b1 = train(X, y)
    b0 = round(b0, 2)
    b1 = round(b1, 2)
    print('The equation for linear regression is y =', b0, '+', b1, '* x')

main()
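The slope and intercept can be cross-checked inside main() with NumPy's least-squares polynomial fit, which computes the same OLS solution:

b1_np, b0_np = np.polyfit(X.ravel(), y, 1)  # degree-1 fit returns [slope, intercept]
print('numpy check: y =', round(b0_np, 2), '+', round(b1_np, 2), '* x')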
8. Logistic Regression
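A minimal from-scratch sketch of binary logistic regression trained with batch gradient descent, following the CSV-reading pattern of the other sections; the file name logistic.csv and the 0/1 label encoding in the last column are assumptions:

import numpy as np
import pandas as pd

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def train(X, y, alpha=0.1, iters=1000):
    # Batch gradient descent on the logistic (cross-entropy) loss
    m, n = X.shape
    w = np.zeros(n)
    b = 0.0
    for _ in range(iters):
        y_pred = sigmoid(X @ w + b)
        error = y_pred - y
        w -= alpha * (X.T @ error) / m
        b -= alpha * np.sum(error) / m
    return w, b

df = pd.read_csv('./logistic.csv')  # assumed file name; last column is a 0/1 label
X = np.array(df.iloc[:, :-1], dtype=float)
y = np.array(df.iloc[:, -1], dtype=float)
w, b = train(X, y)
preds = (sigmoid(X @ w + b) >= 0.5).astype(int)
print("weights:", w, "bias:", b)
print("training accuracy:", np.mean(preds == y))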
9. SVM

# importing some basic libraries
%matplotlib inline   # Jupyter-only magic; remove when running as a plain script
import matplotlib.pyplot as plt
from matplotlib import style
style.use('ggplot')
import numpy as np

class SVM(object):
    def __init__(self, visualization=True):
        self.visualization = visualization
        self.colors = {1: 'r', -1: 'b'}
        if self.visualization:
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(1, 1, 1)

    def fit(self, data):
        # train with data
        self.data = data
        # { ||w|| : [w, b] }
        opt_dict = {}
        transforms = [[1, 1], [-1, 1], [-1, -1], [1, -1]]
        all_data = np.array([])
        for yi in self.data:
            all_data = np.append(all_data, self.data[yi])
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        all_data = None
        # with smaller steps the margins and decision boundary get more precise
        step_sizes = [self.max_feature_value * 0.1,
                      self.max_feature_value * 0.01,
                      # going further gets extremely expensive
                      self.max_feature_value * 0.001]
        # b does not need steps as small as w
        b_range_multiple = 5
        b_multiple = 5
        latest_optimum = self.max_feature_value * 10
        """
        The objective is to satisfy yi*(xi.w + b) >= 1 for every training example
        while minimizing ||w||. We start from a large w and shrink it step by step,
        scanning b over a range at each candidate w.
        """
        # make the step smaller and smaller to refine the value
        for step in step_sizes:
            w = np.array([latest_optimum, latest_optimum])
            # we can do this because the problem is convex
            optimized = False
            while not optimized:
                for b in np.arange(-1 * self.max_feature_value * b_range_multiple,
                                   self.max_feature_value * b_range_multiple,
                                   step * b_multiple):
                    for transformation in transforms:
                        w_t = w * transformation
                        found_option = True
                        # weakest link in this SVM: checks every point by brute force;
                        # SMO attempts to fix this a bit
                        # constraint: yi*(xi.w + b) >= 1
                        for i in self.data:
                            for xi in self.data[i]:
                                yi = i
                                if not yi * (np.dot(w_t, xi) + b) >= 1:
                                    found_option = False
                        if found_option:
                            # every point satisfies yi*(xi.w + b) >= 1 for this w_t, b,
                            # so store [w_t, b] keyed by ||w_t||
                            opt_dict[np.linalg.norm(w_t)] = [w_t, b]
                # once w[0] < 0, the transformations only repeat values already tried
                if w[0] < 0:
                    optimized = True
                    print("optimized a step")
                else:
                    w = w - step
            # sort the norms so the smallest ||w|| is at position 0
            norms = sorted([n for n in opt_dict])
            # optimal values of w, b
            opt_choice = opt_dict[norms[0]]
            self.w = opt_choice[0]
            self.b = opt_choice[1]
            # start the next, finer pass near the current optimum
            latest_optimum = opt_choice[0][0] + step * 2

    def predict(self, features):
        # sign(x.w + b)
        classification = np.sign(np.dot(np.array(features), self.w) + self.b)
        if classification != 0 and self.visualization:
            self.ax.scatter(features[0], features[1], s=200,
                            marker='*', c=self.colors[classification])
        return (classification, np.dot(np.array(features), self.w) + self.b)

    def visualize(self):
        [[self.ax.scatter(x[0], x[1], s=100, c=self.colors[i])
          for x in data_dict[i]] for i in data_dict]
        # hyperplane: x.w + b (in 2-D this is a line)
        # v = x0*w0 + x1*w1 + b  ->  x1 = (v - w0*x0 - b) / w1
        # positive support vector line: x.w + b = 1
        # negative support vector line: x.w + b = -1
        # decision boundary:            x.w + b = 0
        def hyperplane(x, w, b, v):
            # returns the x2 value on the line for a given x1
            return (-w[0] * x - b + v) / w[1]
        hyp_x_min = self.min_feature_value * 0.9
        hyp_x_max = self.max_feature_value * 1.1
        # (w.x + b) = 1: positive support vector hyperplane
        pav1 = hyperplane(hyp_x_min, self.w, self.b, 1)
        pav2 = hyperplane(hyp_x_max, self.w, self.b, 1)
        self.ax.plot([hyp_x_min, hyp_x_max], [pav1, pav2], 'k')
        # (w.x + b) = -1: negative support vector hyperplane
        nav1 = hyperplane(hyp_x_min, self.w, self.b, -1)
        nav2 = hyperplane(hyp_x_max, self.w, self.b, -1)
        self.ax.plot([hyp_x_min, hyp_x_max], [nav1, nav2], 'k')
        # (w.x + b) = 0: decision boundary
        db1 = hyperplane(hyp_x_min, self.w, self.b, 0)
        db2 = hyperplane(hyp_x_max, self.w, self.b, 0)
        self.ax.plot([hyp_x_min, hyp_x_max], [db1, db2], 'y--')

data_dict = {-1: np.array([[1, 7], [2, 8], [3, 8]]),
             1: np.array([[5, 1], [6, -1], [7, 3]])}
svm = SVM()  # Linear Kernel
svm.fit(data=data_dict)
svm.visualize()
svm.predict([3, 8])
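For comparison, the same toy data can be fit with scikit-learn's linear-kernel SVC; the stepping optimizer above only approximates the true maximum-margin solution:

from sklearn.svm import SVC

X_all = np.concatenate([data_dict[-1], data_dict[1]])
y_all = np.array([-1, -1, -1, 1, 1, 1])
svc = SVC(kernel='linear', C=1e6).fit(X_all, y_all)  # large C approximates a hard margin
print("sklearn w:", svc.coef_, "b:", svc.intercept_)
print("sklearn prediction for [3, 8]:", svc.predict([[3, 8]]))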