This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd

## Load the Iris dataset from a local CSV file
df = pd.read_csv('/Users/rohith/Documents/Datasets/Iris_dataset/Iris.csv')

## Keep the target values aside before the column is removed from the features
target = df['Species']

## Drop the target and Id columns together with the two features we won't
## be using, in a single pass
df = df.drop(['Species', 'Id', 'SepalWidthCm', 'PetalWidthCm'], axis=1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.cluster import KMeans

## Fit a 3-cluster K-Means model on the training data, then ask the fitted
## model for the cluster of every test point.
## train_data and test_data are defined outside this snippet.
clf = KMeans(n_clusters=3)
clf.fit(train_data)
pred = clf.predict(test_data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## NOTE(review): this snippet is an extraction of an embedded code listing; the
## trailing "| |" separators and the missing indentation of the loop body are
## extraction residue, and the end of the loop is cut off.
## Collects one prediction per test point; c1, c2, c3 and test_data are
## defined outside this snippet.
pred = [] | |
for point in test_data: | |
## Find the distance between the test data point and each centroid:
## square root of the summed squared differences over indices 0..3
## (Euclidean distance on four coordinates).
dis_point_c1 = ((c1[0]-point[0])**2 + (c1[1]-point[1])**2 + | |
(c1[2]-point[2])**2 + (c1[3]-point[3])**2)**0.5 | |
dis_point_c2 = ((c2[0]-point[0])**2 + (c2[1]-point[1])**2 + | |
(c2[2]-point[2])**2 + (c2[3]-point[3])**2)**0.5 | |
dis_point_c3 = ((c3[0]-point[0])**2 + (c3[1]-point[1])**2 + | |
(c3[2]-point[2])**2 + (c3[3]-point[3])**2)**0.5 | |
## Find the cluster the point is closest to and append it (presumably to
## pred -- the code for this step is not visible in this excerpt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## K-Means Algorithm
import random
import numpy as np

## Integer range (low, high) that each of the four centroid coordinates is
## drawn from, in coordinate order. np.random.randint excludes `high`.
_CENTROID_RANGES = ((4, 8), (1, 5), (1, 7), (0, 3))


def _random_centroid():
    """Return one centroid: a list of four floats, one drawn per range."""
    return [float(np.random.randint(low, high))
            for low, high in _CENTROID_RANGES]


## Randomly place the centroids of the three clusters.
## The draw order (c1, then c2, then c3, coordinates left to right) is kept so
## that a seeded run produces the same centroids as the literal form did.
c1 = _random_centroid()
c2 = _random_centroid()
c3 = _random_centroid()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

## Load Iris dataset
## NOTE(review): another snippet in this file reads '.../Iris.csv' (capital I);
## confirm which spelling matches the file on disk.
df = pd.read_csv('/Users/rohith/Documents/Datasets/Iris_dataset/iris.csv')

## Store the target value
classes = df['Species']

## Drop the Id and Species columns from the data
df = df.drop(['Id', 'Species'], axis=1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## Train a linear-kernel support vector classifier on the training split,
## predict the test split and print the resulting accuracy.
## x_train, y_train, x_test and y_test are defined outside this snippet.
clf = SVC(kernel='linear')
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.metrics import accuracy_score

## Clip the weights: remove the entries at positions 10..89 and reshape what
## is left into a 10x1 column vector. np.delete is called without an axis, so
## it operates on the flattened array.
## w1, w2 and np come from outside this snippet.
index = list(range(10, 90))
w1 = np.delete(w1, index).reshape(10, 1)
w2 = np.delete(w2, index).reshape(10, 1)

## Extract the test data features
## (the code for this step is not visible in this excerpt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Support Vector Machine
import numpy as np

## Take columns 0 and 1 of the training matrix as the two features and turn
## each into a 90x1 column vector. x_train is defined outside this snippet.
train_f1 = x_train[:, 0].reshape(90, 1)
train_f2 = x_train[:, 1].reshape(90, 1)

## Weight vector for the first feature, initialised to zeros with the same
## 90x1 shape as the feature columns
w1 = np.zeros((90, 1))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.utils import shuffle
## train_test_split lives in sklearn.model_selection; the old
## sklearn.cross_validation module was removed from scikit-learn, so it is
## only tried as a fallback for very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
import numpy as np
## NOTE(review): this snippet is an extraction of an embedded code listing; the
## trailing "| |" separators and the missing indentation are extraction
## residue, and the loop below is cut off after its first branch.
## Drop the rest of the features and extract the target values
df = df.drop(['SepalWidthCm','PetalWidthCm'],axis=1) | |
## Y collects one numeric label per row of the Species column
Y = [] | |
## NOTE(review): requires df to still contain 'Species' at this point; other
## snippets in this file drop that column -- confirm the execution order.
target = df['Species'] | |
for val in target: | |
## 'Iris-setosa' rows are labelled -1; the handling of any other species is
## not visible in this excerpt
if(val == 'Iris-setosa'): | |
Y.append(-1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt

## The two features used for plotting; df is defined outside this snippet
x = df['SepalLengthCm']
y = df['PetalLengthCm']

## Split by position: the first 50 rows go to the setosa series, every
## remaining row to the versicolor series.
## NOTE(review): assumes df is ordered by species and holds only these two
## classes -- confirm against the code that builds df.
setosa_x, setosa_y = x[:50], y[:50]
versicolor_x, versicolor_y = x[50:], y[50:]