Rohith Gandhi G grohith327

## KNN_1.py
import pandas as pd

## Read the Iris CSV into a dataframe
df = pd.read_csv('/Users/rohith/Documents/Datasets/Iris_dataset/Iris.csv')
## Keep the species labels aside as the target
target = df['Species']
## Remove the label column, the Id column and the two unused features in one pass
df = df.drop(columns=['Species','Id','SepalWidthCm','PetalWidthCm'])

## k_means_4.py
from sklearn.cluster import KMeans

## Build a 3-cluster model and fit it on the training data
## (fit returns the estimator itself, so the call can be chained)
clf = KMeans(n_clusters=3).fit(train_data)
## Cluster index predicted for every test sample
pred = clf.predict(test_data)

## k_means_3.py
## Start with an empty prediction list
pred = []
for point in test_data:
    ## Euclidean distance from this test point to each of the three
    ## centroids, taken over the first four coordinates
    dis_point_c1, dis_point_c2, dis_point_c3 = (
        sum((centroid[axis] - point[axis])**2 for axis in range(4))**0.5
        for centroid in (c1, c2, c3)
    )
    ## Find the cluster to which the point is closest to and append

## k_means_2.py
## K-Means Algorithm
import random
import numpy as np
## Draw three random starting centroids. Each coordinate is an integer
## from its own [low, high) range, stored as a float. The centroids are
## produced one after another, so the random draws happen in the order
## c1, c2, c3.
c1, c2, c3 = (
    [float(np.random.randint(low, high))
     for low, high in ((4,8), (1,5), (1,7), (0,3))]
    for _ in range(3)
)

## k_means_1.py
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

## Read the Iris CSV into a dataframe
## NOTE(review): this path ends in lowercase 'iris.csv' while the KNN_1
## snippet in this file reads 'Iris.csv' -- confirm the real file name,
## since the difference matters on case-sensitive filesystems.
df = pd.read_csv('/Users/rohith/Documents/Datasets/Iris_dataset/iris.csv')
## Keep the species column as the class labels
classes = df['Species']
## Strip the Id and Species columns from the data
df = df.drop(columns=['Id','Species'])

## svm_6.py
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

## Train a linear-kernel SVC (fit returns the estimator itself)
clf = SVC(kernel='linear').fit(x_train, y_train)
## Predict labels for the test split and print the accuracy against y_test
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))

## svm_5.py
from sklearn.metrics import accuracy_score

## Clip the weights: remove flat positions 10..89 from each weight array
## and store what is left as a (10, 1) column vector
index = list(range(10,90))
w1, w2 = (np.delete(weights, index).reshape(10,1) for weights in (w1, w2))
## Extract the test data features

## svm_4.py
## Support Vector Machine
import numpy as np

## Take the first two columns of the training matrix and turn each into a
## column vector. -1 lets NumPy infer the row count, so the snippet no
## longer breaks when the training split is not exactly 90 samples.
train_f1 = x_train[:,0].reshape(-1,1)
train_f2 = x_train[:,1].reshape(-1,1)

## One zero-initialised weight per training row (shape: rows x 1)
w1 = np.zeros((train_f1.shape[0],1))

## svm_3.py
from sklearn.utils import shuffle
## train_test_split lives in sklearn.model_selection; the old
## sklearn.cross_validation module was removed in scikit-learn 0.20.
## The fallback keeps very old installations working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
import numpy as np
## Drop rest of the features and extract the target values
df = df.drop(['SepalWidthCm','PetalWidthCm'],axis=1)
## Numeric labels built from the species column
Y = []
target = df['Species']
for val in target:
    ## NOTE(review): only the 'Iris-setosa' branch is visible in this
    ## excerpt; rows of any other species add nothing to Y here.
    if val == 'Iris-setosa':
        Y.append(-1)

## svm_2.py
import matplotlib.pyplot as plt

## The two feature columns used as x and y values
x, y = df['SepalLengthCm'], df['PetalLengthCm']

## Split each column into its first 50 rows and the remainder
## (presumably the setosa and versicolor samples -- confirm the row order)
setosa_x, versicolor_x = x[:50], x[50:]
setosa_y, versicolor_y = y[:50], y[50:]
	import pandas as pd

	## Read the Iris CSV into a dataframe
	df = pd.read_csv('/Users/rohith/Documents/Datasets/Iris_dataset/Iris.csv')
	## Keep the species labels aside as the target
	target = df['Species']
	## Remove the label column, the Id column and the two unused features in one pass
	df = df.drop(columns=['Species','Id','SepalWidthCm','PetalWidthCm'])
	from sklearn.cluster import KMeans

	## Build a 3-cluster model and fit it on the training data
	## (fit returns the estimator itself, so the call can be chained)
	clf = KMeans(n_clusters=3).fit(train_data)
	## Cluster index predicted for every test sample
	pred = clf.predict(test_data)
	## Start with an empty prediction list
	pred = []
	for point in test_data:
		## Find distance between test data point and centroids
		## (Euclidean distance over the four coordinates; the `**2`
		## exponents and the loop-body indentation were missing here)
		dis_point_c1 = ((c1[0]-point[0])**2 + (c1[1]-point[1])**2 +
		                (c1[2]-point[2])**2 + (c1[3]-point[3])**2)**0.5
		dis_point_c2 = ((c2[0]-point[0])**2 + (c2[1]-point[1])**2 +
		                (c2[2]-point[2])**2 + (c2[3]-point[3])**2)**0.5
		dis_point_c3 = ((c3[0]-point[0])**2 + (c3[1]-point[1])**2 +
		                (c3[2]-point[2])**2 + (c3[3]-point[3])**2)**0.5
		## Find the cluster to which the point is closest to and append
	## K-Means Algorithm
	import random
	import numpy as np
	## Draw three random starting centroids. Each coordinate is an integer
	## from its own [low, high) range, stored as a float. The centroids are
	## produced one after another, so the random draws happen in the order
	## c1, c2, c3.
	c1, c2, c3 = (
		[float(np.random.randint(low, high))
		 for low, high in ((4,8), (1,5), (1,7), (0,3))]
		for _ in range(3)
	)
	import pandas as pd
	import numpy as np
	from sklearn.utils import shuffle

	## Read the Iris CSV into a dataframe
	## NOTE(review): this path ends in lowercase 'iris.csv' while other
	## loads in this file read 'Iris.csv' -- confirm the real file name,
	## since the difference matters on case-sensitive filesystems.
	df = pd.read_csv('/Users/rohith/Documents/Datasets/Iris_dataset/iris.csv')
	## Keep the species column as the class labels
	classes = df['Species']
	## Strip the Id and Species columns from the data
	df = df.drop(columns=['Id','Species'])
	from sklearn.svm import SVC
	from sklearn.metrics import accuracy_score

	## Train a linear-kernel SVC (fit returns the estimator itself)
	clf = SVC(kernel='linear').fit(x_train, y_train)
	## Predict labels for the test split and print the accuracy against y_test
	y_pred = clf.predict(x_test)
	print(accuracy_score(y_test, y_pred))
	from sklearn.metrics import accuracy_score

	## Clip the weights: remove flat positions 10..89 from each weight array
	## and store what is left as a (10, 1) column vector
	index = list(range(10,90))
	w1, w2 = (np.delete(weights, index).reshape(10,1) for weights in (w1, w2))
	## Extract the test data features
	## Support Vector Machine
	import numpy as np

	## Take the first two columns of the training matrix and turn each into a
	## column vector. -1 lets NumPy infer the row count, so the snippet no
	## longer breaks when the training split is not exactly 90 samples.
	train_f1 = x_train[:,0].reshape(-1,1)
	train_f2 = x_train[:,1].reshape(-1,1)

	## One zero-initialised weight per training row (shape: rows x 1)
	w1 = np.zeros((train_f1.shape[0],1))
	from sklearn.utils import shuffle
	## train_test_split lives in sklearn.model_selection; the old
	## sklearn.cross_validation module was removed in scikit-learn 0.20.
	## The fallback keeps very old installations working.
	try:
		from sklearn.model_selection import train_test_split
	except ImportError:
		from sklearn.cross_validation import train_test_split
	import numpy as np
	## Drop rest of the features and extract the target values
	df = df.drop(['SepalWidthCm','PetalWidthCm'],axis=1)
	## Numeric labels built from the species column
	Y = []
	target = df['Species']
	for val in target:
		## NOTE(review): only the 'Iris-setosa' branch is visible in this
		## excerpt; rows of any other species add nothing to Y here.
		if val == 'Iris-setosa':
			Y.append(-1)
	import matplotlib.pyplot as plt

	## The two feature columns used as x and y values
	x, y = df['SepalLengthCm'], df['PetalLengthCm']

	## Split each column into its first 50 rows and the remainder
	## (presumably the setosa and versicolor samples -- confirm the row order)
	setosa_x, versicolor_x = x[:50], x[50:]
	setosa_y, versicolor_y = y[:50], y[50:]