Created
October 24, 2019 04:36
#!/usr/bin/env python
# coding: utf-8
# # **Support Vector Machine (SVM)**
#
#
# **A Support Vector Machine (SVM) is a supervised machine learning algorithm that can be used for both classification and regression problems. For classification, it finds the hyperplane that best separates the classes.**\
# **Types of kernel:**\
# **1. Linear kernel**\
# **2. Polynomial kernel**\
# **3. Radial Basis Function (RBF) / Gaussian kernel**
#
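# **(Aside, not part of the original notebook): a minimal sketch of what the three kernels compute for a single pair of points, using plain NumPy. The function names and default parameters here are illustrative, not sklearn's internal API.**

```python
import numpy as np

# Two hypothetical 4-feature points (shaped like rows of the Iris data)
x = np.array([5.1, 3.5, 1.4, 0.2])
z = np.array([5.0, 3.4, 1.6, 0.4])

def linear_kernel(x, z):
    # K(x, z) = x . z
    return x @ z

def polynomial_kernel(x, z, degree=3, gamma=1.0, coef0=1.0):
    # K(x, z) = (gamma * x . z + coef0) ** degree
    return (gamma * (x @ z) + coef0) ** degree

def rbf_kernel(x, z, gamma=1.0):
    # K(x, z) = exp(-gamma * ||x - z||^2)
    return np.exp(-gamma * np.sum((x - z) ** 2))

print(linear_kernel(x, z))   # plain dot product of the two points
print(rbf_kernel(x, z))      # near 1.0 because the two points are close
```

The RBF value depends only on the distance between the points, which is why it behaves like a similarity measure.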
# # 1. Import necessary packages
# In[66]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# **NumPy is used for numerical computation, pandas for reading the data file, and the warnings filter suppresses warning messages in the output.**
#
# # 2. Reading the file
# In[67]:
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# In[68]:
colnames = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
irisdata = pd.read_csv(url, names=colnames)
# ### A look at the dataset
# In[69]:
irisdata.head()
# **head() shows the first five observations of the dataset.**
# # 3. Identify the shape of the dataset
# In[70]:
irisdata.shape
# # 4. Using describe()
# In[71]:
irisdata.describe()
# **describe() gives the summary statistics of the Iris data.**
# # 5. Using info()
# In[72]:
irisdata.info()
# **info() gives the column count, the data types, and the number of non-null values, which shows whether the dataset has any missing values.**
# # 6. Correlation
# In[73]:
correlation = irisdata.drop('Class', axis=1).corr()  # correlate only the numeric feature columns
# In[74]:
correlation
# In[75]:
import seaborn as sns
sns.heatmap(correlation, annot=True)
# **Visualizing the correlation matrix with seaborn's heatmap function.**
# # 7. Pairplot
# In[92]:
sns.pairplot(irisdata)
# **A pair plot visualizes the relationship between every pair of columns as scatter plots, with each column's own distribution shown as a histogram on the diagonal.**
# # 8. Bar plots
# In[76]:
sns.barplot(x="Class", y="sepal-length", data=irisdata)
# In[77]:
sns.barplot(x="Class", y="sepal-width", data=irisdata)
# In[78]:
sns.barplot(x="Class", y="petal-length", data=irisdata)
# In[79]:
sns.barplot(x="Class", y="petal-width", data=irisdata)
# # 9. Specify the feature (x) and target (y) variables
# In[80]:
x = irisdata.drop('Class', axis=1)
y = irisdata['Class']
# In[81]:
print(x.shape)
print(y.shape)
# **We separate the data into the feature matrix x and the target y, and check their shapes with the shape attribute.**
# # 10. Split the Iris data into training and testing sets
# In[82]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20)
# **To train the model we split the data into training and testing sets in an 80:20 ratio, using the train_test_split function from the model_selection module.**
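# **(Aside): train_test_split shuffles randomly, so the accuracies below can vary between runs. A sketch of a reproducible, class-balanced split on toy data; the random_state and stratify values are illustrative.**

```python
import numpy as np
from sklearn.model_selection import train_test_split

# 20 toy samples with two balanced classes
X = np.arange(40).reshape(20, 2)
y = np.array([0] * 10 + [1] * 10)

# random_state fixes the shuffle; stratify=y keeps the class
# proportions equal in the train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y)

print(X_train.shape, X_test.shape)   # (16, 2) (4, 2)
print(np.bincount(y_test))           # [2 2]: neither class is over-sampled
```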
# # 11.(a) Fit the SVC model using a polynomial kernel
# In[42]:
from sklearn.svm import SVC
svclassifier = SVC(kernel='poly', degree=8)
svclassifier.fit(x_train, y_train)
y_pred = svclassifier.predict(x_test)
# In[22]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# **We fit an SVC with a polynomial kernel, predict on the held-out data (x_test), and print the confusion matrix and classification report of the fitted model.**
# In[43]:
from sklearn.model_selection import cross_val_score
data1 = cross_val_score(svclassifier, x_train, y_train, cv=5)
data1
# In[24]:
data1.mean()
# **To check the model's reliability we use 5-fold cross-validation, which gives an average accuracy of about 93.33% for this model.**
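# **(Aside): degree=8 is unusually high for a polynomial kernel and can overfit. A sketch of choosing the degree (and C) by cross-validated grid search instead of fixing it by hand; the synthetic data and the parameter grid are illustrative, not prescriptive.**

```python
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Synthetic stand-in data with 4 features, like the Iris feature matrix
X, y = make_classification(n_samples=100, n_features=4, random_state=0)

# Search a small, illustrative grid; GridSearchCV cross-validates
# every combination and keeps the best-scoring one
grid = GridSearchCV(SVC(kernel='poly'),
                    param_grid={'degree': [2, 3, 4], 'C': [0.1, 1, 10]},
                    cv=5)
grid.fit(X, y)
print(grid.best_params_)
print(grid.best_score_)
```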
# # (b) Fit the SVC model using an RBF kernel
# In[44]:
svclassifier2 = SVC(kernel='rbf')
svclassifier2.fit(x_train, y_train)
y_pred = svclassifier2.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# **We fit an SVC with an RBF kernel, predict on x_test, and print the confusion matrix and classification report of the fitted model.**
# In[45]:
from sklearn.model_selection import cross_val_score
data2 = cross_val_score(svclassifier2, x_train, y_train, cv=5)
data2
# In[37]:
data2.mean()
# **Cross-validation gives an average accuracy of about 97.5% for this model.**
# # (c) Fit the SVC model using a sigmoid kernel
# In[46]:
svclassifier3 = SVC(kernel='sigmoid')
svclassifier3.fit(x_train, y_train)
y_pred = svclassifier3.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# **We fit an SVC with a sigmoid kernel, predict on x_test, and print the confusion matrix and classification report of the fitted model.**
# In[87]:
from sklearn.model_selection import cross_val_score
data3 = cross_val_score(svclassifier3, x_train, y_train, cv=5)
data3
# In[34]:
data3.mean()
# **Cross-validation gives an average accuracy of only about 34% for this model.**
# # (d) Fit the SVC model using a linear kernel
# In[83]:
svclassifier4 = SVC(kernel='linear')
svclassifier4.fit(x_train, y_train)
y_pred = svclassifier4.predict(x_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
# **We fit an SVC with a linear kernel, predict on x_test, and print the confusion matrix and classification report of the fitted model.**
# In[84]:
from sklearn.model_selection import cross_val_score
data4 = cross_val_score(svclassifier4, x_train, y_train, cv=5)
data4
# In[85]:
data4.mean()
# **Cross-validation gives an average accuracy of about 97.5% for this model.**
# # 12. Testing with new observations whose actual class is Iris-setosa
# In[38]:
new_observation = [5.1, 3.5, 1.4, 0.2]
y_pred = svclassifier.predict([new_observation])
print('predicted class value is ', y_pred)
# In[39]:
new_observation = [5.0, 3.4, 1.6, 0.4]
y_pred = svclassifier2.predict([new_observation])
print('predicted class value is ', y_pred)
# In[40]:
new_observation = [5.0, 3.4, 1.6, 0.4]
y_pred = svclassifier3.predict([new_observation])
print('predicted class value is ', y_pred)
# In[53]:
new_observation = [5.0, 3.4, 1.6, 0.4]
y_pred = svclassifier4.predict([new_observation])
print('predicted class value is ', y_pred)
# **All models classify the new observation correctly as Iris-setosa except the sigmoid-kernel model, which classifies it as Iris-virginica, consistent with that model's much lower cross-validated accuracy (about 34%).**
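# **(Aside): the sigmoid kernel's weak score is partly because SVMs are sensitive to feature scale. A sketch of standardizing the features first with a Pipeline; the Iris data is loaded from sklearn here so the comparison is self-contained, and the exact scores will vary with the fold split.**

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X, y = load_iris(return_X_y=True)

unscaled = SVC(kernel='sigmoid')
# The pipeline re-fits the scaler inside each CV fold, so there is
# no leakage from test folds into the scaling statistics
scaled = make_pipeline(StandardScaler(), SVC(kernel='sigmoid'))

print(cross_val_score(unscaled, X, y, cv=5).mean())
print(cross_val_score(scaled, X, y, cv=5).mean())
```

Standardizing each feature to zero mean and unit variance typically raises the sigmoid kernel's accuracy substantially on this data.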
# In[ ]: