import pandas as pd ## To load the data and create DataFrame
import numpy as np ## For creating arrays (used in the plotting section below)
import matplotlib.pyplot as plt ## For plotting of data
import seaborn as sns ## For plotting of data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
#1. CLASSIFICATION USING KNN
from sklearn.neighbors import KNeighborsClassifier ## For KNN Classification method
# Load Dataset
df = pd.read_csv('/content/sample_data/Iris.csv')
#Dividing Data Into Features and Labels
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
# Assign values to the X and y variables:
X = df[feature_columns].values
y = df['Species'].values
# Assign values to the X and y variables: Alternative method
# X = df.iloc[:, 1:5].values   # columns 1-4 are the four feature columns
# y = df.iloc[:, 5].values     # column 5 is Species
# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Standardize features by removing mean and scaling to unit variance:
# scaler = StandardScaler()
# scaler.fit(X_train)
# X_train = scaler.transform(X_train)
# X_test = scaler.transform(X_test)
# Use the KNN classifier to fit data:
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)
# Predict y data with classifier:
y_predict = classifier.predict(X_test)
# Print results:
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
#------------------ PLOTTING USING HEATMAP--------------------------
# cm = confusion_matrix(y_test, y_predict)
# # Transform to df for easier plotting
# cm_df = pd.DataFrame(cm,
# index = ['setosa','versicolor','virginica'],
# columns = ['setosa','versicolor','virginica'])
# plt.figure(figsize=(5.5,4))
# sns.heatmap(cm_df, annot=True)
# plt.title('KNN \nAccuracy:{0:.3f}'.format(accuracy_score(y_test, y_predict)))
# plt.ylabel('True label')
# plt.xlabel('Predicted label')
# plt.show()
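#------------------ OPTIONAL: CHOOSING k (sketch, not part of the original gist) ----------------
# n_neighbors=5 above is an assumption; comparing test accuracy over a small range of k
# on the same split is a quick way to sanity-check that choice.
for k in range(1, 16):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    print("k =", k, "accuracy =", accuracy_score(y_test, knn.predict(X_test)))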
#2. CLASSIFICATION USING SVM
#Import SVM model
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
# Load Dataset
df = pd.read_csv('/content/sample_data/Iris.csv')
#Dividing Data Into Features and Labels
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
# Assign values to the X and y variables:
X = df[feature_columns].values
y = df['Species'].values
# label_encoder_y= LabelEncoder()
# y= label_encoder_y.fit_transform(y)
# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Use the SVM classifier to fit data:
classifier1 = svm.SVC(kernel='sigmoid') # linear, sigmoid, rbf
classifier1.fit(X_train, y_train)
# Predict y data with classifier:
y_predict = classifier1.predict(X_test)
# Print results:
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
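#------------------ OPTIONAL: KERNEL COMPARISON (sketch, not part of the original gist) ---------
# The sigmoid kernel above often underperforms on Iris; comparing the three kernels listed
# in the comment on the same split shows how sensitive the SVM is to this choice.
for kernel in ['linear', 'rbf', 'sigmoid']:
    clf = svm.SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    print(kernel, "accuracy:", accuracy_score(y_test, clf.predict(X_test)))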
#3. CLASSIFICATION USING DECISION TREE
#Import Decision Tree model
from sklearn.tree import DecisionTreeClassifier
# Load Dataset
df = pd.read_csv('/content/sample_data/Iris.csv')
#Dividing Data Into Features and Labels
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
# Assign values to the X and y variables:
X = df[feature_columns].values
y = df['Species'].values
# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44)
# Use the Decision Tree classifier to fit data:
classifier2 = DecisionTreeClassifier(criterion="gini")
# train the model
classifier2.fit(X_train, y_train)
# Predict y data with classifier:
y_predict = classifier2.predict(X_test)
# Print results:
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
print("Accuracy:",accuracy_score(y_test, y_predict))
#4. CLASSIFICATION USING GAUSSIAN NAIVE BAYES
#Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB
# Load Dataset
df = pd.read_csv('/content/sample_data/Iris.csv')
#Dividing Data Into Features and Labels
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
# Assign values to the X and y variables:
X = df[feature_columns].values
y = df['Species'].values
# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Use the Gaussian Naive Bayes classifier to fit data:
classifier3 = GaussianNB()
# train the model
classifier3.fit(X_train, y_train)
# Predict y data with classifier:
y_predict = classifier3.predict(X_test)
# Print results:
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
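#------------------ OPTIONAL: CLASS PROBABILITIES (sketch, not part of the original gist) -------
# GaussianNB also exposes per-class probabilities, useful for inspecting borderline predictions.
proba = classifier3.predict_proba(X_test[:5])
print(pd.DataFrame(proba, columns=classifier3.classes_))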
#5. CLASSIFICATION USING RANDOM FOREST
#Import RANDOM FOREST CLASSIFIER
from sklearn.ensemble import RandomForestClassifier
# Load Dataset
df = pd.read_csv('/content/sample_data/Iris.csv')
#Dividing Data Into Features and Labels
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
# Assign values to the X and y variables:
X = df[feature_columns].values
y = df['Species'].values
# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
# Use the Random Forest classifier to fit data:
classifier4 = RandomForestClassifier()
# train the model
classifier4.fit(X_train, y_train)
# Predict y data with classifier:
y_predict = classifier4.predict(X_test)
# Print results:
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
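#------------------ OPTIONAL: FEATURE IMPORTANCES (sketch, not part of the original gist) -------
# Random Forests expose feature_importances_, a quick view of which measurements drive
# the predictions.
importances = pd.Series(classifier4.feature_importances_, index=feature_columns)
print(importances.sort_values(ascending=False))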
#6. CLASSIFICATION USING LOGISTIC REGRESSION
#Import LOGISTIC REGRESSION CLASSIFIER
from sklearn.linear_model import LogisticRegression
# Load Dataset
df = pd.read_csv('/content/sample_data/Iris.csv')
#Dividing Data Into Features and Labels
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
# Assign values to the X and y variables:
X = df[feature_columns].values
y = df['Species'].values
# Split dataset into random train and test subsets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Use the Logistic Regression classifier to fit data:
classifier5 = LogisticRegression()
# train the model
classifier5.fit(X_train, y_train)
# Predict y data with classifier:
y_predict = classifier5.predict(X_test)
# Print results:
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
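#------------------ OPTIONAL: CROSS-VALIDATION (sketch, not part of the original gist) ----------
# A single 80/20 split on a 150-row dataset can be noisy; 5-fold cross-validation gives a
# steadier accuracy estimate. max_iter=200 is an assumption to avoid convergence warnings.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(LogisticRegression(max_iter=200), X, y, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))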
df.head(10)
len(df)
df.shape
df.query('SepalLengthCm > 4.5')
df.iloc[0:3]
df['SepalLengthCm'].max()
df['SepalLengthCm'].min()
df['SepalLengthCm'].count()
df['Species'].replace(["Iris-setosa", "Iris-virginica"],["V","B"])
df.rename(columns={'Species':"Hello-World"})
df[df['Id'].isnull()]
df.drop('Id',axis=1)
df.drop(['Id','SepalLengthCm'],axis=1)
df.drop(df.columns[1],axis=1)
df.drop([0,1])
df['SepalLengthCm'].unique()
df['SepalLengthCm'].value_counts()
df["SepalLengthCm"].nunique()
df.groupby('SepalLengthCm').mean(numeric_only=True)
df.head(10)
df.groupby(['SepalLengthCm','SepalWidthCm']).mean(numeric_only=True)
df.head(10)
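# Optional sketch (not part of the original gist): groupby().agg() computes several
# summaries at once, e.g. per-species petal length statistics.
print(df.groupby('Species')['PetalLengthCm'].agg(['mean', 'max', 'count']))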
#matplotlib
x=df['SepalLengthCm']
y=df['SepalWidthCm']
plt.plot(x,y)
plt.title("Graph")
plt.xlabel("Lenght")
plt.ylabel("Weight")
df.plot(kind="scatter", x="Id", y="SepalWidthCm")
plt.show()
df['SepalWidthCm'].hist()
plt.scatter(df["Id"],df['SepalWidthCm'],linewidth=1,marker="o",edgecolor="black",s=200)
plt.show()
#numpy
x=np.array([0,1,2,3])
y=np.array([3,5,1,6])
plt.subplot(1,3,1)
plt.plot(x,y)
plt.subplot(1,3,2)
plt.plot(x,y)
plt.subplot(1,3,3)
plt.plot(x,y)
plt.show()
#seaborn
sns.histplot(df['SepalWidthCm'].head(10),kde=True, bins=10)
x=df["SepalLengthCm"]
plt.hist(x,bins=10,color="Red")
plt.title("Histogram")
plt.xlabel("Number")
plt.ylabel("Length")
plt.show()
x=df[["SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm"]]
print(x.describe())
x.boxplot()
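# Optional sketch (not part of the original gist): seaborn's pairplot shows all pairwise
# feature relationships coloured by species in a single call.
sns.pairplot(df, hue='Species', vars=feature_columns)
plt.show()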
#sklearn
feature_columns=["SepalLengthCm","SepalWidthCm","PetalLengthCm","PetalWidthCm"]
print(feature_columns)
x=df[feature_columns].values
y=df['Species'].values
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
scaler=StandardScaler()
scaler.fit(x_train)
x_train=scaler.transform(x_train)
x_test=scaler.transform(x_test)
classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train,y_train)
y_predict=classifier.predict(x_test)
print("Confusion Matrix:- ")
print(confusion_matrix(y_test,y_predict))
print(classification_report(y_test,y_predict))
cm=confusion_matrix(y_test,y_predict)
cm_df=pd.DataFrame(cm,index=['Setosa','Versicolor','Virginica'], columns=['Setosa','Versicolor','Virginica'])
plt.figure(figsize=(5.5,4))
sns.heatmap(cm_df,annot=True)
plt.title("HeatMap")
plt.xlabel("True Label")
plt.ylabel("Predict Label")
plt.show()