Last active
March 27, 2023 17:44
-
-
Save Vatsal596/d39c6048b7247b787ce560cb08eb152d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt  # For plotting of data
import numpy as np  # For the small demo arrays in the numpy section
import pandas as pd  # To load the data and create DataFrame
import seaborn as sns  # For plotting of data
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# 1. CLASSIFICATION USING KNN
from sklearn.neighbors import KNeighborsClassifier  # KNN classification method

# Load the dataset.
df = pd.read_csv('/content/sample_data/Iris.csv')

# Divide the data into features (the four measurements) and labels (the species).
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

# Alternative: select the same columns by position.
# NOTE(review): assumes the CSV column order is Id, the four measurements, Species
# -- confirm against the file before using.
# X = df.iloc[:, 1:5].values
# y = df.iloc[:, 5].values

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Optional: standardize features by removing the mean and scaling to unit
# variance.  The scaler is fitted on the training rows only and then applied to
# both subsets.
# scaler = StandardScaler()
# scaler.fit(X_train)
# X_train = scaler.transform(X_train)
# X_test = scaler.transform(X_test)

# Fit a 5-nearest-neighbours classifier on the training subset.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)

# Predict the species of the held-out rows.
y_predict = classifier.predict(X_test)

# Print results.
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))

# ------------------ PLOTTING USING HEATMAP --------------------------
# cm = confusion_matrix(y_test, y_predict)
# # Transform to a DataFrame for easier plotting
# cm_df = pd.DataFrame(cm,
#                      index=['setosa', 'versicolor', 'virginica'],
#                      columns=['setosa', 'versicolor', 'virginica'])
# plt.figure(figsize=(5.5, 4))
# sns.heatmap(cm_df, annot=True)
# plt.title('KNN \nAccuracy:{0:.3f}'.format(accuracy_score(y_test, y_predict)))
# plt.ylabel('True label')
# plt.xlabel('Predicted label')
# plt.show()
# 2. CLASSIFICATION USING SVM
# Import the SVM model (LabelEncoder is only needed by the optional encoding below).
from sklearn import svm
from sklearn.preprocessing import LabelEncoder

# Load the dataset.
df = pd.read_csv('/content/sample_data/Iris.csv')

# Divide the data into features and labels.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

# Optional: encode the species strings as integers.
# label_encoder_y = LabelEncoder()
# y = label_encoder_y.fit_transform(y)

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit a support-vector classifier.  Other kernels to try: linear, rbf.
# NOTE(review): the features are not standardized here; kernel SVMs are
# sensitive to feature scale, so the sigmoid kernel may score poorly -- confirm
# whether scaling was intended.
classifier1 = svm.SVC(kernel='sigmoid')
classifier1.fit(X_train, y_train)

# Predict the species of the held-out rows.
y_predict = classifier1.predict(X_test)

# Print results.
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
# 3. CLASSIFICATION USING DECISION TREE
# Import the Decision Tree model.
from sklearn.tree import DecisionTreeClassifier

# Load the dataset.
df = pd.read_csv('/content/sample_data/Iris.csv')

# Divide the data into features and labels.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

# Hold out 20% of the rows for testing.  This section seeds the split with 44
# (the other sections use 0), so its test rows differ from theirs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=44)

# Build a decision tree that chooses splits by Gini impurity, then train it.
classifier2 = DecisionTreeClassifier(criterion="gini")
classifier2.fit(X_train, y_train)

# Predict the species of the held-out rows.
y_predict = classifier2.predict(X_test)

# Print results, including the overall accuracy.
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
print("Accuracy:", accuracy_score(y_test, y_predict))
# 4. CLASSIFICATION USING GAUSSIAN NAIVE BAYES
# Import the Gaussian Naive Bayes model.
from sklearn.naive_bayes import GaussianNB

# Load the dataset.
df = pd.read_csv('/content/sample_data/Iris.csv')

# Divide the data into features and labels.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Use the Gaussian Naive Bayes classifier to fit the data.
classifier3 = GaussianNB()
classifier3.fit(X_train, y_train)

# Predict the species of the held-out rows.
y_predict = classifier3.predict(X_test)

# Print results.
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
# 5. CLASSIFICATION USING RANDOM FOREST
# Import the Random Forest classifier.
from sklearn.ensemble import RandomForestClassifier

# Load the dataset.
df = pd.read_csv('/content/sample_data/Iris.csv')

# Divide the data into features and labels.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

# Hold out 30% of the rows for testing (the other sections hold out 20%),
# with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Use the Random Forest classifier, with its default settings, to fit the data.
classifier4 = RandomForestClassifier()
classifier4.fit(X_train, y_train)

# Predict the species of the held-out rows.
y_predict = classifier4.predict(X_test)

# Print results.
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
# 6. CLASSIFICATION USING LOGISTIC REGRESSION
# Import the Logistic Regression classifier (despite its name it is used here
# to predict the species class, not a continuous value).
from sklearn.linear_model import LogisticRegression

# Load the dataset.
df = pd.read_csv('/content/sample_data/Iris.csv')

# Divide the data into features and labels.
feature_columns = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']
X = df[feature_columns].values
y = df['Species'].values

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Use the Logistic Regression classifier, with its default settings, to fit the data.
# NOTE(review): on unscaled features the default iteration limit may emit a
# ConvergenceWarning -- confirm, and raise max_iter or scale the data if so.
classifier5 = LogisticRegression()
classifier5.fit(X_train, y_train)

# Predict the species of the held-out rows.
y_predict = classifier5.predict(X_test)

# Print results.
print("CONFUSION MATRIX : ")
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))
# pandas: a quick tour of the loaded Iris DataFrame.
# None of these expressions is assigned, so `df` itself is never modified; the
# values are only displayed when run interactively (e.g. one per notebook cell).

# Size and first rows.
df.head(10)
len(df)
df.shape

# Row selection: by condition and by position.
df.query('SepalLengthCm > 4.5')
df.iloc[0:3]

# Column summaries.  These are methods and must be called; without the
# parentheses the expression is just the bound method object, not the value.
df['SepalLengthCm'].max()
df['SepalLengthCm'].min()
df['SepalLengthCm'].count()

# Value replacement and column renaming (each returns a new object).
df['Species'].replace(["Iris-setosa", "Iris-virginica"], ["V", "B"])
df.rename(columns={'Species': "Hello-World"})

# Rows whose Id is missing.
df[df['Id'].isnull()]

# Dropping columns (by name, by several names, by position) and rows (by index label).
df.drop('Id', axis=1)
df.drop(['Id', 'SepalLengthCm'], axis=1)
df.drop(df.columns[1], axis=1)
df.drop([0, 1])

# Distinct values and their frequencies.
df['SepalLengthCm'].unique()
df['SepalLengthCm'].value_counts()
df["SepalLengthCm"].nunique()

# Group means.  numeric_only=True skips the string-valued Species column, which
# cannot be averaged and makes a plain .mean() raise on recent pandas versions.
df.groupby('SepalLengthCm').mean(numeric_only=True)
df.head(10)
df.groupby(['SepalLengthCm', 'SepalWidthCm']).mean(numeric_only=True)
df.head(10)
# matplotlib
# Line plot of sepal width against sepal length, in row order.
x = df['SepalLengthCm']
y = df['SepalWidthCm']
plt.plot(x, y)
plt.title("Graph")
# Axis labels name the plotted quantities: x is sepal length, y is sepal width.
plt.xlabel("Length")
plt.ylabel("Width")

# Scatter plot of sepal width against the row Id, drawn through the pandas
# plotting API.
df.plot(kind="scatter", x="Id", y="SepalWidthCm")
plt.show()

# Histogram of sepal width, followed by a scatter of the same column against Id
# using large black-edged circular markers.
df['SepalWidthCm'].hist()
plt.scatter(df["Id"], df['SepalWidthCm'], linewidth=1, marker="o", edgecolor="black", s=200)
plt.show()
# numpy
# Two small arrays, plotted identically in each panel of a 1-row, 3-column grid.
x = np.array([0, 1, 2, 3])
y = np.array([3, 5, 1, 6])
for panel in (1, 2, 3):
    plt.subplot(1, 3, panel)
    plt.plot(x, y)
# Display the grid now; otherwise, when run as a script, the next plotting call
# would draw into the still-active third panel.
plt.show()
# seaborn
# Histogram (10 bins) with a kernel-density curve of the first ten sepal widths.
sns.histplot(df['SepalWidthCm'].head(10), kde=True, bins=10)

# Red 10-bin histogram of sepal length.  The x axis holds the length values and
# the y axis the number of rows in each bin, so the labels are set accordingly.
x = df["SepalLengthCm"]
plt.hist(x, bins=10, color="Red")
plt.title("Histogram")
plt.xlabel("Length")
plt.ylabel("Number")
plt.show()

# Summary statistics and a box plot of the four measurement columns.
x = df[["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]]
print(x.describe())
x.boxplot()
# sklearn
# KNN on standardized features, with the confusion matrix drawn as a heatmap.
feature_columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"]
print(feature_columns)
x = df[feature_columns].values
y = df['Species'].values

# Hold out 20% of the rows for testing.  The seed matches the other sections so
# the result is repeatable from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

# Standardize the features; the scaler is fitted on the training rows only and
# then applied to both subsets.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Fit a 5-nearest-neighbours classifier and predict the held-out rows.
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train, y_train)
y_predict = classifier.predict(x_test)

# Pass the full, sorted list of species explicitly so the matrix always has one
# row and column per species, even if one is absent from the test subset.
species = sorted(df['Species'].unique())
cm = confusion_matrix(y_test, y_predict, labels=species)
print("Confusion Matrix:- ")
print(cm)
print(classification_report(y_test, y_predict))

# Tick labels for the plot: the species names without any "Iris-" prefix,
# capitalized (e.g. "Iris-setosa" -> "Setosa").
display_names = [name.replace('Iris-', '').capitalize() for name in species]
cm_df = pd.DataFrame(cm, index=display_names, columns=display_names)

plt.figure(figsize=(5.5, 4))
sns.heatmap(cm_df, annot=True)
plt.title("HeatMap")
# confusion_matrix puts true labels on the rows and predictions on the columns;
# the heatmap draws rows along the y axis and columns along the x axis.
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment