Created
February 25, 2021 12:10
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A.Importing Libraries --> | |
import pandas as pd | |
from sklearn.pipeline import make_pipeline | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.metrics import classification_report | |
from sklearn.metrics import confusion_matrix | |
# Models Used --> | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.svm import SVC | |
# B. Setting up dataframes -->
# All paths are relative to the directory the script is launched from.
train_data = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')
result = pd.read_csv('data/my_submission.csv')

# Mean of the "Survived" column per sex in the training data
# (presumably a 0/1 flag, which makes the mean a survival rate - confirm).
# The column is selected in the same .loc call instead of chained indexing,
# and Series.mean() replaces a Python-level sum()/len(): it is vectorised and
# gives NaN rather than raising ZeroDivisionError when a group has no rows.
women = train_data.loc[train_data.Sex == 'female', "Survived"]
rate_women = women.mean()
men = train_data.loc[train_data.Sex == 'male', "Survived"]
rate_men = men.mean()
# C. Setting up training / testing data -->
y_train = train_data["Survived"]
# NOTE(review): the evaluation labels are read from my_submission.csv, which
# looks like a file of predicted labels rather than ground truth - confirm.
# This also assumes its rows are in the same order as test.csv.
y_test = result['Survived']
features = ["Pclass", "Sex", "SibSp", "Parch"]
X_train = pd.get_dummies(train_data[features])
# One-hot encode the test set, then force it onto exactly the training columns
# (same set, same order). A category present in only one of the two files
# would otherwise give the fitted models a differently shaped matrix; columns
# missing from the test set are filled with 0 and unseen ones are dropped.
X_test = pd.get_dummies(test_data[features]).reindex(
    columns=X_train.columns, fill_value=0
)
# D. Fit every classifier on the training data -->
# Pipeline.fit returns the pipeline itself, so each model is built and
# fitted in a single expression.

# 1. Decision tree classifier (the only pipeline with a scaling step) -->
dtc = make_pipeline(
    StandardScaler(),
    DecisionTreeClassifier(random_state=0),
).fit(X_train, y_train)

# 2. Random forest classifier -->
rf = make_pipeline(
    RandomForestClassifier(n_estimators=100, max_depth=5, random_state=0),
).fit(X_train, y_train)

# 3. Gradient boosting classifier -->
gbc = make_pipeline(
    GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=1.0,
        max_depth=1,
        random_state=0,
    ),
).fit(X_train, y_train)

# 4. Support vector classifier -->
svc = make_pipeline(
    SVC(gamma="auto"),
).fit(X_train, y_train)
# E. Analysing results -->
def _print_model_report(title, model, X, y_true, accuracy_label='Accuracy Score'):
    """Print accuracy, classification report and confusion matrix for a model.

    Args:
        title: Heading printed above the metrics.
        model: Fitted estimator exposing ``predict`` and ``score``.
        X: Feature matrix to evaluate on.
        y_true: Labels the predictions are compared against.
        accuracy_label: Text printed before the accuracy value.
    """
    separator = "-----------------------------------"
    # Predict once and reuse the result for both reports instead of calling
    # predict() separately for the classification report and the matrix.
    predictions = model.predict(X)

    print(separator)
    print(title)
    print('{}: {:,.2f}'.format(accuracy_label, model.score(X, y_true)))
    print()
    print(classification_report(y_true, predictions))
    print()
    print('confusion_matrix: \n {}'.format(confusion_matrix(y_true, predictions)))
    print()
    print(separator)


# 1. Decision tree classifier -->
_print_model_report('Decision Tree Classifier', dtc, X_test, y_test)
# 2. Random forest classifier -->
_print_model_report('Random Forest Classification', rf, X_test, y_test)
# 3. Gradient boosting classifier -->
_print_model_report('Gradient Booster', gbc, X_test, y_test)
# 4. Support vector classifier -->
# The lower-case label is kept so the printed output matches the original.
_print_model_report('Support Vector Machine', svc, X_test, y_test,
                    accuracy_label='Accuracy score')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment