Skip to content

Instantly share code, notes, and snippets.

View PankajMehar's full-sized avatar
🏠
Working from home

PankajMehar PankajMehar

🏠
Working from home
View GitHub Profile
import pandas as pd
import numpy as np
import random as rnd
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import random as rnd
## Code courtesy: Manav Sehgal (https://www.kaggle.com/startupsci)
## Link to Original Code: https://www.kaggle.com/startupsci/titanic-data-science-solutions
# Read the training and test sets and discard the Ticket and Cabin columns.
# NOTE(review): `train` and `test` must already be defined before this point
# (presumably CSV paths or file objects) - confirm against the caller.
train_df = pd.read_csv(train).drop(columns=['Ticket', 'Cabin'])
test_df = pd.read_csv(test).drop(columns=['Ticket', 'Cabin'])
import pandas as pd
import numpy as np
import random as rnd
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import random as rnd
# Convert categorical columns of both data sets into dummy/indicator columns.
# NOTE(review): train and test are encoded independently, so their resulting
# columns can differ if a category is missing from one of them - confirm
# before using a model fitted on `train` to predict on `test`.
train = pd.get_dummies(train)
test = pd.get_dummies(test)

# Column 0 is the target; every remaining column is a feature.
y = train.iloc[:, 0]
X = train.iloc[:, 1:]

# Keep 30% of the rows aside for evaluation, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=50,
)
def gridfunc(classifier, parameter, X, y):
    """Grid-search ``parameter`` for ``classifier`` and return the fitted search.

    Args:
        classifier: Estimator instance handed to ``GridSearchCV``.
        parameter: Parameter grid handed to ``GridSearchCV``.
        X: Feature data the search is fitted on.
        y: Target values matching ``X``.

    Returns:
        The object returned by ``GridSearchCV(...).fit(X, y)``.
    """
    # Rank every parameter combination by plain classification accuracy.
    acc_scorer = make_scorer(accuracy_score)

    grid_obj = GridSearchCV(classifier, parameter, scoring=acc_scorer)

    # Fit on the data supplied by the caller rather than on the module-level
    # X_train / y_train, so the X and y arguments are actually honoured.
    return grid_obj.fit(X, y)
from sklearn import metrics
from sklearn.metrics import accuracy_score

# Predict labels for the held-out features with the already-fitted `clf`.
y_predict = clf.predict(X_test)
# Bare expression kept from the notebook cell, where it displays the array.
y_predict

# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_test, y_predict)
0.84328358208955223
Variable Definition Key
survival Survival 0 = No, 1 = Yes
pclass Ticket class 1 = 1st, 2 = 2nd, 3 = 3rd
sex Sex
Age Age in years
sibsp # of siblings / spouses aboard the Titanic
parch # of parents / children aboard the Titanic
ticket Ticket number
fare Passenger fare
cabin Cabin number
# Remove the Ticket and Cabin columns from both frames.
train_df = train_df.drop(columns=['Ticket', 'Cabin'])
test_df = test_df.drop(columns=['Ticket', 'Cabin'])