Created
April 17, 2018 16:29
-
-
Save Shirataki2/721b2f430b26c93447fcdaaf80610083 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import csv as csv | |
import numpy as np | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.svm import SVC | |
from sklearn.model_selection import train_test_split, cross_val_score | |
# Read the training set; the first CSV row supplies the column names.
train_df = pd.read_csv("train.csv", header=0)

# Encode "Sex" as a numeric "Gender" column: male -> 0, female -> 1.
gender_codes = {"male": 0, "female": 1}
train_df["Gender"] = train_df["Sex"].map(gender_codes).astype(int)

# Fill the gaps in "Age" with the median of the ages that are present
# (Series.median skips NaN entries by default).
median_age = train_df["Age"].median()
train_df["Age"] = train_df["Age"].fillna(median_age)

# Discard the columns that are not fed to the model.
unused_columns = [
    "Name",
    "Ticket",
    "Sex",
    "Embarked",
    "Fare",
    "Cabin",
    "PassengerId",
]
train_df = train_df.drop(columns=unused_columns)
# Read the test set; the first CSV row supplies the column names.
test_df = pd.read_csv("test.csv", header=0)

# Encode "Sex" as a numeric "Gender" column: male -> 0, female -> 1.
gender_codes = {"male": 0, "female": 1}
test_df["Gender"] = test_df["Sex"].map(gender_codes).astype(int)

# Fill the gaps in "Age" with the median of the ages present in this
# (test) frame; Series.median skips NaN entries by default.
median_age = test_df["Age"].median()
test_df["Age"] = test_df["Age"].fillna(median_age)

# Remember the passenger ids for the submission before the column is dropped.
ids = test_df["PassengerId"].values

# Discard the columns that are not fed to the model.
unused_columns = [
    "Name",
    "Ticket",
    "Sex",
    "Embarked",
    "Fare",
    "Cabin",
    "PassengerId",
]
test_df = test_df.drop(columns=unused_columns)
# Convert the cleaned frames to plain NumPy arrays for scikit-learn.
train_data = train_df.values

# Column 0 is used as the target and every remaining column as a feature.
# NOTE(review): this assumes the label is the first column left in train.csv
# after the drops above -- confirm against the CSV header.
y_train = train_data[:, 0]
X_train = train_data[:, 1:]

test_data = test_df.values
# Candidate values 2**-10 ... 2**9, used for both gamma and C.
params = [2**exponent for exponent in range(-10, 10)]

best_score = 0
best_params = {}

# Exhaustive search over every (gamma, C) pair, with gamma varying slowest.
for gamma_value, c_value in [(g, c) for g in params for c in params]:
    candidate = SVC(gamma=gamma_value, C=c_value)
    # Average the five cross-validation fold scores for this pair.
    fold_mean = cross_val_score(candidate, X_train, y_train, cv=5).mean()
    # Strict '>' keeps the earliest pair when several reach the same score.
    if fold_mean > best_score:
        best_score = fold_mean
        best_params = {'gamma': gamma_value, 'C': c_value}

print(best_score)
print(best_params)
# Refit on the full training set with the parameters picked by the search,
# then predict an integer label for every test row.
svm = SVC(**best_params)
output = svm.fit(X_train, y_train).predict(test_data).astype(int)

# Write one (PassengerId, prediction) row per test passenger.
# - The context manager closes the file even if a write fails.
# - newline="" is what the csv module requires for files it writes to;
#   without it, Windows emits "\r\r\n" line endings (blank rows).
# NOTE(review): the header uses lowercase "survived"; Kaggle's sample
# submission appears to use "Survived" -- confirm before uploading.
with open("titanic_submit.csv", "w", newline="") as submit_file:
    writer = csv.writer(submit_file)
    writer.writerow(["PassengerId", "survived"])
    writer.writerows(zip(ids, output))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment