Skip to content

Instantly share code, notes, and snippets.

@Shirataki2
Created April 17, 2018 16:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Shirataki2/721b2f430b26c93447fcdaaf80610083 to your computer and use it in GitHub Desktop.
Save Shirataki2/721b2f430b26c93447fcdaaf80610083 to your computer and use it in GitHub Desktop.
import pandas as pd
import csv as csv
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
# load data
train_df = pd.read_csv("train.csv", header=0)
# convert sex to be a dummy val "gender"{male = 0, female = 1}
train_df["Gender"] = train_df["Sex"].map({"male": 0, "female": 1}).astype(int)
# complement the missing values: "age"
median_age = train_df["Age"].dropna().median()
if len(train_df.Age[train_df.Age.isnull()]) > 0:
train_df.loc[(train_df.Age.isnull()), "Age"] = median_age
# remove unused columns
train_df = train_df.drop(["Name", "Ticket", "Sex", "Embarked",
"Fare", "Cabin",
"PassengerId"], axis=1)
# load data
test_df = pd.read_csv("test.csv", header=0)
# convert sex to be a dummy val "gender"{male = 0, female = 1}
test_df["Gender"] = test_df["Sex"].map({"male": 0, "female": 1}).astype(int)
# complement the missing values: "age"
median_age = test_df["Age"].dropna().median()
if len(test_df.Age[test_df.Age.isnull()]) > 0:
test_df.loc[(test_df.Age.isnull()), "Age"] = median_age
# remove unused columns
ids = test_df["PassengerId"].values
test_df = test_df.drop(["Name", "Ticket", "Sex", "Embarked",
"Fare", "Cabin",
"PassengerId"], axis=1)
train_data = train_df.values
X_train, y_train = train_data[0::, 1::], train_data[0::, 0]
test_data = test_df.values
params = [2**i for i in range(-10, 10)]
best_score = 0
best_params = {}
for gamma in params:
for C in params:
svm = SVC(gamma=gamma, C=C)
scores = cross_val_score(svm, X_train, y_train, cv=5)
score = np.mean(scores)
if score > best_score:
best_score = score
best_params = {'gamma': gamma, 'C': C}
print(best_score)
print(best_params)
svm = SVC(** best_params)
output = svm.fit(X_train, y_train).predict(test_data).astype(int)
submit_file = open("titanic_submit.csv", 'w')
file_object = csv.writer(submit_file)
file_object.writerow(["PassengerId", "survived"])
file_object.writerows(zip(ids, output))
submit_file.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment