Skip to content

Instantly share code, notes, and snippets.

@doloopwhile
Created October 21, 2017 08:27
Show Gist options
  • Save doloopwhile/3fea26abfd98f0fa689d34f5e02f0de3 to your computer and use it in GitHub Desktop.
Save doloopwhile/3fea26abfd98f0fa689d34f5e02f0de3 to your computer and use it in GitHub Desktop.
titanic
import sys
import csv
import pandas as pd
from pprint import pprint
from sklearn import linear_model
from sklearn import svm
from sklearn.preprocessing import StandardScaler
def read_csv(filename):
    """Load a passenger CSV and return it with ``Sex`` encoded as an integer.

    The ``Name``, ``Cabin``, ``Embarked`` and ``Ticket`` columns are dropped.
    ``Sex`` is mapped to 1 for ``'male'``, -1 for ``'female'`` and 0 for any
    other value (including a missing one).

    Args:
        filename: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        The resulting ``pandas.DataFrame``.
    """
    # male: 1, female: -1; anything else falls back to 0.
    sex_codes = {'male': 1, 'female': -1}
    unused_columns = ['Name', 'Cabin', 'Embarked', 'Ticket']

    frame = pd.read_csv(filename).drop(unused_columns, axis=1)
    frame['Sex'] = frame['Sex'].map(lambda value: sex_codes.get(value, 0))
    return frame
def regressions():
    """Return fresh, unfitted candidate models to compare.

    The order of the list matters: ``main`` selects the model used for the
    submission by its position in this list, so new models should be
    appended rather than inserted.

    Returns:
        A list of seven scikit-learn estimators: a linear regression, a
        logistic regression, polynomial-kernel SVCs of degree 2, 3 and 4,
        an RBF-kernel SVC and a sigmoid-kernel SVC.
    """
    # Extra keyword arguments shared by every SVC (none at the moment).
    svc_kwargs = {}
    return [
        # ``normalize=True`` is intentionally not passed: the keyword was
        # deprecated in scikit-learn 1.0 and removed in 1.2 (TypeError), and
        # the caller already standardizes the features with StandardScaler.
        linear_model.LinearRegression(),
        linear_model.LogisticRegression(),
        svm.SVC(kernel="poly", degree=2, **svc_kwargs),
        svm.SVC(kernel="poly", degree=3, **svc_kwargs),
        svm.SVC(kernel="poly", degree=4, **svc_kwargs),
        svm.SVC(kernel="rbf", **svc_kwargs),
        svm.SVC(kernel="sigmoid", **svc_kwargs),
    ]
def _accuracy(actual, estimated):
    """Return the fraction of rows whose predicted class matches *actual*.

    Args:
        actual: ``pandas.Series`` of -1/1 labels.
        estimated: Array of model outputs, either -1/1 class predictions or a
            continuous regression output; it is thresholded at 0.

    Returns:
        The accuracy as a float, or NaN when *actual* is empty.
    """
    if len(actual) == 0:
        return float('nan')
    actual_positive = actual.values == 1
    # Labels are -1/1, so the decision boundary is 0 (the same rule used when
    # writing the submission), not 0.5.
    predicted_positive = estimated >= 0
    return (actual_positive == predicted_positive).mean()


def main():
    """Train the candidate models, report hold-out accuracy, write answer.csv.

    Reads ``train.csv``, fills missing ``Age``/``Fare`` values with the
    training means, fits every model from ``regressions()`` on the first 800
    rows and prints each model's accuracy on the remaining rows.  Then the
    model at index 5 is used to predict ``test.csv`` and the result is
    written to ``answer.csv`` with ``PassengerId`` and ``Survived`` columns.
    """
    scaler = StandardScaler()
    train_df = read_csv('train.csv').drop('PassengerId', axis=1)

    age_mean = train_df['Age'].mean()
    # Must be the scalar mean: passing the Series itself to fillna() aligns
    # by row index and does not fill with the average fare.
    fare_mean = train_df['Fare'].mean()
    train_df['Age'] = train_df['Age'].fillna(age_mean)
    train_df['Fare'] = train_df['Fare'].fillna(fare_mean)

    # Recode the labels from 0/1 to -1/1.
    train_survived = train_df['Survived'].map(lambda x: 2 * x - 1)
    train_df = train_df.drop('Survived', axis=1)

    # First 800 rows are used for fitting, the rest for validation.
    split = 800
    train_df1 = train_df[0:split]
    train_survived1 = train_survived[0:split]
    train_df2 = train_df[split:]
    train_survived2 = train_survived[split:]

    scaler.fit(train_df1)
    # The scaled matrices are the same for every model, so compute them once.
    train_x = scaler.transform(train_df1)
    valid_x = scaler.transform(train_df2)

    fitted_regs = []
    for reg in regressions():
        print(reg)
        reg.fit(train_x, train_survived1)
        estimated = reg.predict(valid_x)
        print(_accuracy(train_survived2, estimated))
        print()
        fitted_regs.append(reg)

    # Index 5 is the RBF-kernel SVC in the list returned by regressions().
    reg = fitted_regs[5]
    print(reg)

    test_df = read_csv('test.csv')
    # Fare (and Age) can be NaN in the test data, so fill the gaps with the
    # training-data means.
    test_df['Age'] = test_df['Age'].fillna(age_mean)
    test_df['Fare'] = test_df['Fare'].fillna(fare_mean)
    ids = test_df['PassengerId']
    test_df = test_df.drop('PassengerId', axis=1)

    survived = reg.predict(scaler.transform(test_df))
    # Convert the -1/1 predictions back to the 0/1 labels of the submission.
    survived = [(1 if x >= 0 else 0) for x in survived]
    answer = pd.DataFrame({'PassengerId': ids, 'Survived': survived})
    answer.to_csv('answer.csv', index=False)
# Run the training/prediction pipeline only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment