# Kaggle Titanic by SVM
# GitHub gist by @monmonmon, created Sep 12, 2018.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the Kaggle Titanic training data; train.csv is read from the current
# working directory.
ds = pd.read_csv('train.csv')

# Fill missing Age values with the median age of the passengers that share the
# same honorific (Mr, Mrs, etc.) in the Name column.
honorifics = ['Mr', 'Mrs', 'Ms', 'Miss', 'Master', 'Rev', 'Dr']
# Median of the observed ages, taken before any imputation so the filled-in
# values cannot influence it. Used only as a fallback after the loop.
overall_median = ds['Age'].median()
for h in honorifics:
    # The honorific must appear as a whole word directly followed by a period.
    regexp = fr".*\b{h}\..*"
    has_title = ds['Name'].str.match(regexp)
    median = ds.loc[has_title, 'Age'].median()
    # Only rows that are still missing an age are touched; known ages and ages
    # filled by an earlier honorific are left as they are.
    ds.loc[has_title & ds['Age'].isna(), 'Age'] = median
# Rows whose name carries none of the listed honorifics (or whose honorific
# group has no known age) would otherwise stay NaN, which SVC.fit rejects.
ds['Age'] = ds['Age'].fillna(overall_median)
# Encode the Sex column as a single indicator column: 1 for 'female',
# 0 otherwise (a separate male column is left out).
ds['female'] = (ds['Sex'] == 'female').astype(int)
# Only sex and age are used as features.
X = ds.loc[:, ['female', 'Age']]
# Survived column as the target. Selected as a 1-D Series rather than a
# one-column DataFrame, because scikit-learn classifiers expect a 1-D y and
# warn about a column vector.
y = ds['Survived']
# Split into training and test data first, so that the scaler below never
# sees the held-out rows.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
# Standardization: the scaling statistics are fitted on the training part only
# and then applied unchanged to the test part. X itself is left unscaled.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
# Support Vector Machine classifier with scikit-learn's default settings.
from sklearn.svm import SVC
svc = SVC()
svc.fit(X_train, y_train)
# Predictions on the held-out rows; kept for further evaluation.
y_pred = svc.predict(X_test)
# Print the score on the test split; a bare expression would be discarded
# when this file is run as a script.
print(svc.score(X_test, y_test))
# Example output recorded by the original author (the exact figure may vary
# with the data and library versions):
# 0.7988826815642458
# Per-class precision / recall / F1 summary for the test-set predictions.
from sklearn.metrics import classification_report
report = classification_report(y_test, y_pred)
print(report)
# Sample output recorded by the original author:
#
#              precision    recall  f1-score   support
#
#           0       0.84      0.84      0.84       110
#           1       0.74      0.74      0.74        69
#
# avg / total       0.80      0.80      0.80       179
# (GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")