Created
October 15, 2017 10:39
-
-
Save monisoi/e1d5483b2e58f96fee872ee3ea19226e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>PassengerId</th>\n", | |
" <th>Survived</th>\n", | |
" <th>Pclass</th>\n", | |
" <th>Name</th>\n", | |
" <th>Sex</th>\n", | |
" <th>Age</th>\n", | |
" <th>SibSp</th>\n", | |
" <th>Parch</th>\n", | |
" <th>Ticket</th>\n", | |
" <th>Fare</th>\n", | |
" <th>Cabin</th>\n", | |
" <th>Embarked</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>Braund, Mr. Owen Harris</td>\n", | |
" <td>male</td>\n", | |
" <td>22.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>A/5 21171</td>\n", | |
" <td>7.2500</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", | |
" <td>female</td>\n", | |
" <td>38.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>PC 17599</td>\n", | |
" <td>71.2833</td>\n", | |
" <td>C85</td>\n", | |
" <td>C</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>Heikkinen, Miss. Laina</td>\n", | |
" <td>female</td>\n", | |
" <td>26.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>STON/O2. 3101282</td>\n", | |
" <td>7.9250</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", | |
" <td>female</td>\n", | |
" <td>35.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>113803</td>\n", | |
" <td>53.1000</td>\n", | |
" <td>C123</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>Allen, Mr. William Henry</td>\n", | |
" <td>male</td>\n", | |
" <td>35.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>373450</td>\n", | |
" <td>8.0500</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>6</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>Moran, Mr. James</td>\n", | |
" <td>male</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>330877</td>\n", | |
" <td>8.4583</td>\n", | |
" <td>NaN</td>\n", | |
" <td>Q</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>7</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>McCarthy, Mr. Timothy J</td>\n", | |
" <td>male</td>\n", | |
" <td>54.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>17463</td>\n", | |
" <td>51.8625</td>\n", | |
" <td>E46</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>8</td>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>Palsson, Master. Gosta Leonard</td>\n", | |
" <td>male</td>\n", | |
" <td>2.0</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>349909</td>\n", | |
" <td>21.0750</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>9</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)</td>\n", | |
" <td>female</td>\n", | |
" <td>27.0</td>\n", | |
" <td>0</td>\n", | |
" <td>2</td>\n", | |
" <td>347742</td>\n", | |
" <td>11.1333</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>10</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>Nasser, Mrs. Nicholas (Adele Achem)</td>\n", | |
" <td>female</td>\n", | |
" <td>14.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>237736</td>\n", | |
" <td>30.0708</td>\n", | |
" <td>NaN</td>\n", | |
" <td>C</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" PassengerId Survived Pclass \\\n", | |
"0 1 0 3 \n", | |
"1 2 1 1 \n", | |
"2 3 1 3 \n", | |
"3 4 1 1 \n", | |
"4 5 0 3 \n", | |
"5 6 0 3 \n", | |
"6 7 0 1 \n", | |
"7 8 0 3 \n", | |
"8 9 1 3 \n", | |
"9 10 1 2 \n", | |
"\n", | |
" Name Sex Age SibSp \\\n", | |
"0 Braund, Mr. Owen Harris male 22.0 1 \n", | |
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", | |
"2 Heikkinen, Miss. Laina female 26.0 0 \n", | |
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", | |
"4 Allen, Mr. William Henry male 35.0 0 \n", | |
"5 Moran, Mr. James male NaN 0 \n", | |
"6 McCarthy, Mr. Timothy J male 54.0 0 \n", | |
"7 Palsson, Master. Gosta Leonard male 2.0 3 \n", | |
"8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n", | |
"9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n", | |
"\n", | |
" Parch Ticket Fare Cabin Embarked \n", | |
"0 0 A/5 21171 7.2500 NaN S \n", | |
"1 0 PC 17599 71.2833 C85 C \n", | |
"2 0 STON/O2. 3101282 7.9250 NaN S \n", | |
"3 0 113803 53.1000 C123 S \n", | |
"4 0 373450 8.0500 NaN S \n", | |
"5 0 330877 8.4583 NaN Q \n", | |
"6 0 17463 51.8625 E46 S \n", | |
"7 1 349909 21.0750 NaN S \n", | |
"8 2 347742 11.1333 NaN S \n", | |
"9 0 237736 30.0708 NaN C " | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# csvを読み込んで先頭10行を見る\n", | |
"train_df = pd.read_csv(\"train.csv\")\n", | |
"train_df.head(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Sexをワンホットエンコーディング\n", | |
"train_df_proc = train_df\n", | |
"train_df_proc['Sex'] = train_df['Sex'].map( {'female': 0, 'male': 1} ).astype(int) " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" C Q S\n", | |
"0 0 0 1\n", | |
"1 1 0 0\n", | |
"2 0 0 1\n", | |
"3 0 0 1\n", | |
"4 0 0 1\n" | |
] | |
} | |
], | |
"source": [ | |
"# Embarkedをワンホットエンコーディング\n", | |
"embarked_dummy = pd.get_dummies(train_df['Embarked'])\n", | |
"print(embarked_dummy.head(5))\n", | |
"train_df_proc = pd.concat((train_df_proc, embarked_dummy), axis=1)\n", | |
"train_df_proc = train_df_proc.drop('Embarked', axis=1)\n", | |
"train_df_proc = train_df_proc.drop('S', axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>PassengerId</th>\n", | |
" <th>Survived</th>\n", | |
" <th>Pclass</th>\n", | |
" <th>Sex</th>\n", | |
" <th>Age</th>\n", | |
" <th>SibSp</th>\n", | |
" <th>Parch</th>\n", | |
" <th>Fare</th>\n", | |
" <th>C</th>\n", | |
" <th>Q</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>PassengerId</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.145894</td>\n", | |
" <td>-0.087759</td>\n", | |
" <td>-0.024313</td>\n", | |
" <td>0.042758</td>\n", | |
" <td>-0.081876</td>\n", | |
" <td>-0.050419</td>\n", | |
" <td>0.029345</td>\n", | |
" <td>-0.033257</td>\n", | |
" <td>-0.052956</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Survived</th>\n", | |
" <td>0.145894</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>-0.037227</td>\n", | |
" <td>-0.535727</td>\n", | |
" <td>-0.244604</td>\n", | |
" <td>0.100339</td>\n", | |
" <td>0.018723</td>\n", | |
" <td>0.134019</td>\n", | |
" <td>0.098712</td>\n", | |
" <td>-0.039232</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Pclass</th>\n", | |
" <td>-0.087759</td>\n", | |
" <td>-0.037227</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>-0.041725</td>\n", | |
" <td>-0.307590</td>\n", | |
" <td>-0.100324</td>\n", | |
" <td>0.049894</td>\n", | |
" <td>-0.315069</td>\n", | |
" <td>-0.228001</td>\n", | |
" <td>-0.038676</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Sex</th>\n", | |
" <td>-0.024313</td>\n", | |
" <td>-0.535727</td>\n", | |
" <td>-0.041725</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.172307</td>\n", | |
" <td>-0.095344</td>\n", | |
" <td>-0.081832</td>\n", | |
" <td>-0.129871</td>\n", | |
" <td>-0.053879</td>\n", | |
" <td>-0.002826</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Age</th>\n", | |
" <td>0.042758</td>\n", | |
" <td>-0.244604</td>\n", | |
" <td>-0.307590</td>\n", | |
" <td>0.172307</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>-0.161625</td>\n", | |
" <td>-0.274813</td>\n", | |
" <td>-0.091542</td>\n", | |
" <td>0.076824</td>\n", | |
" <td>0.017855</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>SibSp</th>\n", | |
" <td>-0.081876</td>\n", | |
" <td>0.100339</td>\n", | |
" <td>-0.100324</td>\n", | |
" <td>-0.095344</td>\n", | |
" <td>-0.161625</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.258993</td>\n", | |
" <td>0.285492</td>\n", | |
" <td>-0.050628</td>\n", | |
" <td>0.169778</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Parch</th>\n", | |
" <td>-0.050419</td>\n", | |
" <td>0.018723</td>\n", | |
" <td>0.049894</td>\n", | |
" <td>-0.081832</td>\n", | |
" <td>-0.274813</td>\n", | |
" <td>0.258993</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.388783</td>\n", | |
" <td>-0.068949</td>\n", | |
" <td>-0.065543</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Fare</th>\n", | |
" <td>0.029345</td>\n", | |
" <td>0.134019</td>\n", | |
" <td>-0.315069</td>\n", | |
" <td>-0.129871</td>\n", | |
" <td>-0.091542</td>\n", | |
" <td>0.285492</td>\n", | |
" <td>0.388783</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.239531</td>\n", | |
" <td>0.015604</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>C</th>\n", | |
" <td>-0.033257</td>\n", | |
" <td>0.098712</td>\n", | |
" <td>-0.228001</td>\n", | |
" <td>-0.053879</td>\n", | |
" <td>0.076824</td>\n", | |
" <td>-0.050628</td>\n", | |
" <td>-0.068949</td>\n", | |
" <td>0.239531</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>-0.076941</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Q</th>\n", | |
" <td>-0.052956</td>\n", | |
" <td>-0.039232</td>\n", | |
" <td>-0.038676</td>\n", | |
" <td>-0.002826</td>\n", | |
" <td>0.017855</td>\n", | |
" <td>0.169778</td>\n", | |
" <td>-0.065543</td>\n", | |
" <td>0.015604</td>\n", | |
" <td>-0.076941</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" PassengerId Survived Pclass Sex Age SibSp \\\n", | |
"PassengerId 1.000000 0.145894 -0.087759 -0.024313 0.042758 -0.081876 \n", | |
"Survived 0.145894 1.000000 -0.037227 -0.535727 -0.244604 0.100339 \n", | |
"Pclass -0.087759 -0.037227 1.000000 -0.041725 -0.307590 -0.100324 \n", | |
"Sex -0.024313 -0.535727 -0.041725 1.000000 0.172307 -0.095344 \n", | |
"Age 0.042758 -0.244604 -0.307590 0.172307 1.000000 -0.161625 \n", | |
"SibSp -0.081876 0.100339 -0.100324 -0.095344 -0.161625 1.000000 \n", | |
"Parch -0.050419 0.018723 0.049894 -0.081832 -0.274813 0.258993 \n", | |
"Fare 0.029345 0.134019 -0.315069 -0.129871 -0.091542 0.285492 \n", | |
"C -0.033257 0.098712 -0.228001 -0.053879 0.076824 -0.050628 \n", | |
"Q -0.052956 -0.039232 -0.038676 -0.002826 0.017855 0.169778 \n", | |
"\n", | |
" Parch Fare C Q \n", | |
"PassengerId -0.050419 0.029345 -0.033257 -0.052956 \n", | |
"Survived 0.018723 0.134019 0.098712 -0.039232 \n", | |
"Pclass 0.049894 -0.315069 -0.228001 -0.038676 \n", | |
"Sex -0.081832 -0.129871 -0.053879 -0.002826 \n", | |
"Age -0.274813 -0.091542 0.076824 0.017855 \n", | |
"SibSp 0.258993 0.285492 -0.050628 0.169778 \n", | |
"Parch 1.000000 0.388783 -0.068949 -0.065543 \n", | |
"Fare 0.388783 1.000000 0.239531 0.015604 \n", | |
"C -0.068949 0.239531 1.000000 -0.076941 \n", | |
"Q -0.065543 0.015604 -0.076941 1.000000 " | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# NaNを削除してから相関係数を出力\n", | |
"train_df_proc_dn = train_df_proc.dropna(axis=0, how='any')\n", | |
"train_df_proc_dn.corr()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment