Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save yehjames/76d87e719ce09cd819eec9945c091805 to your computer and use it in GitHub Desktop.
Save yehjames/76d87e719ce09cd819eec9945c091805 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"pd.options.mode.chained_assignment = None"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"titanic = pd.read_csv('Titanic/train.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Moran, Mr. James</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>330877</td>\n",
" <td>8.4583</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>McCarthy, Mr. Timothy J</td>\n",
" <td>male</td>\n",
" <td>54.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>17463</td>\n",
" <td>51.8625</td>\n",
" <td>E46</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Palsson, Master. Gosta Leonard</td>\n",
" <td>male</td>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>349909</td>\n",
" <td>21.0750</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)</td>\n",
" <td>female</td>\n",
" <td>27.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>347742</td>\n",
" <td>11.1333</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Nasser, Mrs. Nicholas (Adele Achem)</td>\n",
" <td>female</td>\n",
" <td>14.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>237736</td>\n",
" <td>30.0708</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Sandstrom, Miss. Marguerite Rut</td>\n",
" <td>female</td>\n",
" <td>4.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>PP 9549</td>\n",
" <td>16.7000</td>\n",
" <td>G6</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>12</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Bonnell, Miss. Elizabeth</td>\n",
" <td>female</td>\n",
" <td>58.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>113783</td>\n",
" <td>26.5500</td>\n",
" <td>C103</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Saundercock, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>20.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>A/5. 2151</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>14</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Andersson, Mr. Anders Johan</td>\n",
" <td>male</td>\n",
" <td>39.0</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>347082</td>\n",
" <td>31.2750</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Vestrom, Miss. Hulda Amanda Adolfina</td>\n",
" <td>female</td>\n",
" <td>14.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>350406</td>\n",
" <td>7.8542</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Hewlett, Mrs. (Mary D Kingcome)</td>\n",
" <td>female</td>\n",
" <td>55.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>248706</td>\n",
" <td>16.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Rice, Master. Eugene</td>\n",
" <td>male</td>\n",
" <td>2.0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>382652</td>\n",
" <td>29.1250</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>18</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Williams, Mr. Charles Eugene</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>244373</td>\n",
" <td>13.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Vander Planke, Mrs. Julius (Emelia Maria Vande...</td>\n",
" <td>female</td>\n",
" <td>31.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>345763</td>\n",
" <td>18.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>20</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Masselmani, Mrs. Fatima</td>\n",
" <td>female</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2649</td>\n",
" <td>7.2250</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>Fynney, Mr. Joseph J</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>239865</td>\n",
" <td>26.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>22</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Beesley, Mr. Lawrence</td>\n",
" <td>male</td>\n",
" <td>34.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>248698</td>\n",
" <td>13.0000</td>\n",
" <td>D56</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>23</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>McGowan, Miss. Anna \"Annie\"</td>\n",
" <td>female</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>330923</td>\n",
" <td>8.0292</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Sloper, Mr. William Thompson</td>\n",
" <td>male</td>\n",
" <td>28.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>113788</td>\n",
" <td>35.5000</td>\n",
" <td>A6</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Palsson, Miss. Torborg Danira</td>\n",
" <td>female</td>\n",
" <td>8.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>349909</td>\n",
" <td>21.0750</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>26</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>347077</td>\n",
" <td>31.3875</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>27</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Emir, Mr. Farred Chehab</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2631</td>\n",
" <td>7.2250</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Fortune, Mr. Charles Alexander</td>\n",
" <td>male</td>\n",
" <td>19.0</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>19950</td>\n",
" <td>263.0000</td>\n",
" <td>C23 C25 C27</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>O'Dwyer, Miss. Ellen \"Nellie\"</td>\n",
" <td>female</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>330959</td>\n",
" <td>7.8792</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Todoroff, Mr. Lalio</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>349216</td>\n",
" <td>7.8958</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>861</th>\n",
" <td>862</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>Giles, Mr. Frederick Edward</td>\n",
" <td>male</td>\n",
" <td>21.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>28134</td>\n",
" <td>11.5000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>862</th>\n",
" <td>863</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Swift, Mrs. Frederick Joel (Margaret Welles Ba...</td>\n",
" <td>female</td>\n",
" <td>48.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>17466</td>\n",
" <td>25.9292</td>\n",
" <td>D17</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>863</th>\n",
" <td>864</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Sage, Miss. Dorothy Edith \"Dolly\"</td>\n",
" <td>female</td>\n",
" <td>NaN</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>CA. 2343</td>\n",
" <td>69.5500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>864</th>\n",
" <td>865</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>Gill, Mr. John William</td>\n",
" <td>male</td>\n",
" <td>24.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>233866</td>\n",
" <td>13.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>865</th>\n",
" <td>866</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Bystrom, Mrs. (Karolina)</td>\n",
" <td>female</td>\n",
" <td>42.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>236852</td>\n",
" <td>13.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>866</th>\n",
" <td>867</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Duran y More, Miss. Asuncion</td>\n",
" <td>female</td>\n",
" <td>27.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>SC/PARIS 2149</td>\n",
" <td>13.8583</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>867</th>\n",
" <td>868</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Roebling, Mr. Washington Augustus II</td>\n",
" <td>male</td>\n",
" <td>31.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>PC 17590</td>\n",
" <td>50.4958</td>\n",
" <td>A24</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>868</th>\n",
" <td>869</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>van Melkebeke, Mr. Philemon</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>345777</td>\n",
" <td>9.5000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>869</th>\n",
" <td>870</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Johnson, Master. Harold Theodor</td>\n",
" <td>male</td>\n",
" <td>4.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>347742</td>\n",
" <td>11.1333</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>870</th>\n",
" <td>871</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Balkic, Mr. Cerin</td>\n",
" <td>male</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>349248</td>\n",
" <td>7.8958</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>871</th>\n",
" <td>872</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Beckwith, Mrs. Richard Leonard (Sallie Monypeny)</td>\n",
" <td>female</td>\n",
" <td>47.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>11751</td>\n",
" <td>52.5542</td>\n",
" <td>D35</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>872</th>\n",
" <td>873</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Carlsson, Mr. Frans Olof</td>\n",
" <td>male</td>\n",
" <td>33.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>695</td>\n",
" <td>5.0000</td>\n",
" <td>B51 B53 B55</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>873</th>\n",
" <td>874</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Vander Cruyssen, Mr. Victor</td>\n",
" <td>male</td>\n",
" <td>47.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>345765</td>\n",
" <td>9.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>874</th>\n",
" <td>875</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Abelson, Mrs. Samuel (Hannah Wizosky)</td>\n",
" <td>female</td>\n",
" <td>28.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>P/PP 3381</td>\n",
" <td>24.0000</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>875</th>\n",
" <td>876</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Najib, Miss. Adele Kiamie \"Jane\"</td>\n",
" <td>female</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2667</td>\n",
" <td>7.2250</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>876</th>\n",
" <td>877</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Gustafsson, Mr. Alfred Ossian</td>\n",
" <td>male</td>\n",
" <td>20.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7534</td>\n",
" <td>9.8458</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>877</th>\n",
" <td>878</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Petroff, Mr. Nedelio</td>\n",
" <td>male</td>\n",
" <td>19.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>349212</td>\n",
" <td>7.8958</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>878</th>\n",
" <td>879</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Laleff, Mr. Kristo</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>349217</td>\n",
" <td>7.8958</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>879</th>\n",
" <td>880</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)</td>\n",
" <td>female</td>\n",
" <td>56.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>11767</td>\n",
" <td>83.1583</td>\n",
" <td>C50</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>880</th>\n",
" <td>881</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Shelley, Mrs. William (Imanita Parrish Hall)</td>\n",
" <td>female</td>\n",
" <td>25.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>230433</td>\n",
" <td>26.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>881</th>\n",
" <td>882</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Markun, Mr. Johann</td>\n",
" <td>male</td>\n",
" <td>33.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>349257</td>\n",
" <td>7.8958</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>882</th>\n",
" <td>883</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Dahlberg, Miss. Gerda Ulrika</td>\n",
" <td>female</td>\n",
" <td>22.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7552</td>\n",
" <td>10.5167</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>883</th>\n",
" <td>884</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>Banfield, Mr. Frederick James</td>\n",
" <td>male</td>\n",
" <td>28.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>C.A./SOTON 34068</td>\n",
" <td>10.5000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>884</th>\n",
" <td>885</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Sutehall, Mr. Henry Jr</td>\n",
" <td>male</td>\n",
" <td>25.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>SOTON/OQ 392076</td>\n",
" <td>7.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>885</th>\n",
" <td>886</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Rice, Mrs. William (Margaret Norton)</td>\n",
" <td>female</td>\n",
" <td>39.0</td>\n",
" <td>0</td>\n",
" <td>5</td>\n",
" <td>382652</td>\n",
" <td>29.1250</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>886</th>\n",
" <td>887</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>Montvila, Rev. Juozas</td>\n",
" <td>male</td>\n",
" <td>27.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>211536</td>\n",
" <td>13.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>887</th>\n",
" <td>888</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Graham, Miss. Margaret Edith</td>\n",
" <td>female</td>\n",
" <td>19.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>112053</td>\n",
" <td>30.0000</td>\n",
" <td>B42</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>888</th>\n",
" <td>889</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
" <td>female</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>W./C. 6607</td>\n",
" <td>23.4500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>889</th>\n",
" <td>890</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Behr, Mr. Karl Howell</td>\n",
" <td>male</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>111369</td>\n",
" <td>30.0000</td>\n",
" <td>C148</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>890</th>\n",
" <td>891</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Dooley, Mr. Patrick</td>\n",
" <td>male</td>\n",
" <td>32.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>370376</td>\n",
" <td>7.7500</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>891 rows × 12 columns</p>\n",
"</div>"
],
"text/plain": [
" PassengerId Survived Pclass \\\n",
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"5 6 0 3 \n",
"6 7 0 1 \n",
"7 8 0 3 \n",
"8 9 1 3 \n",
"9 10 1 2 \n",
"10 11 1 3 \n",
"11 12 1 1 \n",
"12 13 0 3 \n",
"13 14 0 3 \n",
"14 15 0 3 \n",
"15 16 1 2 \n",
"16 17 0 3 \n",
"17 18 1 2 \n",
"18 19 0 3 \n",
"19 20 1 3 \n",
"20 21 0 2 \n",
"21 22 1 2 \n",
"22 23 1 3 \n",
"23 24 1 1 \n",
"24 25 0 3 \n",
"25 26 1 3 \n",
"26 27 0 3 \n",
"27 28 0 1 \n",
"28 29 1 3 \n",
"29 30 0 3 \n",
".. ... ... ... \n",
"861 862 0 2 \n",
"862 863 1 1 \n",
"863 864 0 3 \n",
"864 865 0 2 \n",
"865 866 1 2 \n",
"866 867 1 2 \n",
"867 868 0 1 \n",
"868 869 0 3 \n",
"869 870 1 3 \n",
"870 871 0 3 \n",
"871 872 1 1 \n",
"872 873 0 1 \n",
"873 874 0 3 \n",
"874 875 1 2 \n",
"875 876 1 3 \n",
"876 877 0 3 \n",
"877 878 0 3 \n",
"878 879 0 3 \n",
"879 880 1 1 \n",
"880 881 1 2 \n",
"881 882 0 3 \n",
"882 883 0 3 \n",
"883 884 0 2 \n",
"884 885 0 3 \n",
"885 886 0 3 \n",
"886 887 0 2 \n",
"887 888 1 1 \n",
"888 889 0 3 \n",
"889 890 1 1 \n",
"890 891 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"0 Braund, Mr. Owen Harris male 22.0 1 \n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"2 Heikkinen, Miss. Laina female 26.0 0 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"4 Allen, Mr. William Henry male 35.0 0 \n",
"5 Moran, Mr. James male NaN 0 \n",
"6 McCarthy, Mr. Timothy J male 54.0 0 \n",
"7 Palsson, Master. Gosta Leonard male 2.0 3 \n",
"8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 0 \n",
"9 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 1 \n",
"10 Sandstrom, Miss. Marguerite Rut female 4.0 1 \n",
"11 Bonnell, Miss. Elizabeth female 58.0 0 \n",
"12 Saundercock, Mr. William Henry male 20.0 0 \n",
"13 Andersson, Mr. Anders Johan male 39.0 1 \n",
"14 Vestrom, Miss. Hulda Amanda Adolfina female 14.0 0 \n",
"15 Hewlett, Mrs. (Mary D Kingcome) female 55.0 0 \n",
"16 Rice, Master. Eugene male 2.0 4 \n",
"17 Williams, Mr. Charles Eugene male NaN 0 \n",
"18 Vander Planke, Mrs. Julius (Emelia Maria Vande... female 31.0 1 \n",
"19 Masselmani, Mrs. Fatima female NaN 0 \n",
"20 Fynney, Mr. Joseph J male 35.0 0 \n",
"21 Beesley, Mr. Lawrence male 34.0 0 \n",
"22 McGowan, Miss. Anna \"Annie\" female 15.0 0 \n",
"23 Sloper, Mr. William Thompson male 28.0 0 \n",
"24 Palsson, Miss. Torborg Danira female 8.0 3 \n",
"25 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 1 \n",
"26 Emir, Mr. Farred Chehab male NaN 0 \n",
"27 Fortune, Mr. Charles Alexander male 19.0 3 \n",
"28 O'Dwyer, Miss. Ellen \"Nellie\" female NaN 0 \n",
"29 Todoroff, Mr. Lalio male NaN 0 \n",
".. ... ... ... ... \n",
"861 Giles, Mr. Frederick Edward male 21.0 1 \n",
"862 Swift, Mrs. Frederick Joel (Margaret Welles Ba... female 48.0 0 \n",
"863 Sage, Miss. Dorothy Edith \"Dolly\" female NaN 8 \n",
"864 Gill, Mr. John William male 24.0 0 \n",
"865 Bystrom, Mrs. (Karolina) female 42.0 0 \n",
"866 Duran y More, Miss. Asuncion female 27.0 1 \n",
"867 Roebling, Mr. Washington Augustus II male 31.0 0 \n",
"868 van Melkebeke, Mr. Philemon male NaN 0 \n",
"869 Johnson, Master. Harold Theodor male 4.0 1 \n",
"870 Balkic, Mr. Cerin male 26.0 0 \n",
"871 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) female 47.0 1 \n",
"872 Carlsson, Mr. Frans Olof male 33.0 0 \n",
"873 Vander Cruyssen, Mr. Victor male 47.0 0 \n",
"874 Abelson, Mrs. Samuel (Hannah Wizosky) female 28.0 1 \n",
"875 Najib, Miss. Adele Kiamie \"Jane\" female 15.0 0 \n",
"876 Gustafsson, Mr. Alfred Ossian male 20.0 0 \n",
"877 Petroff, Mr. Nedelio male 19.0 0 \n",
"878 Laleff, Mr. Kristo male NaN 0 \n",
"879 Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) female 56.0 0 \n",
"880 Shelley, Mrs. William (Imanita Parrish Hall) female 25.0 0 \n",
"881 Markun, Mr. Johann male 33.0 0 \n",
"882 Dahlberg, Miss. Gerda Ulrika female 22.0 0 \n",
"883 Banfield, Mr. Frederick James male 28.0 0 \n",
"884 Sutehall, Mr. Henry Jr male 25.0 0 \n",
"885 Rice, Mrs. William (Margaret Norton) female 39.0 0 \n",
"886 Montvila, Rev. Juozas male 27.0 0 \n",
"887 Graham, Miss. Margaret Edith female 19.0 0 \n",
"888 Johnston, Miss. Catherine Helen \"Carrie\" female NaN 1 \n",
"889 Behr, Mr. Karl Howell male 26.0 0 \n",
"890 Dooley, Mr. Patrick male 32.0 0 \n",
"\n",
" Parch Ticket Fare Cabin Embarked \n",
"0 0 A/5 21171 7.2500 NaN S \n",
"1 0 PC 17599 71.2833 C85 C \n",
"2 0 STON/O2. 3101282 7.9250 NaN S \n",
"3 0 113803 53.1000 C123 S \n",
"4 0 373450 8.0500 NaN S \n",
"5 0 330877 8.4583 NaN Q \n",
"6 0 17463 51.8625 E46 S \n",
"7 1 349909 21.0750 NaN S \n",
"8 2 347742 11.1333 NaN S \n",
"9 0 237736 30.0708 NaN C \n",
"10 1 PP 9549 16.7000 G6 S \n",
"11 0 113783 26.5500 C103 S \n",
"12 0 A/5. 2151 8.0500 NaN S \n",
"13 5 347082 31.2750 NaN S \n",
"14 0 350406 7.8542 NaN S \n",
"15 0 248706 16.0000 NaN S \n",
"16 1 382652 29.1250 NaN Q \n",
"17 0 244373 13.0000 NaN S \n",
"18 0 345763 18.0000 NaN S \n",
"19 0 2649 7.2250 NaN C \n",
"20 0 239865 26.0000 NaN S \n",
"21 0 248698 13.0000 D56 S \n",
"22 0 330923 8.0292 NaN Q \n",
"23 0 113788 35.5000 A6 S \n",
"24 1 349909 21.0750 NaN S \n",
"25 5 347077 31.3875 NaN S \n",
"26 0 2631 7.2250 NaN C \n",
"27 2 19950 263.0000 C23 C25 C27 S \n",
"28 0 330959 7.8792 NaN Q \n",
"29 0 349216 7.8958 NaN S \n",
".. ... ... ... ... ... \n",
"861 0 28134 11.5000 NaN S \n",
"862 0 17466 25.9292 D17 S \n",
"863 2 CA. 2343 69.5500 NaN S \n",
"864 0 233866 13.0000 NaN S \n",
"865 0 236852 13.0000 NaN S \n",
"866 0 SC/PARIS 2149 13.8583 NaN C \n",
"867 0 PC 17590 50.4958 A24 S \n",
"868 0 345777 9.5000 NaN S \n",
"869 1 347742 11.1333 NaN S \n",
"870 0 349248 7.8958 NaN S \n",
"871 1 11751 52.5542 D35 S \n",
"872 0 695 5.0000 B51 B53 B55 S \n",
"873 0 345765 9.0000 NaN S \n",
"874 0 P/PP 3381 24.0000 NaN C \n",
"875 0 2667 7.2250 NaN C \n",
"876 0 7534 9.8458 NaN S \n",
"877 0 349212 7.8958 NaN S \n",
"878 0 349217 7.8958 NaN S \n",
"879 1 11767 83.1583 C50 C \n",
"880 1 230433 26.0000 NaN S \n",
"881 0 349257 7.8958 NaN S \n",
"882 0 7552 10.5167 NaN S \n",
"883 0 C.A./SOTON 34068 10.5000 NaN S \n",
"884 0 SOTON/OQ 392076 7.0500 NaN S \n",
"885 5 382652 29.1250 NaN Q \n",
"886 0 211536 13.0000 NaN S \n",
"887 0 112053 30.0000 B42 S \n",
"888 2 W./C. 6607 23.4500 NaN S \n",
"889 0 111369 30.0000 C148 C \n",
"890 0 370376 7.7500 NaN Q \n",
"\n",
"[891 rows x 12 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"titanic"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Features and target. Take an explicit copy so that later edits to X\n",
"# operate on an independent frame rather than on a selection of `titanic`.\n",
"X = titanic[['Pclass', 'Age', 'Sex']].copy()\n",
"y = titanic['Survived']"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Replace missing ages with the mean age. `assign` returns a new frame, so\n",
"# nothing is written into a selection taken from `titanic`.\n",
"# NOTE(review): the mean is computed before the train/test split, so test rows\n",
"# influence the imputed value - consider imputing after the split.\n",
"X = X.assign(Age=X['Age'].fillna(X['Age'].mean()))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X,\n",
"    y,\n",
"    test_size=0.25,\n",
"    random_state=33,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pclass</th>\n",
" <th>Age</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>1</td>\n",
" <td>47.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>360</th>\n",
" <td>3</td>\n",
" <td>40.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>364</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320</th>\n",
" <td>3</td>\n",
" <td>22.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>296</th>\n",
" <td>3</td>\n",
" <td>23.500000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>586</th>\n",
" <td>2</td>\n",
" <td>47.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>321</th>\n",
" <td>3</td>\n",
" <td>27.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>615</th>\n",
" <td>2</td>\n",
" <td>24.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>107</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>611</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>1</td>\n",
" <td>45.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>695</th>\n",
" <td>2</td>\n",
" <td>52.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>252</th>\n",
" <td>1</td>\n",
" <td>62.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690</th>\n",
" <td>1</td>\n",
" <td>31.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779</th>\n",
" <td>1</td>\n",
" <td>43.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>834</th>\n",
" <td>3</td>\n",
" <td>18.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>176</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>781</th>\n",
" <td>1</td>\n",
" <td>17.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>608</th>\n",
" <td>2</td>\n",
" <td>22.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>655</th>\n",
" <td>2</td>\n",
" <td>24.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>384</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>1</td>\n",
" <td>50.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>516</th>\n",
" <td>2</td>\n",
" <td>34.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>3</td>\n",
" <td>19.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>653</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596</th>\n",
" <td>2</td>\n",
" <td>29.699118</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>3</td>\n",
" <td>33.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>403</th>\n",
" <td>3</td>\n",
" <td>28.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>744</th>\n",
" <td>3</td>\n",
" <td>31.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344</th>\n",
" <td>2</td>\n",
" <td>36.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>2</td>\n",
" <td>17.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>528</th>\n",
" <td>3</td>\n",
" <td>39.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>3</td>\n",
" <td>25.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>614</th>\n",
" <td>3</td>\n",
" <td>35.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>592</th>\n",
" <td>3</td>\n",
" <td>47.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>848</th>\n",
" <td>2</td>\n",
" <td>28.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>662</th>\n",
" <td>1</td>\n",
" <td>47.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>395</th>\n",
" <td>3</td>\n",
" <td>22.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>743</th>\n",
" <td>3</td>\n",
" <td>24.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172</th>\n",
" <td>3</td>\n",
" <td>1.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>543</th>\n",
" <td>2</td>\n",
" <td>32.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>845</th>\n",
" <td>3</td>\n",
" <td>42.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>545</th>\n",
" <td>1</td>\n",
" <td>64.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>803</th>\n",
" <td>3</td>\n",
" <td>0.420000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>813</th>\n",
" <td>3</td>\n",
" <td>6.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>398</th>\n",
" <td>2</td>\n",
" <td>23.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>723</th>\n",
" <td>2</td>\n",
" <td>50.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>1</td>\n",
" <td>38.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>1</td>\n",
" <td>21.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>1</td>\n",
" <td>58.000000</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>3</td>\n",
" <td>28.500000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658</th>\n",
" <td>2</td>\n",
" <td>23.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>578</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>female</td>\n",
" </tr>\n",
" <tr>\n",
" <th>728</th>\n",
" <td>2</td>\n",
" <td>25.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>391</th>\n",
" <td>3</td>\n",
" <td>21.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>2</td>\n",
" <td>35.000000</td>\n",
" <td>male</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>668 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Pclass Age Sex\n",
"110 1 47.000000 male\n",
"360 3 40.000000 male\n",
"364 3 29.699118 male\n",
"320 3 22.000000 male\n",
"296 3 23.500000 male\n",
"586 2 47.000000 male\n",
"321 3 27.000000 male\n",
"615 2 24.000000 female\n",
"107 3 29.699118 male\n",
"611 3 29.699118 male\n",
"613 3 29.699118 male\n",
"339 1 45.000000 male\n",
"695 2 52.000000 male\n",
"252 1 62.000000 male\n",
"301 3 29.699118 male\n",
"690 1 31.000000 male\n",
"779 1 43.000000 female\n",
"834 3 18.000000 male\n",
"196 3 29.699118 male\n",
"176 3 29.699118 male\n",
"781 1 17.000000 female\n",
"608 2 22.000000 female\n",
"655 2 24.000000 male\n",
"384 3 29.699118 male\n",
"177 1 50.000000 female\n",
"516 2 34.000000 female\n",
"67 3 19.000000 male\n",
"653 3 29.699118 female\n",
"596 2 29.699118 female\n",
"85 3 33.000000 female\n",
".. ... ... ...\n",
"403 3 28.000000 male\n",
"744 3 31.000000 male\n",
"344 2 36.000000 male\n",
"84 2 17.000000 female\n",
"528 3 39.000000 male\n",
"246 3 25.000000 female\n",
"614 3 35.000000 male\n",
"592 3 47.000000 male\n",
"848 2 28.000000 male\n",
"662 1 47.000000 male\n",
"395 3 22.000000 male\n",
"743 3 24.000000 male\n",
"172 3 1.000000 female\n",
"543 2 32.000000 male\n",
"845 3 42.000000 male\n",
"545 1 64.000000 male\n",
"803 3 0.420000 male\n",
"813 3 6.000000 female\n",
"398 2 23.000000 male\n",
"723 2 50.000000 male\n",
"61 1 38.000000 female\n",
"102 1 21.000000 male\n",
"195 1 58.000000 female\n",
"57 3 28.500000 male\n",
"201 3 29.699118 male\n",
"658 2 23.000000 male\n",
"578 3 29.699118 female\n",
"728 2 25.000000 male\n",
"391 3 21.000000 male\n",
"20 2 35.000000 male\n",
"\n",
"[668 rows x 3 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Encode Sex as 0/1 for the estimators below.\n",
"# replace() leaves already-encoded values untouched, so re-running this cell\n",
"# is a no-op; map() would turn the 0/1 values from a first run into NaN.\n",
"# astype('int64') raises on any unmapped or missing label instead of letting\n",
"# NaN or stray strings flow silently into the models.\n",
"# assign() builds new frames rather than writing into what may be slices of\n",
"# the original data.\n",
"SEX_CODES = {'male': 0, 'female': 1}\n",
"X_train = X_train.assign(Sex=X_train['Sex'].replace(SEX_CODES).astype('int64'))\n",
"X_test = X_test.assign(Sex=X_test['Sex'].replace(SEX_CODES).astype('int64'))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pclass</th>\n",
" <th>Age</th>\n",
" <th>Sex</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>110</th>\n",
" <td>1</td>\n",
" <td>47.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>360</th>\n",
" <td>3</td>\n",
" <td>40.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>364</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>320</th>\n",
" <td>3</td>\n",
" <td>22.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>296</th>\n",
" <td>3</td>\n",
" <td>23.500000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>586</th>\n",
" <td>2</td>\n",
" <td>47.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>321</th>\n",
" <td>3</td>\n",
" <td>27.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>615</th>\n",
" <td>2</td>\n",
" <td>24.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>107</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>611</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>1</td>\n",
" <td>45.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>695</th>\n",
" <td>2</td>\n",
" <td>52.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>252</th>\n",
" <td>1</td>\n",
" <td>62.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>301</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>690</th>\n",
" <td>1</td>\n",
" <td>31.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>779</th>\n",
" <td>1</td>\n",
" <td>43.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>834</th>\n",
" <td>3</td>\n",
" <td>18.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>196</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>176</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>781</th>\n",
" <td>1</td>\n",
" <td>17.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>608</th>\n",
" <td>2</td>\n",
" <td>22.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>655</th>\n",
" <td>2</td>\n",
" <td>24.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>384</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>177</th>\n",
" <td>1</td>\n",
" <td>50.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>516</th>\n",
" <td>2</td>\n",
" <td>34.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>3</td>\n",
" <td>19.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>653</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>596</th>\n",
" <td>2</td>\n",
" <td>29.699118</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>3</td>\n",
" <td>33.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>403</th>\n",
" <td>3</td>\n",
" <td>28.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>744</th>\n",
" <td>3</td>\n",
" <td>31.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>344</th>\n",
" <td>2</td>\n",
" <td>36.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>2</td>\n",
" <td>17.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>528</th>\n",
" <td>3</td>\n",
" <td>39.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>246</th>\n",
" <td>3</td>\n",
" <td>25.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>614</th>\n",
" <td>3</td>\n",
" <td>35.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>592</th>\n",
" <td>3</td>\n",
" <td>47.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>848</th>\n",
" <td>2</td>\n",
" <td>28.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>662</th>\n",
" <td>1</td>\n",
" <td>47.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>395</th>\n",
" <td>3</td>\n",
" <td>22.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>743</th>\n",
" <td>3</td>\n",
" <td>24.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172</th>\n",
" <td>3</td>\n",
" <td>1.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>543</th>\n",
" <td>2</td>\n",
" <td>32.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>845</th>\n",
" <td>3</td>\n",
" <td>42.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>545</th>\n",
" <td>1</td>\n",
" <td>64.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>803</th>\n",
" <td>3</td>\n",
" <td>0.420000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>813</th>\n",
" <td>3</td>\n",
" <td>6.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>398</th>\n",
" <td>2</td>\n",
" <td>23.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>723</th>\n",
" <td>2</td>\n",
" <td>50.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>1</td>\n",
" <td>38.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>102</th>\n",
" <td>1</td>\n",
" <td>21.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>195</th>\n",
" <td>1</td>\n",
" <td>58.000000</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>57</th>\n",
" <td>3</td>\n",
" <td>28.500000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>201</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>658</th>\n",
" <td>2</td>\n",
" <td>23.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>578</th>\n",
" <td>3</td>\n",
" <td>29.699118</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>728</th>\n",
" <td>2</td>\n",
" <td>25.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>391</th>\n",
" <td>3</td>\n",
" <td>21.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>2</td>\n",
" <td>35.000000</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>668 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Pclass Age Sex\n",
"110 1 47.000000 0\n",
"360 3 40.000000 0\n",
"364 3 29.699118 0\n",
"320 3 22.000000 0\n",
"296 3 23.500000 0\n",
"586 2 47.000000 0\n",
"321 3 27.000000 0\n",
"615 2 24.000000 1\n",
"107 3 29.699118 0\n",
"611 3 29.699118 0\n",
"613 3 29.699118 0\n",
"339 1 45.000000 0\n",
"695 2 52.000000 0\n",
"252 1 62.000000 0\n",
"301 3 29.699118 0\n",
"690 1 31.000000 0\n",
"779 1 43.000000 1\n",
"834 3 18.000000 0\n",
"196 3 29.699118 0\n",
"176 3 29.699118 0\n",
"781 1 17.000000 1\n",
"608 2 22.000000 1\n",
"655 2 24.000000 0\n",
"384 3 29.699118 0\n",
"177 1 50.000000 1\n",
"516 2 34.000000 1\n",
"67 3 19.000000 0\n",
"653 3 29.699118 1\n",
"596 2 29.699118 1\n",
"85 3 33.000000 1\n",
".. ... ... ...\n",
"403 3 28.000000 0\n",
"744 3 31.000000 0\n",
"344 2 36.000000 0\n",
"84 2 17.000000 1\n",
"528 3 39.000000 0\n",
"246 3 25.000000 1\n",
"614 3 35.000000 0\n",
"592 3 47.000000 0\n",
"848 2 28.000000 0\n",
"662 1 47.000000 0\n",
"395 3 22.000000 0\n",
"743 3 24.000000 0\n",
"172 3 1.000000 1\n",
"543 2 32.000000 0\n",
"845 3 42.000000 0\n",
"545 1 64.000000 0\n",
"803 3 0.420000 0\n",
"813 3 6.000000 1\n",
"398 2 23.000000 0\n",
"723 2 50.000000 0\n",
"61 1 38.000000 1\n",
"102 1 21.000000 0\n",
"195 1 58.000000 1\n",
"57 3 28.500000 0\n",
"201 3 29.699118 0\n",
"658 2 23.000000 0\n",
"578 3 29.699118 1\n",
"728 2 25.000000 0\n",
"391 3 21.000000 0\n",
"20 2 35.000000 0\n",
"\n",
"[668 rows x 3 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.ensemble import RandomForestClassifier"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Pin the seed so the forest's bootstrap draws and feature subsets (and\n",
"# therefore the accuracy reported below) are identical on every\n",
"# Restart & Run All; 0 mirrors the seed=0 used by XGBClassifier further down.\n",
"rfc = RandomForestClassifier(random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_split=1e-07, min_samples_leaf=1,\n",
" min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
" n_estimators=10, n_jobs=1, oob_score=False, random_state=None,\n",
" verbose=0, warm_start=False)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rfc.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The accuracy of Random Forest Classifier on testing set: 0.816143497758\n"
]
}
],
"source": [
"print('The accuracy of Random Forest Classifier on testing set:', rfc.score(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from xgboost import XGBClassifier"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"xgbc = XGBClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,\n",
" gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,\n",
" min_child_weight=1, missing=None, n_estimators=100, nthread=-1,\n",
" objective='binary:logistic', reg_alpha=0, reg_lambda=1,\n",
" scale_pos_weight=1, seed=0, silent=True, subsample=1)"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"xgbc.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The accuracy of eXtreme Gradient Boosting Classifier on testing set: 0.838565022422\n"
]
}
],
"source": [
"print('The accuracy of eXtreme Gradient Boosting Classifier on testing set:', xgbc.score(X_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment