Skip to content

Instantly share code, notes, and snippets.

@leighajarett
Last active November 17, 2016 04:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leighajarett/586fa667851f4b50e68de773c2ce8768 to your computer and use it in GitHub Desktop.
Save leighajarett/586fa667851f4b50e68de773c2ce8768 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"## Predicting if a passenger will die on the titanic ##"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv\n",
"from sklearn import linear_model\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"#Load Data\n",
"data = pd.DataFrame.from_csv('/Users/leighajarett/Desktop/titanic_train.csv')\n",
"test_data = pd.DataFrame.from_csv('/Users/leighajarett/Desktop/titanic_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" <tr>\n",
" <th>PassengerId</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Braund, Mr. Owen Harris</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>A/5 21171</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>PC 17599</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Heikkinen, Miss. Laina</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>STON/O2. 3101282</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Allen, Mr. William Henry</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Moran, Mr. James</td>\n",
" <td>male</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>330877</td>\n",
" <td>8.4583</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>McCarthy, Mr. Timothy J</td>\n",
" <td>male</td>\n",
" <td>54.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>17463</td>\n",
" <td>51.8625</td>\n",
" <td>E46</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>Palsson, Master. Gosta Leonard</td>\n",
" <td>male</td>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>349909</td>\n",
" <td>21.0750</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)</td>\n",
" <td>female</td>\n",
" <td>27.0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>347742</td>\n",
" <td>11.1333</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>Nasser, Mrs. Nicholas (Adele Achem)</td>\n",
" <td>female</td>\n",
" <td>14.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>237736</td>\n",
" <td>30.0708</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Pclass \\\n",
"PassengerId \n",
"1 0 3 \n",
"2 1 1 \n",
"3 1 3 \n",
"4 1 1 \n",
"5 0 3 \n",
"6 0 3 \n",
"7 0 1 \n",
"8 0 3 \n",
"9 1 3 \n",
"10 1 2 \n",
"\n",
" Name Sex Age \\\n",
"PassengerId \n",
"1 Braund, Mr. Owen Harris male 22.0 \n",
"2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n",
"3 Heikkinen, Miss. Laina female 26.0 \n",
"4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n",
"5 Allen, Mr. William Henry male 35.0 \n",
"6 Moran, Mr. James male NaN \n",
"7 McCarthy, Mr. Timothy J male 54.0 \n",
"8 Palsson, Master. Gosta Leonard male 2.0 \n",
"9 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.0 \n",
"10 Nasser, Mrs. Nicholas (Adele Achem) female 14.0 \n",
"\n",
" SibSp Parch Ticket Fare Cabin Embarked \n",
"PassengerId \n",
"1 1 0 A/5 21171 7.2500 NaN S \n",
"2 1 0 PC 17599 71.2833 C85 C \n",
"3 0 0 STON/O2. 3101282 7.9250 NaN S \n",
"4 1 0 113803 53.1000 C123 S \n",
"5 0 0 373450 8.0500 NaN S \n",
"6 0 0 330877 8.4583 NaN Q \n",
"7 0 0 17463 51.8625 E46 S \n",
"8 3 1 349909 21.0750 NaN S \n",
"9 0 2 347742 11.1333 NaN S \n",
"10 1 0 237736 30.0708 NaN C "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" <tr>\n",
" <th>PassengerId</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>892</th>\n",
" <td>3</td>\n",
" <td>Kelly, Mr. James</td>\n",
" <td>male</td>\n",
" <td>34.5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>330911</td>\n",
" <td>7.8292</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>893</th>\n",
" <td>3</td>\n",
" <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
" <td>female</td>\n",
" <td>47.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>363272</td>\n",
" <td>7.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>894</th>\n",
" <td>2</td>\n",
" <td>Myles, Mr. Thomas Francis</td>\n",
" <td>male</td>\n",
" <td>62.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>240276</td>\n",
" <td>9.6875</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>895</th>\n",
" <td>3</td>\n",
" <td>Wirz, Mr. Albert</td>\n",
" <td>male</td>\n",
" <td>27.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>315154</td>\n",
" <td>8.6625</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>896</th>\n",
" <td>3</td>\n",
" <td>Hirvonen, Mrs. Alexander (Helga E Lindqvist)</td>\n",
" <td>female</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3101298</td>\n",
" <td>12.2875</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>897</th>\n",
" <td>3</td>\n",
" <td>Svensson, Mr. Johan Cervin</td>\n",
" <td>male</td>\n",
" <td>14.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7538</td>\n",
" <td>9.2250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>898</th>\n",
" <td>3</td>\n",
" <td>Connolly, Miss. Kate</td>\n",
" <td>female</td>\n",
" <td>30.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>330972</td>\n",
" <td>7.6292</td>\n",
" <td>NaN</td>\n",
" <td>Q</td>\n",
" </tr>\n",
" <tr>\n",
" <th>899</th>\n",
" <td>2</td>\n",
" <td>Caldwell, Mr. Albert Francis</td>\n",
" <td>male</td>\n",
" <td>26.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>248738</td>\n",
" <td>29.0000</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>900</th>\n",
" <td>3</td>\n",
" <td>Abrahim, Mrs. Joseph (Sophie Halaut Easu)</td>\n",
" <td>female</td>\n",
" <td>18.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2657</td>\n",
" <td>7.2292</td>\n",
" <td>NaN</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>901</th>\n",
" <td>3</td>\n",
" <td>Davies, Mr. John Samuel</td>\n",
" <td>male</td>\n",
" <td>21.0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>A/4 48871</td>\n",
" <td>24.1500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Pclass Name Sex \\\n",
"PassengerId \n",
"892 3 Kelly, Mr. James male \n",
"893 3 Wilkes, Mrs. James (Ellen Needs) female \n",
"894 2 Myles, Mr. Thomas Francis male \n",
"895 3 Wirz, Mr. Albert male \n",
"896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) female \n",
"897 3 Svensson, Mr. Johan Cervin male \n",
"898 3 Connolly, Miss. Kate female \n",
"899 2 Caldwell, Mr. Albert Francis male \n",
"900 3 Abrahim, Mrs. Joseph (Sophie Halaut Easu) female \n",
"901 3 Davies, Mr. John Samuel male \n",
"\n",
" Age SibSp Parch Ticket Fare Cabin Embarked \n",
"PassengerId \n",
"892 34.5 0 0 330911 7.8292 NaN Q \n",
"893 47.0 1 0 363272 7.0000 NaN S \n",
"894 62.0 0 0 240276 9.6875 NaN Q \n",
"895 27.0 0 0 315154 8.6625 NaN S \n",
"896 22.0 1 1 3101298 12.2875 NaN S \n",
"897 14.0 0 0 7538 9.2250 NaN S \n",
"898 30.0 0 0 330972 7.6292 NaN Q \n",
"899 26.0 1 1 248738 29.0000 NaN S \n",
"900 18.0 0 0 2657 7.2292 NaN C \n",
"901 21.0 2 0 A/4 48871 24.1500 NaN S "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#data.Sex.replace(['female','male'],['0','1'],inplace=True) #make gender binary\n",
"#test_data.Sex.replace(['female','male'],['0','1'],inplace=True)\n",
"\n",
"X_train = [data.Pclass, data.Sex, data.SibSp, data.Parch] #include passenger class and sex \n",
"X_train = np.asarray(X_train) \n",
"X_train = X_train.transpose()\n",
"Y_train = np.asarray(data.Survived)\n",
"\n",
"X_test = [test_data.Pclass, test_data.Sex, test_data.SibSp, test_data.Parch]\n",
"X_test = np.asarray(X_test)\n",
"X_test = X_test.transpose()"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cls = linear_model.LogisticRegression()\n",
"fit = cls.fit(X_train, Y_train)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1,\n",
" 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,\n",
" 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0,\n",
" 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,\n",
" 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,\n",
" 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
" 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,\n",
" 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0,\n",
" 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1,\n",
" 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0,\n",
" 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0,\n",
" 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1,\n",
" 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,\n",
" 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,\n",
" 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1,\n",
" 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,\n",
" 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,\n",
" 1, 0, 0, 0])"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fit.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment