Skip to content

Instantly share code, notes, and snippets.

@kylekyle
Created April 20, 2020 17:31
Show Gist options
  • Save kylekyle/02586613790b4ce462bca774abf1451f to your computer and use it in GitHub Desktop.
Save kylekyle/02586613790b4ce462bca774abf1451f to your computer and use it in GitHub Desktop.
titanic.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "titanic.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNs/ZVIm71p+sEbzc0kG0ac",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/kylekyle/02586613790b4ce462bca774abf1451f/titanic.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "T-k5daHhc-ik",
"colab_type": "text"
},
"source": [
"# Load dataset"
]
},
{
"cell_type": "code",
"metadata": {
"id": "ou8rlGSWcaPp",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "334ded62-fa1b-4984-aa96-2669ec214de2"
},
"source": [
"import pandas as pd\n",
"\n",
"# Titanic passenger records, fetched as CSV from Google Drive.\n",
"raw = pd.read_csv(\"https://drive.google.com/uc?export=download&id=15eLEu3cR96AwP6k9A0Oav7RLI7Zq9D1c\")\n",
"\n",
"# Drop the columns this notebook does not model.\n",
"df = raw.drop(columns=['Name','Ticket','Embarked', 'Cabin', 'PassengerId'])\n",
"\n",
"# One-hot encode the categorical columns; drop_first removes the redundant reference level.\n",
"# dtype is pinned to 0/1 integers because pandas >= 2.0 would otherwise emit booleans.\n",
"df = pd.get_dummies(data=df, columns=['Pclass', 'Sex'], drop_first=True, dtype='uint8')\n",
"\n",
"df.head()"
],
"execution_count": 1,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Pclass_2</th>\n",
" <th>Pclass_3</th>\n",
" <th>Sex_male</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7.2500</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>71.2833</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>53.1000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8.0500</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Age SibSp Parch Fare Pclass_2 Pclass_3 Sex_male\n",
"0 0 22.0 1 0 7.2500 0 1 1\n",
"1 1 38.0 1 0 71.2833 0 0 0\n",
"2 1 26.0 0 0 7.9250 0 1 0\n",
"3 1 35.0 1 0 53.1000 0 0 0\n",
"4 0 35.0 0 0 8.0500 0 1 1"
]
},
"metadata": {
"tags": []
},
"execution_count": 1
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "IBtHAKmrdCHW",
"colab_type": "text"
},
"source": [
"# Split"
]
},
{
"cell_type": "code",
"metadata": {
"id": "LrPLCj01c2dj",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "5da2b98f-595e-4a33-eec9-c3dd3130b8cf"
},
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Features are every column except the label.\n",
"X, y = df.drop(columns='Survived'), df['Survived']\n",
"\n",
"# Hold out 10% of the rows. The seed is fixed so the split, and every number\n",
"# computed from it further down, is the same on each Restart & Run All.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)\n",
"\n",
"X_train.head()"
],
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Pclass_2</th>\n",
" <th>Pclass_3</th>\n",
" <th>Sex_male</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>217</th>\n",
" <td>42.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>27.0000</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>480</th>\n",
" <td>9.0</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>46.9000</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>590</th>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.1250</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>713</th>\n",
" <td>29.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9.4833</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>258</th>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>512.3292</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Age SibSp Parch Fare Pclass_2 Pclass_3 Sex_male\n",
"217 42.0 1 0 27.0000 1 0 1\n",
"480 9.0 5 2 46.9000 0 1 1\n",
"590 35.0 0 0 7.1250 0 1 1\n",
"713 29.0 0 0 9.4833 0 1 1\n",
"258 35.0 0 0 512.3292 0 0 0"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XWLue-hGjiWL",
"colab_type": "text"
},
"source": [
"# Scale\n",
"\n",
"Remember, you must scale *after* you split, or you'll leak information from the held-out set into the training set. For instance, if you scale before splitting, the ages of people in your validation set will be reflected in the mean and standard deviation used to scale your training set."
]
},
{
"cell_type": "code",
"metadata": {
"id": "5HLePtb3cjmZ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"outputId": "68a4f030-14dd-4c4b-d556-ed9ae0467d8f"
},
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Work on the split frames rather than `df`: X_train / X_test are separate\n",
"# frames produced by train_test_split, so rescaling `df` would never reach the model.\n",
"num_cols = ['Age', 'Fare']\n",
"\n",
"# Missing ages are the likely cause of a `loss: nan` during training.\n",
"# Fill them with the training-set median so no held-out information is used.\n",
"age_median = X_train['Age'].median()\n",
"X_train = X_train.assign(Age=X_train['Age'].fillna(age_median))\n",
"X_test = X_test.assign(Age=X_test['Age'].fillna(age_median))\n",
"\n",
"# Fit the scaler on the training split only, then apply it to both splits.\n",
"# Re-running this cell is harmless: standardizing already-standardized\n",
"# columns leaves them essentially unchanged.\n",
"scaler = StandardScaler().fit(X_train[num_cols])\n",
"X_train[num_cols] = scaler.transform(X_train[num_cols])\n",
"X_test[num_cols] = scaler.transform(X_test[num_cols])\n",
"\n",
"X_train.head()"
],
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Survived</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Fare</th>\n",
" <th>Pclass_2</th>\n",
" <th>Pclass_3</th>\n",
" <th>Sex_male</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>-0.530377</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>-0.502445</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>0.571831</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.786845</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>-0.254825</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.488854</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>0.365167</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0.420730</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0.365167</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>-0.486337</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Survived Age SibSp Parch Fare Pclass_2 Pclass_3 Sex_male\n",
"0 0 -0.530377 1 0 -0.502445 0 1 1\n",
"1 1 0.571831 1 0 0.786845 0 0 0\n",
"2 1 -0.254825 0 0 -0.488854 0 1 0\n",
"3 1 0.365167 1 0 0.420730 0 0 0\n",
"4 0 0.365167 0 0 -0.486337 0 1 1"
]
},
"metadata": {
"tags": []
},
"execution_count": 3
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BqBMRudwkRUy",
"colab_type": "text"
},
"source": [
"# Train"
]
},
{
"cell_type": "code",
"metadata": {
"id": "7T3IP3RDkNFP",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 187
},
"outputId": "37056439-51f3-4431-d020-ccb64512cfa1"
},
"source": [
"import tensorflow\n",
"from tensorflow.keras.layers import Dense\n",
"from tensorflow.keras.models import Sequential\n",
"\n",
"# Cast the features to a single float dtype: a frame that mixes float, int and\n",
"# bool columns can otherwise reach Keras as an object array.\n",
"X_train_f, X_test_f = X_train.astype('float32'), X_test.astype('float32')\n",
"\n",
"# Two hidden ReLU layers and a sigmoid output for the binary Survived label.\n",
"model = Sequential()\n",
"model.add(Dense(64, activation='relu', input_dim=X_train_f.shape[1]))\n",
"model.add(Dense(64, activation='relu'))\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"model.compile(loss='binary_crossentropy', optimizer='RMSProp', metrics=['acc'])\n",
"\n",
"# The held-out split from train_test_split doubles as the validation data here.\n",
"history = model.fit(X_train_f, y_train, batch_size=1, epochs=5, validation_data=(X_test_f, y_test))"
],
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"text": [
"Epoch 1/5\n",
"801/801 [==============================] - 1s 1ms/step - loss: nan - acc: 0.6192 - val_loss: nan - val_acc: 0.6222\n",
"Epoch 2/5\n",
"801/801 [==============================] - 1s 1ms/step - loss: nan - acc: 0.6155 - val_loss: nan - val_acc: 0.6222\n",
"Epoch 3/5\n",
"801/801 [==============================] - 1s 1ms/step - loss: nan - acc: 0.6155 - val_loss: nan - val_acc: 0.6222\n",
"Epoch 4/5\n",
"801/801 [==============================] - 1s 1ms/step - loss: nan - acc: 0.6155 - val_loss: nan - val_acc: 0.6222\n",
"Epoch 5/5\n",
"801/801 [==============================] - 1s 1ms/step - loss: nan - acc: 0.6155 - val_loss: nan - val_acc: 0.6222\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fKgEbWr6l5bL",
"colab_type": "text"
},
"source": [
"# Plot"
]
},
{
"cell_type": "code",
"metadata": {
"id": "YlVcKPCpl4QZ",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295
},
"outputId": "4a8e50e9-cfc9-4718-c273-98f582ec23a4"
},
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Draw training accuracy first, then validation accuracy, one point per epoch.\n",
"for metric in ('acc', 'val_acc'):\n",
"    plt.plot(history.history[metric])\n",
"\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('Accuracy')\n",
"plt.title('Model accuracy')\n",
"plt.legend(['Train', 'Test'], loc='upper left')\n",
"plt.show()"
],
"execution_count": 6,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"tags": [],
"needs_background": "light"
}
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment