Created
April 12, 2020 22:23
-
-
Save craine/3a0a3b3c9adbf9f7b43d7c297d5f38e0 to your computer and use it in GitHub Desktop.
Titanic - how to fillna in pandas.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Titanic - how to fillna in pandas.ipynb", | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/craine/3a0a3b3c9adbf9f7b43d7c297d5f38e0/titanic-how-to-fillna-in-pandas.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "6DSpykHJR20e", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"**Goal**:\n", | |
"Learn how to fillna in pandas" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "m5w8W0PrSK-O", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"We need data. Titanic data works great for this exercise. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "XMDAawmHRtAc", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"#load pandas library\n", | |
"import pandas as pd\n", | |
"\n", | |
"#load the Titanic CSV to the dataset\n", | |
"titanic = pd.read_csv('https://raw.githubusercontent.com/pcsanwald/kaggle-titanic/master/train.csv')" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SOZaOI2FU9mH", | |
"colab_type": "code", | |
"outputId": "7a5d4e47-1a96-4c9a-916f-bc37dc503a22", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
} | |
}, | |
"source": [ | |
"titanic.head()" | |
], | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>survived</th>\n", | |
" <th>pclass</th>\n", | |
" <th>name</th>\n", | |
" <th>sex</th>\n", | |
" <th>age</th>\n", | |
" <th>sibsp</th>\n", | |
" <th>parch</th>\n", | |
" <th>ticket</th>\n", | |
" <th>fare</th>\n", | |
" <th>cabin</th>\n", | |
" <th>embarked</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>Braund, Mr. Owen Harris</td>\n", | |
" <td>male</td>\n", | |
" <td>22.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>A/5 21171</td>\n", | |
" <td>7.2500</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n", | |
" <td>female</td>\n", | |
" <td>38.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>PC 17599</td>\n", | |
" <td>71.2833</td>\n", | |
" <td>C85</td>\n", | |
" <td>C</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>Heikkinen, Miss. Laina</td>\n", | |
" <td>female</td>\n", | |
" <td>26.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>STON/O2. 3101282</td>\n", | |
" <td>7.9250</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n", | |
" <td>female</td>\n", | |
" <td>35.0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>113803</td>\n", | |
" <td>53.1000</td>\n", | |
" <td>C123</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>0</td>\n", | |
" <td>3</td>\n", | |
" <td>Allen, Mr. William Henry</td>\n", | |
" <td>male</td>\n", | |
" <td>35.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>373450</td>\n", | |
" <td>8.0500</td>\n", | |
" <td>NaN</td>\n", | |
" <td>S</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" survived pclass ... cabin embarked\n", | |
"0 0 3 ... NaN S\n", | |
"1 1 1 ... C85 C\n", | |
"2 1 3 ... NaN S\n", | |
"3 1 1 ... C123 S\n", | |
"4 0 3 ... NaN S\n", | |
"\n", | |
"[5 rows x 11 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 9 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5UMDom5BWrpM", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 221 | |
}, | |
"outputId": "3205921c-f37d-4514-c336-6cc04f7ed37f" | |
}, | |
"source": [ | |
"titanic.isnull().sum(axis = 0)" | |
], | |
"execution_count": 12, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"survived 0\n", | |
"pclass 0\n", | |
"name 0\n", | |
"sex 0\n", | |
"age 177\n", | |
"sibsp 0\n", | |
"parch 0\n", | |
"ticket 0\n", | |
"fare 0\n", | |
"cabin 687\n", | |
"embarked 2\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ogvHOHNYVGRM", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"titanic['age']=titanic['age'].fillna(titanic['age'].mean())" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mjjzdlZXcWXW", | |
"colab_type": "code", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 221 | |
}, | |
"outputId": "ef7be642-d90c-4942-9aac-75d5cacefa43" | |
}, | |
"source": [ | |
"titanic.isnull().sum(axis = 0)" | |
], | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"survived 0\n", | |
"pclass 0\n", | |
"name 0\n", | |
"sex 0\n", | |
"age 0\n", | |
"sibsp 0\n", | |
"parch 0\n", | |
"ticket 0\n", | |
"fare 0\n", | |
"cabin 687\n", | |
"embarked 2\n", | |
"dtype: int64" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 14 | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment