Last active
June 28, 2020 04:36
-
-
Save slingam00/fe219615301fe41fb4ffc6a1267b5d84 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Pandas Scrub Data" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Import Statement" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd # import pandas library" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load in Iris Dataset\n", | |
"\n", | |
"Download the Iris Dataset here:\n\n", | |
"https://github.com/slingam00/Introduction_to_Data_Science/blob/master/iris.csv" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.9</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width species\n", | |
"0 5.1 3.5 1.4 0.2 setosa\n", | |
"1 4.9 3.0 1.4 0.2 setosa\n", | |
"2 4.7 3.2 1.3 0.2 setosa\n", | |
"3 4.6 3.1 1.5 0.2 setosa\n", | |
"4 5.0 3.6 1.4 0.2 setosa\n", | |
".. ... ... ... ... ...\n", | |
"145 6.7 3.0 5.2 2.3 virginica\n", | |
"146 6.3 2.5 5.0 1.9 virginica\n", | |
"147 6.5 3.0 5.2 2.0 virginica\n", | |
"148 6.2 3.4 5.4 2.3 virginica\n", | |
"149 5.9 3.0 5.1 1.8 virginica\n", | |
"\n", | |
"[150 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Parse iris.csv (a relative path, so it is resolved against the working directory)\n", | |
"data = pd.read_csv('iris.csv')\n", | |
"data  # bare last expression, so the notebook renders the frame as a table" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Dropping Columns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 4 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length species\n", | |
"0 5.1 3.5 1.4 setosa\n", | |
"1 4.9 3.0 1.4 setosa\n", | |
"2 4.7 3.2 1.3 setosa\n", | |
"3 4.6 3.1 1.5 setosa\n", | |
"4 5.0 3.6 1.4 setosa\n", | |
".. ... ... ... ...\n", | |
"145 6.7 3.0 5.2 virginica\n", | |
"146 6.3 2.5 5.0 virginica\n", | |
"147 6.5 3.0 5.2 virginica\n", | |
"148 6.2 3.4 5.4 virginica\n", | |
"149 5.9 3.0 5.1 virginica\n", | |
"\n", | |
"[150 rows x 4 columns]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# `columns=` is the keyword spelling of `axis=1`. drop() hands back a new frame,\n", | |
"# so `data` itself still contains petal_width after this cell.\n", | |
"data.drop(columns='petal_width')" | |
] | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Dealing with NaN values" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Filling NaN with mean of column" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"C:\\Users\\13604\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>3.774497</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.400000</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.300000</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.500000</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.400000</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.200000</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.000000</td>\n", | |
" <td>1.9</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.200000</td>\n", | |
" <td>2.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.400000</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.100000</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width species\n", | |
"0 5.1 3.5 3.774497 0.2 setosa\n", | |
"1 4.9 3.0 1.400000 0.2 setosa\n", | |
"2 4.7 3.2 1.300000 0.2 setosa\n", | |
"3 4.6 3.1 1.500000 0.2 setosa\n", | |
"4 5.0 3.6 1.400000 0.2 setosa\n", | |
".. ... ... ... ... ...\n", | |
"145 6.7 3.0 5.200000 2.3 virginica\n", | |
"146 6.3 2.5 5.000000 1.9 virginica\n", | |
"147 6.5 3.0 5.200000 2.0 virginica\n", | |
"148 6.2 3.4 5.400000 2.3 virginica\n", | |
"149 5.9 3.0 5.100000 1.8 virginica\n", | |
"\n", | |
"[150 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Plant one missing value so there is something to fill. A single `.loc[row, col]`\n", | |
"# write goes straight into `data`; the chained form `data[col][0] = ...` may land on\n", | |
"# a temporary copy and raises SettingWithCopyWarning. Use a real float NaN rather\n", | |
"# than the string 'NaN' so the column keeps its float dtype.\n", | |
"data.loc[0, \"petal_length\"] = float(\"nan\")\n", | |
"\n", | |
"# Fill only petal_length, using the mean of its remaining values (mean() skips NaN).\n", | |
"# Passing the bare scalar would write it into NaNs of every column, not just this one.\n", | |
"data.fillna({\"petal_length\": data[\"petal_length\"].mean()})" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Fill NaN with 0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"C:\\Users\\13604\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>5.1</td>\n", | |
" <td>3.5</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.9</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>150 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width species\n", | |
"0 5.1 3.5 0.0 0.2 setosa\n", | |
"1 4.9 3.0 1.4 0.2 setosa\n", | |
"2 4.7 3.2 1.3 0.2 setosa\n", | |
"3 4.6 3.1 1.5 0.2 setosa\n", | |
"4 5.0 3.6 1.4 0.2 setosa\n", | |
".. ... ... ... ... ...\n", | |
"145 6.7 3.0 5.2 2.3 virginica\n", | |
"146 6.3 2.5 5.0 1.9 virginica\n", | |
"147 6.5 3.0 5.2 2.0 virginica\n", | |
"148 6.2 3.4 5.4 2.3 virginica\n", | |
"149 5.9 3.0 5.1 1.8 virginica\n", | |
"\n", | |
"[150 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Plant one missing value with a single `.loc` write (no chained indexing, so no\n", | |
"# SettingWithCopyWarning), using a real float NaN instead of the string 'NaN'.\n", | |
"data.loc[0, 'petal_length'] = float('nan')\n", | |
"\n", | |
"data.fillna(0)  # returns a copy with every NaN replaced by 0; `data` is not modified" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Drop NaN Row" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"C:\\Users\\13604\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame\n", | |
"\n", | |
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", | |
" \"\"\"Entry point for launching an IPython kernel.\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>species</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>4.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>4.7</td>\n", | |
" <td>3.2</td>\n", | |
" <td>1.3</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4.6</td>\n", | |
" <td>3.1</td>\n", | |
" <td>1.5</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5.0</td>\n", | |
" <td>3.6</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0.2</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>5.4</td>\n", | |
" <td>3.9</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0.4</td>\n", | |
" <td>setosa</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>145</th>\n", | |
" <td>6.7</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>146</th>\n", | |
" <td>6.3</td>\n", | |
" <td>2.5</td>\n", | |
" <td>5.0</td>\n", | |
" <td>1.9</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>147</th>\n", | |
" <td>6.5</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>148</th>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>149</th>\n", | |
" <td>5.9</td>\n", | |
" <td>3.0</td>\n", | |
" <td>5.1</td>\n", | |
" <td>1.8</td>\n", | |
" <td>virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>149 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sepal_length sepal_width petal_length petal_width species\n", | |
"1 4.9 3.0 1.4 0.2 setosa\n", | |
"2 4.7 3.2 1.3 0.2 setosa\n", | |
"3 4.6 3.1 1.5 0.2 setosa\n", | |
"4 5.0 3.6 1.4 0.2 setosa\n", | |
"5 5.4 3.9 1.7 0.4 setosa\n", | |
".. ... ... ... ... ...\n", | |
"145 6.7 3.0 5.2 2.3 virginica\n", | |
"146 6.3 2.5 5.0 1.9 virginica\n", | |
"147 6.5 3.0 5.2 2.0 virginica\n", | |
"148 6.2 3.4 5.4 2.3 virginica\n", | |
"149 5.9 3.0 5.1 1.8 virginica\n", | |
"\n", | |
"[149 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Plant one missing value with a single `.loc` write (no chained indexing, so no\n", | |
"# SettingWithCopyWarning), using a real float NaN instead of the string 'NaN'.\n", | |
"data.loc[0, 'petal_length'] = float('nan')\n", | |
"\n", | |
"data.dropna()  # returns a copy without any row that contains a NaN" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment