Created
September 20, 2018 07:43
-
-
Save dharmeshdev19/2a54e69bc17c91dfd5814c40c8222bca to your computer and use it in GitHub Desktop.
Pandas / 02 - Learning Pandas / 04 - Dealing with Missing Data in Pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>A</th>\n", | |
" <th>B</th>\n", | |
" <th>C</th>\n", | |
" <th>D</th>\n", | |
" <th>Fruits</th>\n", | |
" <th>Extra Data</th>\n", | |
" <th>G</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>2016-07-01</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>apple</td>\n", | |
" <td>4</td>\n", | |
" <td>7</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-07-02</th>\n", | |
" <td>4</td>\n", | |
" <td>5</td>\n", | |
" <td>6</td>\n", | |
" <td>7</td>\n", | |
" <td>orange</td>\n", | |
" <td>7</td>\n", | |
" <td>107</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-07-03</th>\n", | |
" <td>8</td>\n", | |
" <td>9</td>\n", | |
" <td>10</td>\n", | |
" <td>11</td>\n", | |
" <td>banana</td>\n", | |
" <td>10</td>\n", | |
" <td>207</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-07-04</th>\n", | |
" <td>12</td>\n", | |
" <td>13</td>\n", | |
" <td>14</td>\n", | |
" <td>15</td>\n", | |
" <td>strawberry</td>\n", | |
" <td>13</td>\n", | |
" <td>307</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-07-05</th>\n", | |
" <td>16</td>\n", | |
" <td>17</td>\n", | |
" <td>18</td>\n", | |
" <td>19</td>\n", | |
" <td>blueberry</td>\n", | |
" <td>16</td>\n", | |
" <td>407</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2016-07-06</th>\n", | |
" <td>20</td>\n", | |
" <td>21</td>\n", | |
" <td>22</td>\n", | |
" <td>23</td>\n", | |
" <td>pineapple</td>\n", | |
" <td>19</td>\n", | |
" <td>507</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" A B C D Fruits Extra Data G\n", | |
"2016-07-01 0 1 2 3 apple 4 7\n", | |
"2016-07-02 4 5 6 7 orange 7 107\n", | |
"2016-07-03 8 9 10 11 banana 10 207\n", | |
"2016-07-04 12 13 14 15 strawberry 13 307\n", | |
"2016-07-05 16 17 18 19 blueberry 16 407\n", | |
"2016-07-06 20 21 22 23 pineapple 19 507" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"\n", | |
"starting_date = '20160701'\n", | |
"sample_numpy_data = np.array(np.arange(24)).reshape((6,4))\n", | |
"dates_index = pd.date_range(starting_date, periods=6)\n", | |
"sample_df = pd.DataFrame(sample_numpy_data, index=dates_index, columns=list('ABCD'))\n", | |
"\n", | |
"sample_df_2 = sample_df.copy()\n", | |
"sample_df_2['Fruits'] = ['apple', 'orange','banana','strawberry','blueberry','pineapple']\n", | |
"\n", | |
"sample_series = pd.Series([1,2,3,4,5,6], index=pd.date_range(starting_date, periods=6))\n", | |
"sample_df_2['Extra Data'] = sample_series *3 +1\n", | |
"\n", | |
"second_numpy_array = np.array(np.arange(len(sample_df_2))) *100 + 7\n", | |
"sample_df_2['G'] = second_numpy_array\n", | |
"\n", | |
"sample_df_2" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Missing Data\n", | |
"pandas uses NumPy's `np.nan` to represent missing data. By default, missing values are skipped by aggregations such as `sum()` and `mean()`." | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Using reindex() method" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Firefox</th>\n", | |
" <td>200</td>\n", | |
" <td>0.04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>404</td>\n", | |
" <td>0.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Konqueror</th>\n", | |
" <td>301</td>\n", | |
" <td>1.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Firefox 200 0.04\n", | |
"Chrome 200 0.02\n", | |
"Safari 404 0.07\n", | |
"IE10 404 0.08\n", | |
"Konqueror 301 1.00" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']\n", | |
"\n", | |
"browser_df = pd.DataFrame({\n", | |
" 'http_status': [200,200,404,404,301],\n", | |
" 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]},\n", | |
" index=browser_index)\n", | |
"browser_df" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"### reindex() creates a copy (not a view)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Iceweasel</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Comodo Dragon</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Safari 404.0 0.07\n", | |
"Iceweasel NaN NaN\n", | |
"Comodo Dragon NaN NaN\n", | |
"IE10 404.0 0.08\n", | |
"Chrome 200.0 0.02" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"new_index= ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 'Chrome']\n", | |
"browser_df_2 = browser_df.reindex(new_index) # labels not found in the original index get a row filled with NaN\n", | |
"browser_df_2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Firefox</th>\n", | |
" <td>200</td>\n", | |
" <td>0.04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>404</td>\n", | |
" <td>0.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Konqueror</th>\n", | |
" <td>301</td>\n", | |
" <td>1.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Firefox 200 0.04\n", | |
"Chrome 200 0.02\n", | |
"Safari 404 0.07\n", | |
"IE10 404 0.08\n", | |
"Konqueror 301 1.00" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df # compare with the output above: the original browser_df is unchanged and keeps its original index" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### drop rows that have missing data\n", | |
"documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dropna.html" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Safari 404.0 0.07\n", | |
"IE10 404.0 0.08\n", | |
"Chrome 200.0 0.02" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df_3 = browser_df_2.dropna(how='any') # ‘any’ : If any NA values are present, drop that row or column.\n", | |
"browser_df_3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Firefox</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>NaN</td>\n", | |
" <td>0.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Konqueror</th>\n", | |
" <td>301.0</td>\n", | |
" <td>1.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Firefox 200.0 0.04\n", | |
"Chrome 200.0 0.02\n", | |
"Safari NaN 0.07\n", | |
"IE10 404.0 0.08\n", | |
"Konqueror 301.0 1.00" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df_test = pd.DataFrame({\n", | |
" 'http_status': [200,200,np.nan,404,301],\n", | |
" 'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]},\n", | |
" index=browser_index)\n", | |
"browser_df_test" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Firefox</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>NaN</td>\n", | |
" <td>0.07</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Konqueror</th>\n", | |
" <td>301.0</td>\n", | |
" <td>1.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Firefox 200.0 0.04\n", | |
"Chrome 200.0 0.02\n", | |
"Safari NaN 0.07\n", | |
"IE10 404.0 0.08\n", | |
"Konqueror 301.0 1.00" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df_test.dropna(how='all') # how='all' drops a row only when every value in it is NA, so the Safari row (a single NaN) is kept" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Firefox</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.04</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200.0</td>\n", | |
" <td>0.02</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404.0</td>\n", | |
" <td>0.08</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Konqueror</th>\n", | |
" <td>301.0</td>\n", | |
" <td>1.00</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Firefox 200.0 0.04\n", | |
"Chrome 200.0 0.02\n", | |
"IE10 404.0 0.08\n", | |
"Konqueror 301.0 1.00" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df_test.dropna(how='any')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### fill in missing data\n", | |
"documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.fillna.html" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>404.00000</td>\n", | |
" <td>0.07000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Iceweasel</th>\n", | |
" <td>-0.05555</td>\n", | |
" <td>-0.05555</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Comodo Dragon</th>\n", | |
" <td>-0.05555</td>\n", | |
" <td>-0.05555</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>404.00000</td>\n", | |
" <td>0.08000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>200.00000</td>\n", | |
" <td>0.02000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Safari 404.00000 0.07000\n", | |
"Iceweasel -0.05555 -0.05555\n", | |
"Comodo Dragon -0.05555 -0.05555\n", | |
"IE10 404.00000 0.08000\n", | |
"Chrome 200.00000 0.02000" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df_2.fillna(value=-0.05555)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### get boolean mask where values are NaN" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Iceweasel</th>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Comodo Dragon</th>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Safari False False\n", | |
"Iceweasel True True\n", | |
"Comodo Dragon True True\n", | |
"IE10 False False\n", | |
"Chrome False False" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pd.isnull(browser_df_2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### NaN propagates during arithmetic operations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>http_status</th>\n", | |
" <th>response_time</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>Safari</th>\n", | |
" <td>6868.0</td>\n", | |
" <td>1.19</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Iceweasel</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Comodo Dragon</th>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>IE10</th>\n", | |
" <td>6868.0</td>\n", | |
" <td>1.36</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>Chrome</th>\n", | |
" <td>3400.0</td>\n", | |
" <td>0.34</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" http_status response_time\n", | |
"Safari 6868.0 1.19\n", | |
"Iceweasel NaN NaN\n", | |
"Comodo Dragon NaN NaN\n", | |
"IE10 6868.0 1.36\n", | |
"Chrome 3400.0 0.34" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"browser_df_2 * 17" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment