-
-
Save pavithraes/546d3d35165d2925be75f53f0387008d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "b92238fb-9019-4d33-b5f6-827d639572f1", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask.dataframe as dd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "4d7014ee-89df-44e1-9181-61b23acbee78", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"dtype = {\n", | |
" 'IMD_Decile_From_LSOA': 'object',\n", | |
" 'Age_Band': 'object',\n", | |
" 'Sex': 'object',\n", | |
" 'AE_Arrive_Date': 'object',\n", | |
" 'AE_Arrive_HourOfDay': 'object',\n", | |
" 'AE_Time_Mins': 'object',\n", | |
" 'AE_HRG': 'object',\n", | |
" 'AE_Num_Diagnoses': 'object',\n", | |
" 'AE_Num_Investigations': 'object',\n", | |
" 'AE_Num_Treatments': 'object',\n", | |
" 'AE_Arrival_Mode': 'object',\n", | |
" 'Provider_Patient_Distance_Miles': 'object',\n", | |
" 'ProvID': 'object',\n", | |
" 'Admitted_Flag': 'object',\n", | |
" 'Admission_Method': 'object',\n", | |
" 'ICD10_Chapter_Code': 'object',\n", | |
" 'Treatment_Function_Code': 'object',\n", | |
" 'Length_Of_Stay_Days': 'object',\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "1a54b556-73ea-4f47-9d7b-e2a650d92f75", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Baseline read: only dtype is overridden (no header= or names= arguments).\n", | |
"ddf1 = dd.read_csv(\"data/AESyntheticData.csv\", dtype=dtype)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "dd5094b0-d350-4e4f-b940-87c3723a1159", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IMD_Decile_From_LSOA</th>\n", | |
" <th>Age_Band</th>\n", | |
" <th>Sex</th>\n", | |
" <th>AE_Arrive_Date</th>\n", | |
" <th>AE_Arrive_HourOfDay</th>\n", | |
" <th>AE_Time_Mins</th>\n", | |
" <th>AE_HRG</th>\n", | |
" <th>AE_Num_Diagnoses</th>\n", | |
" <th>AE_Num_Investigations</th>\n", | |
" <th>AE_Num_Treatments</th>\n", | |
" <th>AE_Arrival_Mode</th>\n", | |
" <th>Provider_Patient_Distance_Miles</th>\n", | |
" <th>ProvID</th>\n", | |
" <th>Admitted_Flag</th>\n", | |
" <th>Admission_Method</th>\n", | |
" <th>ICD10_Chapter_Code</th>\n", | |
" <th>Treatment_Function_Code</th>\n", | |
" <th>Length_Of_Stay_Days</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>65-84</td>\n", | |
" <td>1</td>\n", | |
" <td>2015-07-02 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>210</td>\n", | |
" <td>High</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15207</td>\n", | |
" <td>1</td>\n", | |
" <td>21</td>\n", | |
" <td>XVIII</td>\n", | |
" <td>180</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>6</td>\n", | |
" <td>18-24</td>\n", | |
" <td>1</td>\n", | |
" <td>2017-05-31 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>20</td>\n", | |
" <td>Low</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>15321</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>65-84</td>\n", | |
" <td>2</td>\n", | |
" <td>2015-10-25 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>280</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>15269</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>25-44</td>\n", | |
" <td>1</td>\n", | |
" <td>2014-06-16 00:00:00</td>\n", | |
" <td>21-24</td>\n", | |
" <td>150</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15239</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>7</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2</td>\n", | |
" <td>2017-11-18 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>180</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15286</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IMD_Decile_From_LSOA Age_Band Sex AE_Arrive_Date AE_Arrive_HourOfDay \\\n", | |
"0 2 65-84 1 2015-07-02 00:00:00 17-20 \n", | |
"1 6 18-24 1 2017-05-31 00:00:00 17-20 \n", | |
"2 2 65-84 2 2015-10-25 00:00:00 13-16 \n", | |
"3 4 25-44 1 2014-06-16 00:00:00 21-24 \n", | |
"4 7 18-24 2 2017-11-18 00:00:00 13-16 \n", | |
"\n", | |
" AE_Time_Mins AE_HRG AE_Num_Diagnoses AE_Num_Investigations \\\n", | |
"0 210 High 1 5 \n", | |
"1 20 Low 0 1 \n", | |
"2 280 Nothing 1 0 \n", | |
"3 150 Low 1 2 \n", | |
"4 180 Low 1 1 \n", | |
"\n", | |
" AE_Num_Treatments AE_Arrival_Mode Provider_Patient_Distance_Miles ProvID \\\n", | |
"0 3 2 1 15207 \n", | |
"1 1 2 5 15321 \n", | |
"2 1 1 2 15269 \n", | |
"3 2 2 1 15239 \n", | |
"4 2 2 1 15286 \n", | |
"\n", | |
" Admitted_Flag Admission_Method ICD10_Chapter_Code Treatment_Function_Code \\\n", | |
"0 1 21 XVIII 180 \n", | |
"1 0 NaN NaN NaN \n", | |
"2 0 NaN NaN NaN \n", | |
"3 0 NaN NaN NaN \n", | |
"4 0 NaN NaN NaN \n", | |
"\n", | |
" Length_Of_Stay_Days \n", | |
"0 1 \n", | |
"1 NaN \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf1.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "000770c6-0c0e-4a95-88a0-6d018a828941", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>3</th>\n", | |
" <th>45-64</th>\n", | |
" <th>2</th>\n", | |
" <th>2016-06-08 00:00:00</th>\n", | |
" <th>17-20</th>\n", | |
" <th>100</th>\n", | |
" <th>Nothing</th>\n", | |
" <th>1</th>\n", | |
" <th>1.1</th>\n", | |
" <th>1.2</th>\n", | |
" <th>2.1</th>\n", | |
" <th>1.3</th>\n", | |
" <th>15371</th>\n", | |
" <th>0</th>\n", | |
" <th>Unnamed: 14</th>\n", | |
" <th>Unnamed: 15</th>\n", | |
" <th>Unnamed: 16</th>\n", | |
" <th>Unnamed: 17</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>927216</th>\n", | |
" <td>7.0</td>\n", | |
" <td>1-17</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-03-01 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>170</td>\n", | |
" <td>Medium</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3.0</td>\n", | |
" <td>15264</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927217</th>\n", | |
" <td>4.0</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2015-03-07 00:00:00</td>\n", | |
" <td>01-04</td>\n", | |
" <td>140</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>1.0</td>\n", | |
" <td>15183</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927218</th>\n", | |
" <td>7.0</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2014-05-03 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>100</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>4.0</td>\n", | |
" <td>15206</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927219</th>\n", | |
" <td>7.0</td>\n", | |
" <td>45-64</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-02-11 00:00:00</td>\n", | |
" <td>09-12</td>\n", | |
" <td>80</td>\n", | |
" <td>Medium</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>15187</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927220</th>\n", | |
" <td>4.0</td>\n", | |
" <td>1-17</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2017-12-04 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>130</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>15109</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 3 45-64 2 2016-06-08 00:00:00 17-20 100 Nothing 1 1.1 \\\n", | |
"927216 7.0 1-17 1.0 2016-03-01 00:00:00 13-16 170 Medium 1 3 \n", | |
"927217 4.0 18-24 2.0 2015-03-07 00:00:00 01-04 140 Low 1 0 \n", | |
"927218 7.0 18-24 2.0 2014-05-03 00:00:00 13-16 100 Nothing 1 1 \n", | |
"927219 7.0 45-64 1.0 2016-02-11 00:00:00 09-12 80 Medium 0 1 \n", | |
"927220 4.0 1-17 2.0 2017-12-04 00:00:00 17-20 130 Nothing 0 1 \n", | |
"\n", | |
" 1.2 2.1 1.3 15371 0 Unnamed: 14 Unnamed: 15 Unnamed: 16 \\\n", | |
"927216 3 1 3.0 15264 0.0 NaN NaN NaN \n", | |
"927217 1 2 1.0 15183 0.0 NaN NaN NaN \n", | |
"927218 1 2 4.0 15206 0.0 NaN NaN NaN \n", | |
"927219 5 2 2.0 15187 0.0 NaN NaN NaN \n", | |
"927220 1 2 2.0 15109 0.0 NaN NaN NaN \n", | |
"\n", | |
" Unnamed: 17 \n", | |
"927216 NaN \n", | |
"927217 NaN \n", | |
"927218 NaN \n", | |
"927219 NaN \n", | |
"927220 NaN " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf1.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "c4f1f865-a090-49a5-a958-9d8689b9fbe5", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def generalize_timestamp(timestamp):\n", | |
" return timestamp[:10]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bc1de1ad-3dc3-42c1-a6c1-768bf62cce1b", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# KeyError\n", | |
"ddf1['AE_Arrive_Date'].apply(generalize_timestamp, meta=('AE_Arrive_Date', 'object')).persist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "c3d073d5-eaef-4591-a9c4-97f7b47e706f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Same read as ddf1, but with header=0 passed explicitly.\n", | |
"ddf2 = dd.read_csv(\"data/AESyntheticData.csv\", header=0, dtype=dtype)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "b9835438-3686-420f-bc94-333f8ff676ff", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IMD_Decile_From_LSOA</th>\n", | |
" <th>Age_Band</th>\n", | |
" <th>Sex</th>\n", | |
" <th>AE_Arrive_Date</th>\n", | |
" <th>AE_Arrive_HourOfDay</th>\n", | |
" <th>AE_Time_Mins</th>\n", | |
" <th>AE_HRG</th>\n", | |
" <th>AE_Num_Diagnoses</th>\n", | |
" <th>AE_Num_Investigations</th>\n", | |
" <th>AE_Num_Treatments</th>\n", | |
" <th>AE_Arrival_Mode</th>\n", | |
" <th>Provider_Patient_Distance_Miles</th>\n", | |
" <th>ProvID</th>\n", | |
" <th>Admitted_Flag</th>\n", | |
" <th>Admission_Method</th>\n", | |
" <th>ICD10_Chapter_Code</th>\n", | |
" <th>Treatment_Function_Code</th>\n", | |
" <th>Length_Of_Stay_Days</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>2</td>\n", | |
" <td>65-84</td>\n", | |
" <td>1</td>\n", | |
" <td>2015-07-02 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>210</td>\n", | |
" <td>High</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15207</td>\n", | |
" <td>1</td>\n", | |
" <td>21</td>\n", | |
" <td>XVIII</td>\n", | |
" <td>180</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>6</td>\n", | |
" <td>18-24</td>\n", | |
" <td>1</td>\n", | |
" <td>2017-05-31 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>20</td>\n", | |
" <td>Low</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>15321</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>2</td>\n", | |
" <td>65-84</td>\n", | |
" <td>2</td>\n", | |
" <td>2015-10-25 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>280</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>15269</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>25-44</td>\n", | |
" <td>1</td>\n", | |
" <td>2014-06-16 00:00:00</td>\n", | |
" <td>21-24</td>\n", | |
" <td>150</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15239</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>7</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2</td>\n", | |
" <td>2017-11-18 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>180</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15286</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IMD_Decile_From_LSOA Age_Band Sex AE_Arrive_Date AE_Arrive_HourOfDay \\\n", | |
"0 2 65-84 1 2015-07-02 00:00:00 17-20 \n", | |
"1 6 18-24 1 2017-05-31 00:00:00 17-20 \n", | |
"2 2 65-84 2 2015-10-25 00:00:00 13-16 \n", | |
"3 4 25-44 1 2014-06-16 00:00:00 21-24 \n", | |
"4 7 18-24 2 2017-11-18 00:00:00 13-16 \n", | |
"\n", | |
" AE_Time_Mins AE_HRG AE_Num_Diagnoses AE_Num_Investigations \\\n", | |
"0 210 High 1 5 \n", | |
"1 20 Low 0 1 \n", | |
"2 280 Nothing 1 0 \n", | |
"3 150 Low 1 2 \n", | |
"4 180 Low 1 1 \n", | |
"\n", | |
" AE_Num_Treatments AE_Arrival_Mode Provider_Patient_Distance_Miles ProvID \\\n", | |
"0 3 2 1 15207 \n", | |
"1 1 2 5 15321 \n", | |
"2 1 1 2 15269 \n", | |
"3 2 2 1 15239 \n", | |
"4 2 2 1 15286 \n", | |
"\n", | |
" Admitted_Flag Admission_Method ICD10_Chapter_Code Treatment_Function_Code \\\n", | |
"0 1 21 XVIII 180 \n", | |
"1 0 NaN NaN NaN \n", | |
"2 0 NaN NaN NaN \n", | |
"3 0 NaN NaN NaN \n", | |
"4 0 NaN NaN NaN \n", | |
"\n", | |
" Length_Of_Stay_Days \n", | |
"0 1 \n", | |
"1 NaN \n", | |
"2 NaN \n", | |
"3 NaN \n", | |
"4 NaN " | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf2.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "e2ec5dd4-7332-40b5-82f0-3fa668777777", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>3</th>\n", | |
" <th>45-64</th>\n", | |
" <th>2</th>\n", | |
" <th>2016-06-08 00:00:00</th>\n", | |
" <th>17-20</th>\n", | |
" <th>100</th>\n", | |
" <th>Nothing</th>\n", | |
" <th>1</th>\n", | |
" <th>1.1</th>\n", | |
" <th>1.2</th>\n", | |
" <th>2.1</th>\n", | |
" <th>1.3</th>\n", | |
" <th>15371</th>\n", | |
" <th>0</th>\n", | |
" <th>Unnamed: 14</th>\n", | |
" <th>Unnamed: 15</th>\n", | |
" <th>Unnamed: 16</th>\n", | |
" <th>Unnamed: 17</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>927216</th>\n", | |
" <td>7.0</td>\n", | |
" <td>1-17</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-03-01 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>170</td>\n", | |
" <td>Medium</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3.0</td>\n", | |
" <td>15264</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927217</th>\n", | |
" <td>4.0</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2015-03-07 00:00:00</td>\n", | |
" <td>01-04</td>\n", | |
" <td>140</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>1.0</td>\n", | |
" <td>15183</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927218</th>\n", | |
" <td>7.0</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2014-05-03 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>100</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>4.0</td>\n", | |
" <td>15206</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927219</th>\n", | |
" <td>7.0</td>\n", | |
" <td>45-64</td>\n", | |
" <td>1.0</td>\n", | |
" <td>2016-02-11 00:00:00</td>\n", | |
" <td>09-12</td>\n", | |
" <td>80</td>\n", | |
" <td>Medium</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>15187</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927220</th>\n", | |
" <td>4.0</td>\n", | |
" <td>1-17</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2017-12-04 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>130</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2.0</td>\n", | |
" <td>15109</td>\n", | |
" <td>0.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" 3 45-64 2 2016-06-08 00:00:00 17-20 100 Nothing 1 1.1 \\\n", | |
"927216 7.0 1-17 1.0 2016-03-01 00:00:00 13-16 170 Medium 1 3 \n", | |
"927217 4.0 18-24 2.0 2015-03-07 00:00:00 01-04 140 Low 1 0 \n", | |
"927218 7.0 18-24 2.0 2014-05-03 00:00:00 13-16 100 Nothing 1 1 \n", | |
"927219 7.0 45-64 1.0 2016-02-11 00:00:00 09-12 80 Medium 0 1 \n", | |
"927220 4.0 1-17 2.0 2017-12-04 00:00:00 17-20 130 Nothing 0 1 \n", | |
"\n", | |
" 1.2 2.1 1.3 15371 0 Unnamed: 14 Unnamed: 15 Unnamed: 16 \\\n", | |
"927216 3 1 3.0 15264 0.0 NaN NaN NaN \n", | |
"927217 1 2 1.0 15183 0.0 NaN NaN NaN \n", | |
"927218 1 2 4.0 15206 0.0 NaN NaN NaN \n", | |
"927219 5 2 2.0 15187 0.0 NaN NaN NaN \n", | |
"927220 1 2 2.0 15109 0.0 NaN NaN NaN \n", | |
"\n", | |
" Unnamed: 17 \n", | |
"927216 NaN \n", | |
"927217 NaN \n", | |
"927218 NaN \n", | |
"927219 NaN \n", | |
"927220 NaN " | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf2.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "7d47dbf0-2a8d-49d6-ad58-1659284c62ba", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"cols = ['IMD_Decile_From_LSOA',\n", | |
" 'Age_Band',\n", | |
" 'Sex',\n", | |
" 'AE_Arrive_Date',\n", | |
" 'AE_Arrive_HourOfDay',\n", | |
" 'AE_Time_Mins',\n", | |
" 'AE_HRG',\n", | |
" 'AE_Num_Diagnoses',\n", | |
" 'AE_Num_Investigations',\n", | |
" 'AE_Num_Treatments',\n", | |
" 'AE_Arrival_Mode',\n", | |
" 'Provider_Patient_Distance_Miles',\n", | |
" 'ProvID',\n", | |
" 'Admitted_Flag',\n", | |
" 'Admission_Method',\n", | |
" 'ICD10_Chapter_Code',\n", | |
" 'Treatment_Function_Code',\n", | |
" 'Length_Of_Stay_Days']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "8ca6336c-b7a6-47ca-b327-bb9a9f54b838", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Read again with explicit column names (names=cols, no header= argument).\n", | |
"# ddf3.head() below shows the file's own header line arriving as data row 0.\n", | |
"ddf3 = dd.read_csv(\"data/AESyntheticData.csv\", names=cols, dtype=dtype)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "9ef10303-7a4e-4011-8aa9-c0cb8b65b9a7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IMD_Decile_From_LSOA</th>\n", | |
" <th>Age_Band</th>\n", | |
" <th>Sex</th>\n", | |
" <th>AE_Arrive_Date</th>\n", | |
" <th>AE_Arrive_HourOfDay</th>\n", | |
" <th>AE_Time_Mins</th>\n", | |
" <th>AE_HRG</th>\n", | |
" <th>AE_Num_Diagnoses</th>\n", | |
" <th>AE_Num_Investigations</th>\n", | |
" <th>AE_Num_Treatments</th>\n", | |
" <th>AE_Arrival_Mode</th>\n", | |
" <th>Provider_Patient_Distance_Miles</th>\n", | |
" <th>ProvID</th>\n", | |
" <th>Admitted_Flag</th>\n", | |
" <th>Admission_Method</th>\n", | |
" <th>ICD10_Chapter_Code</th>\n", | |
" <th>Treatment_Function_Code</th>\n", | |
" <th>Length_Of_Stay_Days</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>IMD_Decile_From_LSOA</td>\n", | |
" <td>Age_Band</td>\n", | |
" <td>Sex</td>\n", | |
" <td>AE_Arrive_Date</td>\n", | |
" <td>AE_Arrive_HourOfDay</td>\n", | |
" <td>AE_Time_Mins</td>\n", | |
" <td>AE_HRG</td>\n", | |
" <td>AE_Num_Diagnoses</td>\n", | |
" <td>AE_Num_Investigations</td>\n", | |
" <td>AE_Num_Treatments</td>\n", | |
" <td>AE_Arrival_Mode</td>\n", | |
" <td>Provider_Patient_Distance_Miles</td>\n", | |
" <td>ProvID</td>\n", | |
" <td>Admitted_Flag</td>\n", | |
" <td>Admission_Method</td>\n", | |
" <td>ICD10_Chapter_Code</td>\n", | |
" <td>Treatment_Function_Code</td>\n", | |
" <td>Length_Of_Stay_Days</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>65-84</td>\n", | |
" <td>1</td>\n", | |
" <td>2015-07-02 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>210</td>\n", | |
" <td>High</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>3</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15207</td>\n", | |
" <td>1</td>\n", | |
" <td>21</td>\n", | |
" <td>XVIII</td>\n", | |
" <td>180</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>6</td>\n", | |
" <td>18-24</td>\n", | |
" <td>1</td>\n", | |
" <td>2017-05-31 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>20</td>\n", | |
" <td>Low</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>5</td>\n", | |
" <td>15321</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>2</td>\n", | |
" <td>65-84</td>\n", | |
" <td>2</td>\n", | |
" <td>2015-10-25 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>280</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>15269</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>4</td>\n", | |
" <td>25-44</td>\n", | |
" <td>1</td>\n", | |
" <td>2014-06-16 00:00:00</td>\n", | |
" <td>21-24</td>\n", | |
" <td>150</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15239</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IMD_Decile_From_LSOA Age_Band Sex AE_Arrive_Date \\\n", | |
"0 IMD_Decile_From_LSOA Age_Band Sex AE_Arrive_Date \n", | |
"1 2 65-84 1 2015-07-02 00:00:00 \n", | |
"2 6 18-24 1 2017-05-31 00:00:00 \n", | |
"3 2 65-84 2 2015-10-25 00:00:00 \n", | |
"4 4 25-44 1 2014-06-16 00:00:00 \n", | |
"\n", | |
" AE_Arrive_HourOfDay AE_Time_Mins AE_HRG AE_Num_Diagnoses \\\n", | |
"0 AE_Arrive_HourOfDay AE_Time_Mins AE_HRG AE_Num_Diagnoses \n", | |
"1 17-20 210 High 1 \n", | |
"2 17-20 20 Low 0 \n", | |
"3 13-16 280 Nothing 1 \n", | |
"4 21-24 150 Low 1 \n", | |
"\n", | |
" AE_Num_Investigations AE_Num_Treatments AE_Arrival_Mode \\\n", | |
"0 AE_Num_Investigations AE_Num_Treatments AE_Arrival_Mode \n", | |
"1 5 3 2 \n", | |
"2 1 1 2 \n", | |
"3 0 1 1 \n", | |
"4 2 2 2 \n", | |
"\n", | |
" Provider_Patient_Distance_Miles ProvID Admitted_Flag Admission_Method \\\n", | |
"0 Provider_Patient_Distance_Miles ProvID Admitted_Flag Admission_Method \n", | |
"1 1 15207 1 21 \n", | |
"2 5 15321 0 NaN \n", | |
"3 2 15269 0 NaN \n", | |
"4 1 15239 0 NaN \n", | |
"\n", | |
" ICD10_Chapter_Code Treatment_Function_Code Length_Of_Stay_Days \n", | |
"0 ICD10_Chapter_Code Treatment_Function_Code Length_Of_Stay_Days \n", | |
"1 XVIII 180 1 \n", | |
"2 NaN NaN NaN \n", | |
"3 NaN NaN NaN \n", | |
"4 NaN NaN NaN " | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf3.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "89850644-abb6-45a8-822b-6d4ad7e1e4d2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>IMD_Decile_From_LSOA</th>\n", | |
" <th>Age_Band</th>\n", | |
" <th>Sex</th>\n", | |
" <th>AE_Arrive_Date</th>\n", | |
" <th>AE_Arrive_HourOfDay</th>\n", | |
" <th>AE_Time_Mins</th>\n", | |
" <th>AE_HRG</th>\n", | |
" <th>AE_Num_Diagnoses</th>\n", | |
" <th>AE_Num_Investigations</th>\n", | |
" <th>AE_Num_Treatments</th>\n", | |
" <th>AE_Arrival_Mode</th>\n", | |
" <th>Provider_Patient_Distance_Miles</th>\n", | |
" <th>ProvID</th>\n", | |
" <th>Admitted_Flag</th>\n", | |
" <th>Admission_Method</th>\n", | |
" <th>ICD10_Chapter_Code</th>\n", | |
" <th>Treatment_Function_Code</th>\n", | |
" <th>Length_Of_Stay_Days</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>927217</th>\n", | |
" <td>7</td>\n", | |
" <td>1-17</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-03-01 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>170</td>\n", | |
" <td>Medium</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>3</td>\n", | |
" <td>1</td>\n", | |
" <td>3</td>\n", | |
" <td>15264</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927218</th>\n", | |
" <td>4</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2</td>\n", | |
" <td>2015-03-07 00:00:00</td>\n", | |
" <td>01-04</td>\n", | |
" <td>140</td>\n", | |
" <td>Low</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>1</td>\n", | |
" <td>15183</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927219</th>\n", | |
" <td>7</td>\n", | |
" <td>18-24</td>\n", | |
" <td>2</td>\n", | |
" <td>2014-05-03 00:00:00</td>\n", | |
" <td>13-16</td>\n", | |
" <td>100</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>4</td>\n", | |
" <td>15206</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927220</th>\n", | |
" <td>7</td>\n", | |
" <td>45-64</td>\n", | |
" <td>1</td>\n", | |
" <td>2016-02-11 00:00:00</td>\n", | |
" <td>09-12</td>\n", | |
" <td>80</td>\n", | |
" <td>Medium</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>5</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>15187</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>927221</th>\n", | |
" <td>4</td>\n", | |
" <td>1-17</td>\n", | |
" <td>2</td>\n", | |
" <td>2017-12-04 00:00:00</td>\n", | |
" <td>17-20</td>\n", | |
" <td>130</td>\n", | |
" <td>Nothing</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>15109</td>\n", | |
" <td>0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" IMD_Decile_From_LSOA Age_Band Sex AE_Arrive_Date \\\n", | |
"927217 7 1-17 1 2016-03-01 00:00:00 \n", | |
"927218 4 18-24 2 2015-03-07 00:00:00 \n", | |
"927219 7 18-24 2 2014-05-03 00:00:00 \n", | |
"927220 7 45-64 1 2016-02-11 00:00:00 \n", | |
"927221 4 1-17 2 2017-12-04 00:00:00 \n", | |
"\n", | |
" AE_Arrive_HourOfDay AE_Time_Mins AE_HRG AE_Num_Diagnoses \\\n", | |
"927217 13-16 170 Medium 1 \n", | |
"927218 01-04 140 Low 1 \n", | |
"927219 13-16 100 Nothing 1 \n", | |
"927220 09-12 80 Medium 0 \n", | |
"927221 17-20 130 Nothing 0 \n", | |
"\n", | |
" AE_Num_Investigations AE_Num_Treatments AE_Arrival_Mode \\\n", | |
"927217 3 3 1 \n", | |
"927218 0 1 2 \n", | |
"927219 1 1 2 \n", | |
"927220 1 5 2 \n", | |
"927221 1 1 2 \n", | |
"\n", | |
" Provider_Patient_Distance_Miles ProvID Admitted_Flag Admission_Method \\\n", | |
"927217 3 15264 0 NaN \n", | |
"927218 1 15183 0 NaN \n", | |
"927219 4 15206 0 NaN \n", | |
"927220 2 15187 0 NaN \n", | |
"927221 2 15109 0 NaN \n", | |
"\n", | |
" ICD10_Chapter_Code Treatment_Function_Code Length_Of_Stay_Days \n", | |
"927217 NaN NaN NaN \n", | |
"927218 NaN NaN NaN \n", | |
"927219 NaN NaN NaN \n", | |
"927220 NaN NaN NaN \n", | |
"927221 NaN NaN NaN " | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ddf3.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "feb7b946-b8b1-4316-b2ab-f371ca394457", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Dask Series Structure:\n", | |
"npartitions=71\n", | |
" object\n", | |
" ...\n", | |
" ... \n", | |
" ...\n", | |
" ...\n", | |
"Name: AE_Arrive_Date, dtype: object\n", | |
"Dask Name: apply, 71 tasks" | |
] | |
}, | |
"execution_count": 16, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Works: the same apply as on ddf1, now on the frame read with names=cols.\n", | |
"# ddf3.tail() above shows the last partition carrying the 'AE_Arrive_Date' column.\n", | |
"# Caveat: row 0 of ddf3 is the header text itself (see ddf3.head()), so its\n", | |
"# result is 'AE_Arrive_' rather than a date.\n", | |
"ddf3['AE_Arrive_Date'].apply(generalize_timestamp, meta=('AE_Arrive_Date', 'object')).persist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "94c50f04-265b-4972-bfae-d08fb62cdab6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment