Skip to content

Instantly share code, notes, and snippets.

@dalgu90
Created February 7, 2023 17:20
Show Gist options
  • Save dalgu90/0920903cb9d2faafd3e5137af9a0f172 to your computer and use it in GitHub Desktop.
Save dalgu90/0920903cb9d2faafd3e5137af9a0f172 to your computer and use it in GitHub Desktop.
Quick analysis on MIMIC-IV for automatic ICD coding
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e192430f",
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"import csv\n",
"import json\n",
"import os\n",
"import pickle\n",
"import sys\n",
"import random\n",
"\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from tqdm.notebook import tqdm\n",
"import seaborn as sns"
]
},
{
"cell_type": "markdown",
"id": "f0c178dd",
"metadata": {},
"source": [
"### MIMIC-IV database"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "33080ef6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"../mimic4/mimic-iv-2.2/\r\n",
"├── CHANGELOG.txt\r\n",
"├── hosp\r\n",
"│   ├── admissions.csv.gz\r\n",
"│   ├── d_hcpcs.csv.gz\r\n",
"│   ├── diagnoses_icd.csv.gz\r\n",
"│   ├── d_icd_diagnoses.csv.gz\r\n",
"│   ├── d_icd_procedures.csv.gz\r\n",
"│   ├── d_labitems.csv.gz\r\n",
"│   ├── drgcodes.csv.gz\r\n",
"│   ├── emar.csv.gz\r\n",
"│   ├── emar_detail.csv.gz\r\n",
"│   ├── hcpcsevents.csv.gz\r\n",
"│   ├── labevents.csv.gz\r\n",
"│   ├── microbiologyevents.csv.gz\r\n",
"│   ├── omr.csv.gz\r\n",
"│   ├── patients.csv.gz\r\n",
"│   ├── pharmacy.csv.gz\r\n",
"│   ├── poe.csv.gz\r\n",
"│   ├── poe_detail.csv.gz\r\n",
"│   ├── prescriptions.csv.gz\r\n",
"│   ├── procedures_icd.csv.gz\r\n",
"│   ├── provider.csv.gz\r\n",
"│   ├── services.csv.gz\r\n",
"│   └── transfers.csv.gz\r\n",
"├── icu\r\n",
"│   ├── caregiver.csv.gz\r\n",
"│   ├── chartevents.csv.gz\r\n",
"│   ├── datetimeevents.csv.gz\r\n",
"│   ├── d_items.csv.gz\r\n",
"│   ├── icustays.csv.gz\r\n",
"│   ├── ingredientevents.csv.gz\r\n",
"│   ├── inputevents.csv.gz\r\n",
"│   ├── outputevents.csv.gz\r\n",
"│   └── procedureevents.csv.gz\r\n",
"├── LICENSE.txt\r\n",
"├── note\r\n",
"│   ├── discharge.csv.gz\r\n",
"│   ├── discharge_detail.csv.gz\r\n",
"│   ├── radiology.csv.gz\r\n",
"│   └── radiology_detail.csv.gz\r\n",
"└── SHA256SUMS.txt\r\n",
"\r\n",
"3 directories, 38 files\r\n"
]
}
],
"source": [
"# Root of the local MIMIC-IV v2.2 copy (used by every loader below); list its layout without colour codes\n",
"mimic4_root = '../mimic4/mimic-iv-2.2/'\n",
"!tree -n {mimic4_root}"
]
},
{
"cell_type": "markdown",
"id": "831e4da8",
"metadata": {},
"source": [
"## 1. Patients & admissions"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b937a721",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>gender</th>\n",
" <th>anchor_age</th>\n",
" <th>anchor_year</th>\n",
" <th>anchor_year_group</th>\n",
" <th>dod</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000032</td>\n",
" <td>F</td>\n",
" <td>52</td>\n",
" <td>2180</td>\n",
" <td>2014 - 2016</td>\n",
" <td>2180-09-09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000048</td>\n",
" <td>F</td>\n",
" <td>23</td>\n",
" <td>2126</td>\n",
" <td>2008 - 2010</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10000068</td>\n",
" <td>F</td>\n",
" <td>19</td>\n",
" <td>2160</td>\n",
" <td>2008 - 2010</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10000084</td>\n",
" <td>M</td>\n",
" <td>72</td>\n",
" <td>2160</td>\n",
" <td>2017 - 2019</td>\n",
" <td>2161-02-13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000102</td>\n",
" <td>F</td>\n",
" <td>27</td>\n",
" <td>2136</td>\n",
" <td>2008 - 2010</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299707</th>\n",
" <td>19999828</td>\n",
" <td>F</td>\n",
" <td>46</td>\n",
" <td>2147</td>\n",
" <td>2017 - 2019</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299708</th>\n",
" <td>19999829</td>\n",
" <td>F</td>\n",
" <td>28</td>\n",
" <td>2186</td>\n",
" <td>2008 - 2010</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299709</th>\n",
" <td>19999840</td>\n",
" <td>M</td>\n",
" <td>58</td>\n",
" <td>2164</td>\n",
" <td>2008 - 2010</td>\n",
" <td>2164-09-17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299710</th>\n",
" <td>19999914</td>\n",
" <td>F</td>\n",
" <td>49</td>\n",
" <td>2158</td>\n",
" <td>2017 - 2019</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>299711</th>\n",
" <td>19999987</td>\n",
" <td>F</td>\n",
" <td>57</td>\n",
" <td>2145</td>\n",
" <td>2011 - 2013</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>299712 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" subject_id gender anchor_age anchor_year anchor_year_group \\\n",
"0 10000032 F 52 2180 2014 - 2016 \n",
"1 10000048 F 23 2126 2008 - 2010 \n",
"2 10000068 F 19 2160 2008 - 2010 \n",
"3 10000084 M 72 2160 2017 - 2019 \n",
"4 10000102 F 27 2136 2008 - 2010 \n",
"... ... ... ... ... ... \n",
"299707 19999828 F 46 2147 2017 - 2019 \n",
"299708 19999829 F 28 2186 2008 - 2010 \n",
"299709 19999840 M 58 2164 2008 - 2010 \n",
"299710 19999914 F 49 2158 2017 - 2019 \n",
"299711 19999987 F 57 2145 2011 - 2013 \n",
"\n",
" dod \n",
"0 2180-09-09 \n",
"1 NaN \n",
"2 NaN \n",
"3 2161-02-13 \n",
"4 NaN \n",
"... ... \n",
"299707 NaN \n",
"299708 NaN \n",
"299709 2164-09-17 \n",
"299710 NaN \n",
"299711 NaN \n",
"\n",
"[299712 rows x 6 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Patient table (hosp/patients); subject_id is read as a string identifier\n",
"patients_csv = os.path.join(mimic4_root, 'hosp', 'patients.csv.gz')\n",
"df_patient = pd.read_csv(patients_csv, dtype={'subject_id': 'string'})\n",
"df_patient"
]
},
{
"cell_type": "markdown",
"id": "1062a05d",
"metadata": {},
"source": [
"From the [MIMIC-IV data description page](https://mimic.mit.edu/docs/iv/),\n",
"- `anchor_year`: a de-identified, shifted year between 2100 and 2200\n",
"- `anchor_year_group`: a three-year range that contains the actual (unshifted) year corresponding to `anchor_year`\n",
"- `anchor_age`: the patient's actual age in `anchor_year`\n",
"\n",
"For example, if a patient's `anchor_year` is 2153, `anchor_year_group` is 2008-2010, and `anchor_age` is 60:\n",
"- The year 2153 for the patient corresponds to 2008, 2009, or 2010.\n",
"- The patient was 60 in the shifted year of 2153, i.e. they were 60 in 2008, 2009, or 2010.\n",
"- A patient admission in 2154 will occur in 2009-2011, an admission in 2155 will occur in 2010-2012, and so on.\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ad7c072f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>admittime</th>\n",
" <th>dischtime</th>\n",
" <th>deathtime</th>\n",
" <th>admission_type</th>\n",
" <th>admit_provider_id</th>\n",
" <th>admission_location</th>\n",
" <th>discharge_location</th>\n",
" <th>insurance</th>\n",
" <th>language</th>\n",
" <th>marital_status</th>\n",
" <th>race</th>\n",
" <th>edregtime</th>\n",
" <th>edouttime</th>\n",
" <th>hospital_expire_flag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>2180-05-06 22:23:00</td>\n",
" <td>2180-05-07 17:15:00</td>\n",
" <td>NaN</td>\n",
" <td>URGENT</td>\n",
" <td>P874LG</td>\n",
" <td>TRANSFER FROM HOSPITAL</td>\n",
" <td>HOME</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>WIDOWED</td>\n",
" <td>WHITE</td>\n",
" <td>2180-05-06 19:17:00</td>\n",
" <td>2180-05-06 23:30:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000032</td>\n",
" <td>22841357</td>\n",
" <td>2180-06-26 18:27:00</td>\n",
" <td>2180-06-27 18:49:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P09Q6Y</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>HOME</td>\n",
" <td>Medicaid</td>\n",
" <td>ENGLISH</td>\n",
" <td>WIDOWED</td>\n",
" <td>WHITE</td>\n",
" <td>2180-06-26 15:54:00</td>\n",
" <td>2180-06-26 21:31:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10000032</td>\n",
" <td>25742920</td>\n",
" <td>2180-08-05 23:44:00</td>\n",
" <td>2180-08-07 17:50:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P60CC5</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>HOSPICE</td>\n",
" <td>Medicaid</td>\n",
" <td>ENGLISH</td>\n",
" <td>WIDOWED</td>\n",
" <td>WHITE</td>\n",
" <td>2180-08-05 20:58:00</td>\n",
" <td>2180-08-06 01:44:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10000032</td>\n",
" <td>29079034</td>\n",
" <td>2180-07-23 12:35:00</td>\n",
" <td>2180-07-25 17:55:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P30KEH</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>HOME</td>\n",
" <td>Medicaid</td>\n",
" <td>ENGLISH</td>\n",
" <td>WIDOWED</td>\n",
" <td>WHITE</td>\n",
" <td>2180-07-23 05:54:00</td>\n",
" <td>2180-07-23 14:00:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000068</td>\n",
" <td>25022803</td>\n",
" <td>2160-03-03 23:16:00</td>\n",
" <td>2160-03-04 06:26:00</td>\n",
" <td>NaN</td>\n",
" <td>EU OBSERVATION</td>\n",
" <td>P51VDL</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>NaN</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>SINGLE</td>\n",
" <td>WHITE</td>\n",
" <td>2160-03-03 21:55:00</td>\n",
" <td>2160-03-04 06:26:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431226</th>\n",
" <td>19999828</td>\n",
" <td>25744818</td>\n",
" <td>2149-01-08 16:44:00</td>\n",
" <td>2149-01-18 17:00:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P75BG6</td>\n",
" <td>TRANSFER FROM HOSPITAL</td>\n",
" <td>HOME HEALTH CARE</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>SINGLE</td>\n",
" <td>WHITE</td>\n",
" <td>2149-01-08 09:11:00</td>\n",
" <td>2149-01-08 18:12:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431227</th>\n",
" <td>19999828</td>\n",
" <td>29734428</td>\n",
" <td>2147-07-18 16:23:00</td>\n",
" <td>2147-08-04 18:10:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P16C7J</td>\n",
" <td>PHYSICIAN REFERRAL</td>\n",
" <td>HOME HEALTH CARE</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>SINGLE</td>\n",
" <td>WHITE</td>\n",
" <td>2147-07-17 17:18:00</td>\n",
" <td>2147-07-18 17:34:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431228</th>\n",
" <td>19999840</td>\n",
" <td>21033226</td>\n",
" <td>2164-09-10 13:47:00</td>\n",
" <td>2164-09-17 13:42:00</td>\n",
" <td>2164-09-17 13:42:00</td>\n",
" <td>EW EMER.</td>\n",
" <td>P58A9J</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>DIED</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>WIDOWED</td>\n",
" <td>WHITE</td>\n",
" <td>2164-09-10 11:09:00</td>\n",
" <td>2164-09-10 14:46:00</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431229</th>\n",
" <td>19999840</td>\n",
" <td>26071774</td>\n",
" <td>2164-07-25 00:27:00</td>\n",
" <td>2164-07-28 12:15:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P506DE</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>HOME</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>WIDOWED</td>\n",
" <td>WHITE</td>\n",
" <td>2164-07-24 21:16:00</td>\n",
" <td>2164-07-25 01:20:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>431230</th>\n",
" <td>19999987</td>\n",
" <td>23865745</td>\n",
" <td>2145-11-02 21:38:00</td>\n",
" <td>2145-11-11 12:57:00</td>\n",
" <td>NaN</td>\n",
" <td>EW EMER.</td>\n",
" <td>P09IS0</td>\n",
" <td>EMERGENCY ROOM</td>\n",
" <td>REHAB</td>\n",
" <td>Other</td>\n",
" <td>ENGLISH</td>\n",
" <td>NaN</td>\n",
" <td>UNKNOWN</td>\n",
" <td>2145-11-02 19:28:00</td>\n",
" <td>2145-11-02 22:59:00</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>431231 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id admittime dischtime \\\n",
"0 10000032 22595853 2180-05-06 22:23:00 2180-05-07 17:15:00 \n",
"1 10000032 22841357 2180-06-26 18:27:00 2180-06-27 18:49:00 \n",
"2 10000032 25742920 2180-08-05 23:44:00 2180-08-07 17:50:00 \n",
"3 10000032 29079034 2180-07-23 12:35:00 2180-07-25 17:55:00 \n",
"4 10000068 25022803 2160-03-03 23:16:00 2160-03-04 06:26:00 \n",
"... ... ... ... ... \n",
"431226 19999828 25744818 2149-01-08 16:44:00 2149-01-18 17:00:00 \n",
"431227 19999828 29734428 2147-07-18 16:23:00 2147-08-04 18:10:00 \n",
"431228 19999840 21033226 2164-09-10 13:47:00 2164-09-17 13:42:00 \n",
"431229 19999840 26071774 2164-07-25 00:27:00 2164-07-28 12:15:00 \n",
"431230 19999987 23865745 2145-11-02 21:38:00 2145-11-11 12:57:00 \n",
"\n",
" deathtime admission_type admit_provider_id \\\n",
"0 NaN URGENT P874LG \n",
"1 NaN EW EMER. P09Q6Y \n",
"2 NaN EW EMER. P60CC5 \n",
"3 NaN EW EMER. P30KEH \n",
"4 NaN EU OBSERVATION P51VDL \n",
"... ... ... ... \n",
"431226 NaN EW EMER. P75BG6 \n",
"431227 NaN EW EMER. P16C7J \n",
"431228 2164-09-17 13:42:00 EW EMER. P58A9J \n",
"431229 NaN EW EMER. P506DE \n",
"431230 NaN EW EMER. P09IS0 \n",
"\n",
" admission_location discharge_location insurance language \\\n",
"0 TRANSFER FROM HOSPITAL HOME Other ENGLISH \n",
"1 EMERGENCY ROOM HOME Medicaid ENGLISH \n",
"2 EMERGENCY ROOM HOSPICE Medicaid ENGLISH \n",
"3 EMERGENCY ROOM HOME Medicaid ENGLISH \n",
"4 EMERGENCY ROOM NaN Other ENGLISH \n",
"... ... ... ... ... \n",
"431226 TRANSFER FROM HOSPITAL HOME HEALTH CARE Other ENGLISH \n",
"431227 PHYSICIAN REFERRAL HOME HEALTH CARE Other ENGLISH \n",
"431228 EMERGENCY ROOM DIED Other ENGLISH \n",
"431229 EMERGENCY ROOM HOME Other ENGLISH \n",
"431230 EMERGENCY ROOM REHAB Other ENGLISH \n",
"\n",
" marital_status race edregtime edouttime \\\n",
"0 WIDOWED WHITE 2180-05-06 19:17:00 2180-05-06 23:30:00 \n",
"1 WIDOWED WHITE 2180-06-26 15:54:00 2180-06-26 21:31:00 \n",
"2 WIDOWED WHITE 2180-08-05 20:58:00 2180-08-06 01:44:00 \n",
"3 WIDOWED WHITE 2180-07-23 05:54:00 2180-07-23 14:00:00 \n",
"4 SINGLE WHITE 2160-03-03 21:55:00 2160-03-04 06:26:00 \n",
"... ... ... ... ... \n",
"431226 SINGLE WHITE 2149-01-08 09:11:00 2149-01-08 18:12:00 \n",
"431227 SINGLE WHITE 2147-07-17 17:18:00 2147-07-18 17:34:00 \n",
"431228 WIDOWED WHITE 2164-09-10 11:09:00 2164-09-10 14:46:00 \n",
"431229 WIDOWED WHITE 2164-07-24 21:16:00 2164-07-25 01:20:00 \n",
"431230 NaN UNKNOWN 2145-11-02 19:28:00 2145-11-02 22:59:00 \n",
"\n",
" hospital_expire_flag \n",
"0 0 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"... ... \n",
"431226 0 \n",
"431227 0 \n",
"431228 1 \n",
"431229 0 \n",
"431230 0 \n",
"\n",
"[431231 rows x 16 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Admission table (hosp/admissions); identifier columns are read as strings\n",
"admissions_csv = os.path.join(mimic4_root, 'hosp', 'admissions.csv.gz')\n",
"admission_id_dtypes = {'subject_id': 'string', 'hadm_id': 'string'}\n",
"df_admission = pd.read_csv(admissions_csv, dtype=admission_id_dtypes)\n",
"df_admission"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7d1bc926",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"431231 stays in the hospital\n",
"180733 unique patients in the hospital\n"
]
}
],
"source": [
"# Row count is reported as hospital stays; distinct subject_id values as patients\n",
"print(f'{df_admission.shape[0]} stays in the hospital')\n",
"print(f'{df_admission.subject_id.nunique(dropna=False)} unique patients in the hospital')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9a820082",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>stay_id</th>\n",
" <th>first_careunit</th>\n",
" <th>last_careunit</th>\n",
" <th>intime</th>\n",
" <th>outtime</th>\n",
" <th>los</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000032</td>\n",
" <td>29079034</td>\n",
" <td>39553978</td>\n",
" <td>Medical Intensive Care Unit (MICU)</td>\n",
" <td>Medical Intensive Care Unit (MICU)</td>\n",
" <td>2180-07-23 14:00:00</td>\n",
" <td>2180-07-23 23:50:47</td>\n",
" <td>0.410266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000980</td>\n",
" <td>26913865</td>\n",
" <td>39765666</td>\n",
" <td>Medical Intensive Care Unit (MICU)</td>\n",
" <td>Medical Intensive Care Unit (MICU)</td>\n",
" <td>2189-06-27 08:42:00</td>\n",
" <td>2189-06-27 20:38:27</td>\n",
" <td>0.497535</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10001217</td>\n",
" <td>24597018</td>\n",
" <td>37067082</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>2157-11-20 19:18:02</td>\n",
" <td>2157-11-21 22:08:00</td>\n",
" <td>1.118032</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10001217</td>\n",
" <td>27703517</td>\n",
" <td>34592300</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>2157-12-19 15:42:24</td>\n",
" <td>2157-12-20 14:27:41</td>\n",
" <td>0.948113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10001725</td>\n",
" <td>25563031</td>\n",
" <td>31205490</td>\n",
" <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
" <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
" <td>2110-04-11 15:52:22</td>\n",
" <td>2110-04-12 23:59:56</td>\n",
" <td>1.338588</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73176</th>\n",
" <td>19999442</td>\n",
" <td>26785317</td>\n",
" <td>32336619</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>2148-11-19 14:23:43</td>\n",
" <td>2148-11-26 13:12:15</td>\n",
" <td>6.950370</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73177</th>\n",
" <td>19999625</td>\n",
" <td>25304202</td>\n",
" <td>31070865</td>\n",
" <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
" <td>Medical/Surgical Intensive Care Unit (MICU/SICU)</td>\n",
" <td>2139-10-10 19:18:00</td>\n",
" <td>2139-10-11 18:21:28</td>\n",
" <td>0.960741</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73178</th>\n",
" <td>19999828</td>\n",
" <td>25744818</td>\n",
" <td>36075953</td>\n",
" <td>Medical Intensive Care Unit (MICU)</td>\n",
" <td>Medical Intensive Care Unit (MICU)</td>\n",
" <td>2149-01-08 18:12:00</td>\n",
" <td>2149-01-10 13:11:02</td>\n",
" <td>1.790995</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73179</th>\n",
" <td>19999840</td>\n",
" <td>21033226</td>\n",
" <td>38978960</td>\n",
" <td>Trauma SICU (TSICU)</td>\n",
" <td>Surgical Intensive Care Unit (SICU)</td>\n",
" <td>2164-09-12 09:26:28</td>\n",
" <td>2164-09-17 16:35:15</td>\n",
" <td>5.297766</td>\n",
" </tr>\n",
" <tr>\n",
" <th>73180</th>\n",
" <td>19999987</td>\n",
" <td>23865745</td>\n",
" <td>36195440</td>\n",
" <td>Trauma SICU (TSICU)</td>\n",
" <td>Trauma SICU (TSICU)</td>\n",
" <td>2145-11-02 22:59:00</td>\n",
" <td>2145-11-04 21:29:30</td>\n",
" <td>1.937847</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>73181 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id stay_id \\\n",
"0 10000032 29079034 39553978 \n",
"1 10000980 26913865 39765666 \n",
"2 10001217 24597018 37067082 \n",
"3 10001217 27703517 34592300 \n",
"4 10001725 25563031 31205490 \n",
"... ... ... ... \n",
"73176 19999442 26785317 32336619 \n",
"73177 19999625 25304202 31070865 \n",
"73178 19999828 25744818 36075953 \n",
"73179 19999840 21033226 38978960 \n",
"73180 19999987 23865745 36195440 \n",
"\n",
" first_careunit \\\n",
"0 Medical Intensive Care Unit (MICU) \n",
"1 Medical Intensive Care Unit (MICU) \n",
"2 Surgical Intensive Care Unit (SICU) \n",
"3 Surgical Intensive Care Unit (SICU) \n",
"4 Medical/Surgical Intensive Care Unit (MICU/SICU) \n",
"... ... \n",
"73176 Surgical Intensive Care Unit (SICU) \n",
"73177 Medical/Surgical Intensive Care Unit (MICU/SICU) \n",
"73178 Medical Intensive Care Unit (MICU) \n",
"73179 Trauma SICU (TSICU) \n",
"73180 Trauma SICU (TSICU) \n",
"\n",
" last_careunit intime \\\n",
"0 Medical Intensive Care Unit (MICU) 2180-07-23 14:00:00 \n",
"1 Medical Intensive Care Unit (MICU) 2189-06-27 08:42:00 \n",
"2 Surgical Intensive Care Unit (SICU) 2157-11-20 19:18:02 \n",
"3 Surgical Intensive Care Unit (SICU) 2157-12-19 15:42:24 \n",
"4 Medical/Surgical Intensive Care Unit (MICU/SICU) 2110-04-11 15:52:22 \n",
"... ... ... \n",
"73176 Surgical Intensive Care Unit (SICU) 2148-11-19 14:23:43 \n",
"73177 Medical/Surgical Intensive Care Unit (MICU/SICU) 2139-10-10 19:18:00 \n",
"73178 Medical Intensive Care Unit (MICU) 2149-01-08 18:12:00 \n",
"73179 Surgical Intensive Care Unit (SICU) 2164-09-12 09:26:28 \n",
"73180 Trauma SICU (TSICU) 2145-11-02 22:59:00 \n",
"\n",
" outtime los \n",
"0 2180-07-23 23:50:47 0.410266 \n",
"1 2189-06-27 20:38:27 0.497535 \n",
"2 2157-11-21 22:08:00 1.118032 \n",
"3 2157-12-20 14:27:41 0.948113 \n",
"4 2110-04-12 23:59:56 1.338588 \n",
"... ... ... \n",
"73176 2148-11-26 13:12:15 6.950370 \n",
"73177 2139-10-11 18:21:28 0.960741 \n",
"73178 2149-01-10 13:11:02 1.790995 \n",
"73179 2164-09-17 16:35:15 5.297766 \n",
"73180 2145-11-04 21:29:30 1.937847 \n",
"\n",
"[73181 rows x 8 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# ICU stay table (icu/icustays); identifier columns are read as strings\n",
"icustays_csv = os.path.join(mimic4_root, 'icu', 'icustays.csv.gz')\n",
"icu_id_dtypes = {'subject_id': 'string', 'hadm_id': 'string', 'stay_id': 'string'}\n",
"df_icu = pd.read_csv(icustays_csv, dtype=icu_id_dtypes)\n",
"df_icu"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "e8048b75",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"73181 ICU stays\n",
"66239 unique admissions in the hospital\n"
]
}
],
"source": [
"# Row count is reported as ICU stays; distinct hadm_id values as hospital admissions\n",
"print(f'{df_icu.shape[0]} ICU stays')\n",
"print(f'{df_icu.hadm_id.nunique(dropna=False)} unique admissions in the hospital')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6073e052",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "0b7862e4",
"metadata": {},
"source": [
"## 2. Diagnoses / procedures"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "6529c95a",
"metadata": {},
"outputs": [],
"source": [
"# Read ICD codes and identifiers as strings so codes with leading zeros (e.g. '07070') are kept intact\n",
"icd_table_dtypes = {\"icd_code\": \"string\", \"subject_id\": \"string\", \"hadm_id\": \"string\"}\n",
"df_diag = pd.read_csv(os.path.join(mimic4_root, 'hosp', 'diagnoses_icd.csv.gz'), dtype=icd_table_dtypes)\n",
"df_proc = pd.read_csv(os.path.join(mimic4_root, 'hosp', 'procedures_icd.csv.gz'), dtype=icd_table_dtypes)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "0d02c85b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"4756326 diagnoses of 180640 patients and 430852 admissions\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>1</td>\n",
" <td>5723</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>2</td>\n",
" <td>78959</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>3</td>\n",
" <td>5715</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>4</td>\n",
" <td>07070</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>5</td>\n",
" <td>496</td>\n",
" <td>9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num icd_code icd_version\n",
"0 10000032 22595853 1 5723 9\n",
"1 10000032 22595853 2 78959 9\n",
"2 10000032 22595853 3 5715 9\n",
"3 10000032 22595853 4 07070 9\n",
"4 10000032 22595853 5 496 9"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Size of the diagnosis table and how many distinct patients / admissions it covers\n",
"n_diag_patients = df_diag.subject_id.nunique(dropna=False)\n",
"n_diag_admissions = df_diag.hadm_id.nunique(dropna=False)\n",
"print(f'{len(df_diag)} diagnoses of {n_diag_patients} patients and {n_diag_admissions} admissions')\n",
"df_diag.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "09334b4d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"669186 procedures of 121891 patients and 229445 admissions\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>chartdate</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>1</td>\n",
" <td>2180-05-07</td>\n",
" <td>5491</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000032</td>\n",
" <td>22841357</td>\n",
" <td>1</td>\n",
" <td>2180-06-27</td>\n",
" <td>5491</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10000032</td>\n",
" <td>25742920</td>\n",
" <td>1</td>\n",
" <td>2180-08-06</td>\n",
" <td>5491</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10000068</td>\n",
" <td>25022803</td>\n",
" <td>1</td>\n",
" <td>2160-03-03</td>\n",
" <td>8938</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000117</td>\n",
" <td>27988844</td>\n",
" <td>1</td>\n",
" <td>2183-09-19</td>\n",
" <td>0QS734Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num chartdate icd_code icd_version\n",
"0 10000032 22595853 1 2180-05-07 5491 9\n",
"1 10000032 22841357 1 2180-06-27 5491 9\n",
"2 10000032 25742920 1 2180-08-06 5491 9\n",
"3 10000068 25022803 1 2160-03-03 8938 9\n",
"4 10000117 27988844 1 2183-09-19 0QS734Z 10"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Size of the procedure table and how many distinct patients / admissions it covers\n",
"n_proc_patients = df_proc.subject_id.nunique(dropna=False)\n",
"n_proc_admissions = df_proc.hadm_id.nunique(dropna=False)\n",
"print(f'{len(df_proc)} procedures of {n_proc_patients} patients and {n_proc_admissions} admissions')\n",
"df_proc.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "197cf284",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"180650 unique patients\n",
"430876 unique admissions\n"
]
}
],
"source": [
"# Patients / admissions that appear in the diagnosis table, the procedure table, or both\n",
"coded_subject_ids = set(df_diag.subject_id).union(df_proc.subject_id)\n",
"coded_hadm_ids = set(df_diag.hadm_id).union(df_proc.hadm_id)\n",
"print(f'{len(coded_subject_ids)} unique patients')\n",
"print(f'{len(coded_hadm_ids)} unique admissions')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "8e7baae8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 9072 unique ICD-9 diagnoses used\n",
"16757 unique ICD-10 diagnoses used\n",
" 2544 unique ICD-9 procedures used\n",
"10031 unique ICD-10 procedures used\n"
]
}
],
"source": [
"# Number of distinct codes in use, per table and per ICD version\n",
"for code_kind, df_codes in (('diagnoses', df_diag), ('procedures', df_proc)):\n",
"    for icd_ver in (9, 10):\n",
"        n_unique_codes = len(set(df_codes[df_codes.icd_version == icd_ver].icd_code))\n",
"        print(f'{n_unique_codes:5d} unique ICD-{icd_ver} {code_kind} used')"
]
},
{
"cell_type": "markdown",
"id": "1e31d53b",
"metadata": {},
"source": [
"- There are two versions of ICD codes provided by MIMIC-IV ($\\texttt{icd}\\_\\texttt{version} \\in \\{\\texttt{9}, \\texttt{10}\\}$)."
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "955649f9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2766877 ICD-9 codes diagnosed to 276803 admissions\n",
"1989449 ICD-10 codes diagnosed to 154059 admissions\n"
]
}
],
"source": [
"# Rows per ICD version, and the number of distinct admissions carrying codes of that version\n",
"diag_icd_counter = collections.Counter(df_diag.icd_version)\n",
"for icd_ver in (9, 10):\n",
"    n_ver_admissions = df_diag.loc[df_diag.icd_version == icd_ver, 'hadm_id'].nunique(dropna=False)\n",
"    print(f'{diag_icd_counter[icd_ver]} ICD-{icd_ver} codes diagnosed to {n_ver_admissions} admissions')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "ecda2585",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'29918558', '21575927', '23969809', '23076003', '27635276', '27016754', '28333632', '24773199', '22870827', '20216016'}\n"
]
}
],
"source": [
"# HADM_IDs with both ICD-9 and ICD-10 diagnoses.\n",
"# Printed as a sorted list: the iteration order of a set of strings changes between\n",
"# kernel restarts (hash randomization), which would make this output non-reproducible.\n",
"hadm_ids_icd9 = set(df_diag.loc[df_diag.icd_version == 9, 'hadm_id'])\n",
"hadm_ids_icd10 = set(df_diag.loc[df_diag.icd_version == 10, 'hadm_id'])\n",
"print(sorted(hadm_ids_icd9 & hadm_ids_icd10))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "5e146ee9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>818</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>1</td>\n",
" <td>49322</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>819</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>1</td>\n",
" <td>J441</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>820</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>2</td>\n",
" <td>3051</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>821</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>2</td>\n",
" <td>F17210</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>822</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>3</td>\n",
" <td>78650</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4222526</th>\n",
" <td>18862236</td>\n",
" <td>22870827</td>\n",
" <td>8</td>\n",
" <td>Z87891</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4222527</th>\n",
" <td>18862236</td>\n",
" <td>22870827</td>\n",
" <td>9</td>\n",
" <td>53081</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4222528</th>\n",
" <td>18862236</td>\n",
" <td>22870827</td>\n",
" <td>9</td>\n",
" <td>K219</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4222529</th>\n",
" <td>18862236</td>\n",
" <td>22870827</td>\n",
" <td>10</td>\n",
" <td>31400</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4222530</th>\n",
" <td>18862236</td>\n",
" <td>22870827</td>\n",
" <td>10</td>\n",
" <td>F909</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>249 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num icd_code icd_version\n",
"818 10001884 27016754 1 49322 9\n",
"819 10001884 27016754 1 J441 10\n",
"820 10001884 27016754 2 3051 9\n",
"821 10001884 27016754 2 F17210 10\n",
"822 10001884 27016754 3 78650 9\n",
"... ... ... ... ... ...\n",
"4222526 18862236 22870827 8 Z87891 10\n",
"4222527 18862236 22870827 9 53081 9\n",
"4222528 18862236 22870827 9 K219 10\n",
"4222529 18862236 22870827 10 31400 9\n",
"4222530 18862236 22870827 10 F909 10\n",
"\n",
"[249 rows x 5 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hadm_id_set_temp = set(df_diag[df_diag.icd_version == 9].hadm_id) & set(df_diag[df_diag.icd_version == 10].hadm_id)\n",
"check_fn = lambda x: x in hadm_id_set_temp\n",
"df_diag[df_diag.hadm_id.map(check_fn)]"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "29513ea6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>818</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>1</td>\n",
" <td>49322</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>819</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>1</td>\n",
" <td>J441</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>820</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>2</td>\n",
" <td>3051</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>821</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>2</td>\n",
" <td>F17210</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>822</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>3</td>\n",
" <td>78650</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>823</th>\n",
" <td>10001884</td>\n",
" <td>27016754</td>\n",
" <td>3</td>\n",
" <td>R079</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num icd_code icd_version\n",
"818 10001884 27016754 1 49322 9\n",
"819 10001884 27016754 1 J441 10\n",
"820 10001884 27016754 2 3051 9\n",
"821 10001884 27016754 2 F17210 10\n",
"822 10001884 27016754 3 78650 9\n",
"823 10001884 27016754 3 R079 10"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_diag[df_diag.hadm_id == '27016754']"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "44d83f4f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24123\n"
]
}
],
"source": [
"# SUBJECT_ID with both ICD-9 and ICD-10 procedures\n",
"print(len(set(df_diag[df_diag.icd_version == 9].subject_id) & set(df_diag[df_diag.icd_version == 10].subject_id)))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "e9ba65c0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"24123 patients have ICD-9 and ICD-10 diagnoses\n",
"['19802368', '15791003', '14814315', '13105954', '12378122', '16516105', '11115884', '17557763', '16170354', '17322687']\n"
]
}
],
"source": [
"# SUBJECT_ID with both ICD-9 and ICD-10 diagnoses\n",
"subject_id_set_diag_both = set(df_diag[df_diag.icd_version == 9].subject_id) & set(df_diag[df_diag.icd_version == 10].subject_id)\n",
"print(f'{len(subject_id_set_diag_both)} patients have ICD-9 and ICD-10 diagnoses')\n",
"print(list(subject_id_set_diag_both)[:10])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "c3066866",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"446079 ICD-9 codes applied to 155891 patients\n",
"223107 ICD-10 codes applied to 73555 patients\n"
]
}
],
"source": [
"proc_icd_counter = collections.Counter(df_proc.icd_version)\n",
"print(f'{proc_icd_counter[9]} ICD-9 codes applied to {len(df_proc[df_proc.icd_version == 9].hadm_id.unique())} patients')\n",
"print(f'{proc_icd_counter[10]} ICD-10 codes applied to {len(df_proc[df_proc.icd_version == 10].hadm_id.unique())} patients')"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "49278514",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'27635276'}\n"
]
}
],
"source": [
"# HADM_IDs with both ICD-9 and ICD-10 procedures\n",
"print(set(df_proc[df_proc.icd_version == 9].hadm_id) & set(df_proc[df_proc.icd_version == 10].hadm_id))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "2fd29daa",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>chartdate</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>331953</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>1</td>\n",
" <td>2193-12-21</td>\n",
" <td>8411</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331954</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>1</td>\n",
" <td>2193-12-28</td>\n",
" <td>5A1D60Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331955</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>2</td>\n",
" <td>2193-12-21</td>\n",
" <td>0Y6X0Z0</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331956</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>2</td>\n",
" <td>2193-12-21</td>\n",
" <td>3995</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331957</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>3</td>\n",
" <td>2193-12-21</td>\n",
" <td>5A1D00Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331958</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>4</td>\n",
" <td>2193-12-31</td>\n",
" <td>0DJD8ZZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331959</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>5</td>\n",
" <td>2194-01-01</td>\n",
" <td>05H533Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331960</th>\n",
" <td>14975184</td>\n",
" <td>27635276</td>\n",
" <td>6</td>\n",
" <td>2194-01-01</td>\n",
" <td>B546ZZA</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num chartdate icd_code icd_version\n",
"331953 14975184 27635276 1 2193-12-21 8411 9\n",
"331954 14975184 27635276 1 2193-12-28 5A1D60Z 10\n",
"331955 14975184 27635276 2 2193-12-21 0Y6X0Z0 10\n",
"331956 14975184 27635276 2 2193-12-21 3995 9\n",
"331957 14975184 27635276 3 2193-12-21 5A1D00Z 10\n",
"331958 14975184 27635276 4 2193-12-31 0DJD8ZZ 10\n",
"331959 14975184 27635276 5 2194-01-01 05H533Z 10\n",
"331960 14975184 27635276 6 2194-01-01 B546ZZA 10"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_proc[df_proc.hadm_id == '27635276']"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "dbe831a8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"12274 patients have ICD-9 and ICD-10 procedures\n",
"['19133405', '16238625', '17848200', '19514041', '17207245', '10165779', '17649929', '12416453', '15853302', '13559141']\n"
]
}
],
"source": [
"# SUBJECT_ID with both ICD-9 and ICD-10 procedures\n",
"subject_id_set_proc_both = set(df_proc[df_proc.icd_version == 9].subject_id) & set(df_proc[df_proc.icd_version == 10].subject_id)\n",
"print(f'{len(subject_id_set_proc_both)} patients have ICD-9 and ICD-10 procedures')\n",
"print(list(subject_id_set_proc_both)[:10])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "b86c5b5c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>chartdate</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>174215</th>\n",
" <td>12615684</td>\n",
" <td>23994038</td>\n",
" <td>1</td>\n",
" <td>2184-09-21</td>\n",
" <td>00B70ZZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174216</th>\n",
" <td>12615684</td>\n",
" <td>24108892</td>\n",
" <td>1</td>\n",
" <td>2184-12-13</td>\n",
" <td>3E04305</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174217</th>\n",
" <td>12615684</td>\n",
" <td>24108892</td>\n",
" <td>2</td>\n",
" <td>2184-12-09</td>\n",
" <td>0JH60WZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174218</th>\n",
" <td>12615684</td>\n",
" <td>24108892</td>\n",
" <td>3</td>\n",
" <td>2184-12-09</td>\n",
" <td>02H633Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174219</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>1</td>\n",
" <td>2179-11-14</td>\n",
" <td>4576</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174220</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>2</td>\n",
" <td>2179-11-14</td>\n",
" <td>6839</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174221</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>3</td>\n",
" <td>2179-11-14</td>\n",
" <td>4719</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174222</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>4</td>\n",
" <td>2179-11-14</td>\n",
" <td>6561</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174223</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>5</td>\n",
" <td>2179-11-14</td>\n",
" <td>5459</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174224</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>6</td>\n",
" <td>2179-11-14</td>\n",
" <td>598</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174225</th>\n",
" <td>12615684</td>\n",
" <td>24602178</td>\n",
" <td>7</td>\n",
" <td>2179-11-14</td>\n",
" <td>0390</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174226</th>\n",
" <td>12615684</td>\n",
" <td>24718569</td>\n",
" <td>1</td>\n",
" <td>2182-06-02</td>\n",
" <td>5361</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>174227</th>\n",
" <td>12615684</td>\n",
" <td>24718569</td>\n",
" <td>2</td>\n",
" <td>2182-06-02</td>\n",
" <td>4562</td>\n",
" <td>9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num chartdate icd_code icd_version\n",
"174215 12615684 23994038 1 2184-09-21 00B70ZZ 10\n",
"174216 12615684 24108892 1 2184-12-13 3E04305 10\n",
"174217 12615684 24108892 2 2184-12-09 0JH60WZ 10\n",
"174218 12615684 24108892 3 2184-12-09 02H633Z 10\n",
"174219 12615684 24602178 1 2179-11-14 4576 9\n",
"174220 12615684 24602178 2 2179-11-14 6839 9\n",
"174221 12615684 24602178 3 2179-11-14 4719 9\n",
"174222 12615684 24602178 4 2179-11-14 6561 9\n",
"174223 12615684 24602178 5 2179-11-14 5459 9\n",
"174224 12615684 24602178 6 2179-11-14 598 9\n",
"174225 12615684 24602178 7 2179-11-14 0390 9\n",
"174226 12615684 24718569 1 2182-06-02 5361 9\n",
"174227 12615684 24718569 2 2182-06-02 4562 9"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_proc[df_proc.subject_id == '12615684']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "a1011b87",
"metadata": {},
"outputs": [],
"source": [
"both_icd_check_fn = lambda x: x in subject_id_set_proc_both\n",
"df_proc2 = df_proc[df_proc.subject_id.map(both_icd_check_fn)].copy()\n",
"df_proc2.sort_values(by=['subject_id', 'chartdate', 'seq_num'], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "a871a608",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>seq_num</th>\n",
" <th>chartdate</th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>10001884</td>\n",
" <td>27765344</td>\n",
" <td>1</td>\n",
" <td>2127-12-11</td>\n",
" <td>3950</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>10001884</td>\n",
" <td>27765344</td>\n",
" <td>2</td>\n",
" <td>2127-12-11</td>\n",
" <td>0055</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>10001884</td>\n",
" <td>27765344</td>\n",
" <td>3</td>\n",
" <td>2127-12-11</td>\n",
" <td>0046</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>10001884</td>\n",
" <td>27765344</td>\n",
" <td>4</td>\n",
" <td>2127-12-11</td>\n",
" <td>0040</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>10001884</td>\n",
" <td>26202981</td>\n",
" <td>1</td>\n",
" <td>2130-08-23</td>\n",
" <td>0DJ08ZZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>1</td>\n",
" <td>2131-01-11</td>\n",
" <td>5A1945Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>3</td>\n",
" <td>2131-01-11</td>\n",
" <td>0BH17EZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>4</td>\n",
" <td>2131-01-11</td>\n",
" <td>5A1223Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>5</td>\n",
" <td>2131-01-11</td>\n",
" <td>5A12012</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>6</td>\n",
" <td>2131-01-11</td>\n",
" <td>0BH17EZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>76</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>2</td>\n",
" <td>2131-01-12</td>\n",
" <td>5A1955Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>84</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>10</td>\n",
" <td>2131-01-12</td>\n",
" <td>0BH17EZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>7</td>\n",
" <td>2131-01-14</td>\n",
" <td>02HK3JZ</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>8</td>\n",
" <td>2131-01-14</td>\n",
" <td>02HV33Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>83</th>\n",
" <td>10001884</td>\n",
" <td>26184834</td>\n",
" <td>9</td>\n",
" <td>2131-01-15</td>\n",
" <td>3E0G76Z</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>118</th>\n",
" <td>10002013</td>\n",
" <td>28603984</td>\n",
" <td>1</td>\n",
" <td>2156-06-28</td>\n",
" <td>8361</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>119</th>\n",
" <td>10002013</td>\n",
" <td>28603984</td>\n",
" <td>2</td>\n",
" <td>2156-06-28</td>\n",
" <td>0444</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106</th>\n",
" <td>10002013</td>\n",
" <td>23745275</td>\n",
" <td>1</td>\n",
" <td>2157-10-31</td>\n",
" <td>3607</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>107</th>\n",
" <td>10002013</td>\n",
" <td>23745275</td>\n",
" <td>2</td>\n",
" <td>2157-10-31</td>\n",
" <td>0066</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108</th>\n",
" <td>10002013</td>\n",
" <td>23745275</td>\n",
" <td>3</td>\n",
" <td>2157-10-31</td>\n",
" <td>0040</td>\n",
" <td>9</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" subject_id hadm_id seq_num chartdate icd_code icd_version\n",
"86 10001884 27765344 1 2127-12-11 3950 9\n",
"87 10001884 27765344 2 2127-12-11 0055 9\n",
"88 10001884 27765344 3 2127-12-11 0046 9\n",
"89 10001884 27765344 4 2127-12-11 0040 9\n",
"85 10001884 26202981 1 2130-08-23 0DJ08ZZ 10\n",
"75 10001884 26184834 1 2131-01-11 5A1945Z 10\n",
"77 10001884 26184834 3 2131-01-11 0BH17EZ 10\n",
"78 10001884 26184834 4 2131-01-11 5A1223Z 10\n",
"79 10001884 26184834 5 2131-01-11 5A12012 10\n",
"80 10001884 26184834 6 2131-01-11 0BH17EZ 10\n",
"76 10001884 26184834 2 2131-01-12 5A1955Z 10\n",
"84 10001884 26184834 10 2131-01-12 0BH17EZ 10\n",
"81 10001884 26184834 7 2131-01-14 02HK3JZ 10\n",
"82 10001884 26184834 8 2131-01-14 02HV33Z 10\n",
"83 10001884 26184834 9 2131-01-15 3E0G76Z 10\n",
"118 10002013 28603984 1 2156-06-28 8361 9\n",
"119 10002013 28603984 2 2156-06-28 0444 9\n",
"106 10002013 23745275 1 2157-10-31 3607 9\n",
"107 10002013 23745275 2 2157-10-31 0066 9\n",
"108 10002013 23745275 3 2157-10-31 0040 9"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_proc2[:20]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "47eba470",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7f163ebbe2254545a9e72423ab3289df",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/144240 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Check if the ICD code version changes at some point of time\n",
"# CHARTDATEs are in the same order as the actual date for a given SUBJECT_ID.\n",
"# So we check, for each patient, the ICD_VERSION is 9 for earlier dates and 10 for later dates.\n",
"last_subject_id = -1\n",
"last_icd_version = -1\n",
"for i, row in tqdm(df_proc2.iterrows(), total=len(df_proc2)):\n",
" if row.hadm_id == '27635276': # The only HADM_ID with both ICD-9 and ICD-10\n",
" continue\n",
" \n",
" if last_subject_id != row.subject_id:\n",
" last_icd_version = row.icd_version\n",
" last_subject_id = row.subject_id\n",
" \n",
" assert(last_icd_version <= row.icd_version)\n",
" last_icd_version = row.icd_version"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "6a6c9427",
"metadata": {},
"outputs": [],
"source": [
"hadm_id_set_diag_9 = set(df_diag[df_diag.icd_version == 9].hadm_id)\n",
"hadm_id_set_diag_10 = set(df_diag[df_diag.icd_version == 10].hadm_id)\n",
"hadm_id_set_proc_9 = set(df_proc[df_proc.icd_version == 9].hadm_id)\n",
"hadm_id_set_proc_10 = set(df_proc[df_proc.icd_version == 10].hadm_id)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "030d75d2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HADM_ID that has ICD-9 diag : 276803\n",
"HADM_ID that has ICD-10 diag: 154059\n",
"HADM_ID that has ICD-9 proc : 155891\n",
"HADM_ID that has ICD-10 proc: 73555\n"
]
}
],
"source": [
"print(f\"HADM_ID that has ICD-9 diag : {len(hadm_id_set_diag_9)}\")\n",
"print(f\"HADM_ID that has ICD-10 diag: {len(hadm_id_set_diag_10)}\")\n",
"print(f\"HADM_ID that has ICD-9 proc : {len(hadm_id_set_proc_9)}\")\n",
"print(f\"HADM_ID that has ICD-10 proc: {len(hadm_id_set_proc_10)}\")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "a77a545b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"diag 9 & diag 10: 10\n",
"proc 9 & proc 10: 1\n",
"diag 9 & proc 9 : 155884\n",
"diag 10 & proc 10: 73538\n",
"diag 9 & proc 10: 2\n",
"proc 9 & diag 10: 1\n"
]
}
],
"source": [
"print(f'diag 9 & diag 10: {len(hadm_id_set_diag_9 & hadm_id_set_diag_10)}')\n",
"print(f'proc 9 & proc 10: {len(hadm_id_set_proc_9 & hadm_id_set_proc_10)}')\n",
"print(f'diag 9 & proc 9 : {len(hadm_id_set_diag_9 & hadm_id_set_proc_9)}')\n",
"print(f'diag 10 & proc 10: {len(hadm_id_set_diag_10 & hadm_id_set_proc_10)}')\n",
"print(f'diag 9 & proc 10: {len(hadm_id_set_diag_9 & hadm_id_set_proc_10)}')\n",
"print(f'proc 9 & diag 10: {len(hadm_id_set_proc_9 & hadm_id_set_diag_10)}')"
]
},
{
"cell_type": "markdown",
"id": "bb50dcc8",
"metadata": {},
"source": [
"$\\rightarrow$ HADM ID with ICD-9 diagnosis also has ICD-9 prorocedures (in most cases). \n",
"$\\rightarrow$ HADM ID with ICD-10 diagnosis also has ICD-10 pcedures (in most cases)."
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "e5480c1c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'29918558', '21575927', '23969809', '23076003', '27635276', '27016754', '28333632', '24773199', '22870827', '20216016'}\n"
]
}
],
"source": [
"# HADM_ID that has both ICD-9 and ICD-10 diagnoses (or prodecures)\n",
"print((hadm_id_set_diag_9 & hadm_id_set_diag_10) | (hadm_id_set_proc_9 & hadm_id_set_proc_10))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "ce660e02",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HADM_ID that has ICD-9 diag only : 276793\n",
"HADM_ID that has ICD-10 diag only: 154049\n",
"HADM_ID that has ICD-9 proc only : 155890\n",
"HADM_ID that has ICD-10 proc only: 154049\n"
]
}
],
"source": [
"# HADM_ID that has diagnoses (or procedures) in the same code set only\n",
"temp_set_diag_9 = hadm_id_set_diag_9 - hadm_id_set_diag_10\n",
"temp_set_diag_10 = hadm_id_set_diag_10 - hadm_id_set_diag_9\n",
"temp_set_proc_9 = hadm_id_set_proc_9 - hadm_id_set_proc_10\n",
"temp_set_proc_10 = hadm_id_set_proc_10 - hadm_id_set_proc_9\n",
"\n",
"print(f\"HADM_ID that has ICD-9 diag only : {len(temp_set_diag_9)}\")\n",
"print(f\"HADM_ID that has ICD-10 diag only: {len(temp_set_diag_10)}\")\n",
"print(f\"HADM_ID that has ICD-9 proc only : {len(temp_set_proc_9)}\")\n",
"print(f\"HADM_ID that has ICD-10 proc only: {len(temp_set_diag_10)}\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "824ef6a5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HADM_ID that has ICD-9 only : 276800\n",
"HADM_ID that has ICD-10 only: 154066\n"
]
}
],
"source": [
"hadm_id_set_9 = (hadm_id_set_diag_9 | hadm_id_set_proc_9) - (hadm_id_set_diag_10 | hadm_id_set_proc_10)\n",
"hadm_id_set_10 = (hadm_id_set_diag_10 | hadm_id_set_proc_10) - (hadm_id_set_diag_9 | hadm_id_set_proc_9)\n",
"print(f\"HADM_ID that has ICD-9 only : {len(hadm_id_set_9)}\")\n",
"print(f\"HADM_ID that has ICD-10 only: {len(hadm_id_set_10)}\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "1adfa2d4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"276800\n",
"154049\n"
]
}
],
"source": [
"print(len(temp_set_diag_9 | temp_set_proc_9))\n",
"print(len(temp_set_diag_10 | temp_set_diag_10))"
]
},
{
"cell_type": "markdown",
"id": "29cc0574",
"metadata": {},
"source": [
"### ICD code definition table"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "d14326e4",
"metadata": {},
"outputs": [],
"source": [
"df_d_diag = pd.read_csv(os.path.join(mimic4_root, 'hosp', 'd_icd_diagnoses.csv.gz'),\n",
" dtype={'icd_code': 'string', 'long_title': 'string'})\n",
"df_d_proc = pd.read_csv(os.path.join(mimic4_root, 'hosp', 'd_icd_procedures.csv.gz'),\n",
" dtype={'icd_code': 'string', 'long_title': 'string'})"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "1551a6cd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" <th>long_title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0010</td>\n",
" <td>9</td>\n",
" <td>Cholera due to vibrio cholerae</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0011</td>\n",
" <td>9</td>\n",
" <td>Cholera due to vibrio cholerae el tor</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0019</td>\n",
" <td>9</td>\n",
" <td>Cholera, unspecified</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0020</td>\n",
" <td>9</td>\n",
" <td>Typhoid fever</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0021</td>\n",
" <td>9</td>\n",
" <td>Paratyphoid fever A</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109770</th>\n",
" <td>Z992</td>\n",
" <td>10</td>\n",
" <td>Dependence on renal dialysis</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109771</th>\n",
" <td>Z993</td>\n",
" <td>10</td>\n",
" <td>Dependence on wheelchair</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109772</th>\n",
" <td>Z998</td>\n",
" <td>10</td>\n",
" <td>Dependence on other enabling machines and devices</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109773</th>\n",
" <td>Z9981</td>\n",
" <td>10</td>\n",
" <td>Dependence on supplemental oxygen</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109774</th>\n",
" <td>Z9989</td>\n",
" <td>10</td>\n",
" <td>Dependence on other enabling machines and devices</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>109775 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" icd_code icd_version \\\n",
"0 0010 9 \n",
"1 0011 9 \n",
"2 0019 9 \n",
"3 0020 9 \n",
"4 0021 9 \n",
"... ... ... \n",
"109770 Z992 10 \n",
"109771 Z993 10 \n",
"109772 Z998 10 \n",
"109773 Z9981 10 \n",
"109774 Z9989 10 \n",
"\n",
" long_title \n",
"0 Cholera due to vibrio cholerae \n",
"1 Cholera due to vibrio cholerae el tor \n",
"2 Cholera, unspecified \n",
"3 Typhoid fever \n",
"4 Paratyphoid fever A \n",
"... ... \n",
"109770 Dependence on renal dialysis \n",
"109771 Dependence on wheelchair \n",
"109772 Dependence on other enabling machines and devices \n",
"109773 Dependence on supplemental oxygen \n",
"109774 Dependence on other enabling machines and devices \n",
"\n",
"[109775 rows x 3 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_d_diag"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "75101276",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>icd_code</th>\n",
" <th>icd_version</th>\n",
" <th>long_title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0001</td>\n",
" <td>9</td>\n",
" <td>Therapeutic ultrasound of vessels of head and ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0002</td>\n",
" <td>9</td>\n",
" <td>Therapeutic ultrasound of heart</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0003</td>\n",
" <td>9</td>\n",
" <td>Therapeutic ultrasound of peripheral vascular ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0009</td>\n",
" <td>9</td>\n",
" <td>Other therapeutic ultrasound</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>001</td>\n",
" <td>10</td>\n",
" <td>Central Nervous System and Cranial Nerves, Bypass</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85252</th>\n",
" <td>XW0DXV5</td>\n",
" <td>10</td>\n",
" <td>Introduction of Gilteritinib Antineoplastic in...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85253</th>\n",
" <td>XXE</td>\n",
" <td>10</td>\n",
" <td>New Technology, Physiological Systems, Measure...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85254</th>\n",
" <td>XXE5XM5</td>\n",
" <td>10</td>\n",
" <td>Measurement of Infection, Whole Blood Nucleic ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85255</th>\n",
" <td>XY0</td>\n",
" <td>10</td>\n",
" <td>New Technology, Extracorporeal, Introduction</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85256</th>\n",
" <td>XY0VX83</td>\n",
" <td>10</td>\n",
" <td>Extracorporeal Introduction of Endothelial Dam...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>85257 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" icd_code icd_version long_title\n",
"0 0001 9 Therapeutic ultrasound of vessels of head and ...\n",
"1 0002 9 Therapeutic ultrasound of heart\n",
"2 0003 9 Therapeutic ultrasound of peripheral vascular ...\n",
"3 0009 9 Other therapeutic ultrasound\n",
"4 001 10 Central Nervous System and Cranial Nerves, Bypass\n",
"... ... ... ...\n",
"85252 XW0DXV5 10 Introduction of Gilteritinib Antineoplastic in...\n",
"85253 XXE 10 New Technology, Physiological Systems, Measure...\n",
"85254 XXE5XM5 10 Measurement of Infection, Whole Blood Nucleic ...\n",
"85255 XY0 10 New Technology, Extracorporeal, Introduction\n",
"85256 XY0VX83 10 Extracorporeal Introduction of Endothelial Dam...\n",
"\n",
"[85257 rows x 3 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_d_proc"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "c584b42f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"14666 ICD-9 diagnoses defined\n",
"95109 ICD-10 diagnoses defined\n",
" 3888 ICD-9 procedures defined\n",
"81369 ICD-10 procedures defined\n"
]
}
],
"source": [
"diag_9_set = set(df_d_diag[df_d_diag.icd_version==9].icd_code)\n",
"diag_10_set = set(df_d_diag[df_d_diag.icd_version==10].icd_code)\n",
"print(f'{len(diag_9_set):5d} ICD-9 diagnoses defined')\n",
"print(f'{len(diag_10_set):5d} ICD-10 diagnoses defined')\n",
"\n",
"proc_9_set = set(df_d_proc[df_d_proc.icd_version==9].icd_code)\n",
"proc_10_set = set(df_d_proc[df_d_proc.icd_version==10].icd_code)\n",
"print(f'{len(proc_9_set):5d} ICD-9 procedures defined')\n",
"print(f'{len(proc_10_set):5d} ICD-10 procedures defined')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "183b0b12",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 493 diagnoses are both in ICD-9 and ICD-10\n",
" 6 procedures are both in ICD-9 and ICD-10\n"
]
}
],
"source": [
"print(f'{len(diag_9_set & diag_10_set):5d} diagnoses are both in ICD-9 and ICD-10')\n",
"print(f'{len(proc_9_set & proc_10_set):5d} procedures are both in ICD-9 and ICD-10')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "91562f14",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 1627 codes are both in ICD-9 diags and procs\n",
" 58 codes are both in ICD-10 diags and procs\n"
]
}
],
"source": [
"print(f'{len(diag_9_set & proc_9_set):5d} codes are both in ICD-9 diags and procs')\n",
"print(f'{len(diag_10_set & proc_10_set):5d} codes are both in ICD-10 diags and procs')"
]
},
{
"cell_type": "markdown",
"id": "ab26b6b4",
"metadata": {},
"source": [
"## 3. Discharge summary notes"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "7fe84a7a",
"metadata": {},
"outputs": [],
"source": [
"df_disch = pd.read_csv(os.path.join(mimic4_root, 'note', 'discharge.csv.gz'),\n",
" dtype={'note_id': 'string', 'subject_id': 'string', 'hadm_id': 'string', 'text': 'string'})"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "7f69891f",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>note_id</th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>note_type</th>\n",
" <th>note_seq</th>\n",
" <th>charttime</th>\n",
" <th>storetime</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000032-DS-21</td>\n",
" <td>10000032</td>\n",
" <td>22595853</td>\n",
" <td>DS</td>\n",
" <td>21</td>\n",
" <td>2180-05-07 00:00:00</td>\n",
" <td>2180-05-09 15:26:00</td>\n",
" <td>Name: ___ Unit No: __...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000032-DS-22</td>\n",
" <td>10000032</td>\n",
" <td>22841357</td>\n",
" <td>DS</td>\n",
" <td>22</td>\n",
" <td>2180-06-27 00:00:00</td>\n",
" <td>2180-07-01 10:15:00</td>\n",
" <td>Name: ___ Unit No: __...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10000032-DS-23</td>\n",
" <td>10000032</td>\n",
" <td>29079034</td>\n",
" <td>DS</td>\n",
" <td>23</td>\n",
" <td>2180-07-25 00:00:00</td>\n",
" <td>2180-07-25 21:42:00</td>\n",
" <td>Name: ___ Unit No: __...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10000032-DS-24</td>\n",
" <td>10000032</td>\n",
" <td>25742920</td>\n",
" <td>DS</td>\n",
" <td>24</td>\n",
" <td>2180-08-07 00:00:00</td>\n",
" <td>2180-08-10 05:43:00</td>\n",
" <td>Name: ___ Unit No: __...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000084-DS-17</td>\n",
" <td>10000084</td>\n",
" <td>23052089</td>\n",
" <td>DS</td>\n",
" <td>17</td>\n",
" <td>2160-11-25 00:00:00</td>\n",
" <td>2160-11-25 15:09:00</td>\n",
" <td>Name: ___ Unit No: ___...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331789</th>\n",
" <td>19999828-DS-6</td>\n",
" <td>19999828</td>\n",
" <td>29734428</td>\n",
" <td>DS</td>\n",
" <td>6</td>\n",
" <td>2147-08-04 00:00:00</td>\n",
" <td>2147-08-12 15:36:00</td>\n",
" <td>Name: ___ Unit No: ___\n",
"...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331790</th>\n",
" <td>19999828-DS-7</td>\n",
" <td>19999828</td>\n",
" <td>25744818</td>\n",
" <td>DS</td>\n",
" <td>7</td>\n",
" <td>2149-01-18 00:00:00</td>\n",
" <td>2149-01-19 07:03:00</td>\n",
" <td>Name: ___ Unit No: ___\n",
"...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331791</th>\n",
" <td>19999840-DS-20</td>\n",
" <td>19999840</td>\n",
" <td>26071774</td>\n",
" <td>DS</td>\n",
" <td>20</td>\n",
" <td>2164-07-28 00:00:00</td>\n",
" <td>2164-07-29 14:52:00</td>\n",
" <td>Name: ___ Unit No: ___\n",
" ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331792</th>\n",
" <td>19999840-DS-21</td>\n",
" <td>19999840</td>\n",
" <td>21033226</td>\n",
" <td>DS</td>\n",
" <td>21</td>\n",
" <td>2164-09-17 00:00:00</td>\n",
" <td>2164-09-18 01:36:00</td>\n",
" <td>Name: ___ Unit No: ___\n",
" ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331793</th>\n",
" <td>19999987-DS-2</td>\n",
" <td>19999987</td>\n",
" <td>23865745</td>\n",
" <td>DS</td>\n",
" <td>2</td>\n",
" <td>2145-11-11 00:00:00</td>\n",
" <td>2145-11-11 13:13:00</td>\n",
" <td>Name: ___ Unit No: ___...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>331794 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" note_id subject_id hadm_id note_type note_seq \\\n",
"0 10000032-DS-21 10000032 22595853 DS 21 \n",
"1 10000032-DS-22 10000032 22841357 DS 22 \n",
"2 10000032-DS-23 10000032 29079034 DS 23 \n",
"3 10000032-DS-24 10000032 25742920 DS 24 \n",
"4 10000084-DS-17 10000084 23052089 DS 17 \n",
"... ... ... ... ... ... \n",
"331789 19999828-DS-6 19999828 29734428 DS 6 \n",
"331790 19999828-DS-7 19999828 25744818 DS 7 \n",
"331791 19999840-DS-20 19999840 26071774 DS 20 \n",
"331792 19999840-DS-21 19999840 21033226 DS 21 \n",
"331793 19999987-DS-2 19999987 23865745 DS 2 \n",
"\n",
" charttime storetime \\\n",
"0 2180-05-07 00:00:00 2180-05-09 15:26:00 \n",
"1 2180-06-27 00:00:00 2180-07-01 10:15:00 \n",
"2 2180-07-25 00:00:00 2180-07-25 21:42:00 \n",
"3 2180-08-07 00:00:00 2180-08-10 05:43:00 \n",
"4 2160-11-25 00:00:00 2160-11-25 15:09:00 \n",
"... ... ... \n",
"331789 2147-08-04 00:00:00 2147-08-12 15:36:00 \n",
"331790 2149-01-18 00:00:00 2149-01-19 07:03:00 \n",
"331791 2164-07-28 00:00:00 2164-07-29 14:52:00 \n",
"331792 2164-09-17 00:00:00 2164-09-18 01:36:00 \n",
"331793 2145-11-11 00:00:00 2145-11-11 13:13:00 \n",
"\n",
" text \n",
"0 \n",
"Name: ___ Unit No: __... \n",
"1 \n",
"Name: ___ Unit No: __... \n",
"2 \n",
"Name: ___ Unit No: __... \n",
"3 \n",
"Name: ___ Unit No: __... \n",
"4 \n",
"Name: ___ Unit No: ___... \n",
"... ... \n",
"331789 \n",
"Name: ___ Unit No: ___\n",
"... \n",
"331790 \n",
"Name: ___ Unit No: ___\n",
"... \n",
"331791 \n",
"Name: ___ Unit No: ___\n",
" ... \n",
"331792 \n",
"Name: ___ Unit No: ___\n",
" ... \n",
"331793 \n",
"Name: ___ Unit No: ___... \n",
"\n",
"[331794 rows x 8 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_disch"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "8bba22fb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unique SUBJECT_ID in discharge summary : 145915\n",
"Unique SUBJECT_ID in diag + proc : 180650\n",
"Unique SUBJECT_ID in disch & (diag+proc): 145876\n"
]
}
],
"source": [
"# Patient\n",
"subject_id_diag_set = set(df_diag.subject_id) | set(df_proc.subject_id)\n",
"subject_id_disch_set = set(df_disch.subject_id)\n",
"\n",
"print(f'Unique SUBJECT_ID in discharge summary : {len(subject_id_disch_set)}')\n",
"print(f'Unique SUBJECT_ID in diag + proc : {len(subject_id_diag_set)}')\n",
"print(f'Unique SUBJECT_ID in disch & (diag+proc): {len(subject_id_disch_set & subject_id_diag_set)}')"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "258b19b7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unique HADM_ID in discharge summary : 331794\n",
"Unique HADM_ID in diag + proc : 430876\n",
"Unique HADM_ID in disch & (diag+proc): 331669\n"
]
}
],
"source": [
"# HADM ID\n",
"hadm_id_diag_set = set(df_diag.hadm_id) | set(df_proc.hadm_id)\n",
"hadm_id_disch_set = set(df_disch.hadm_id)\n",
"\n",
"print(f'Unique HADM_ID in discharge summary : {len(hadm_id_disch_set)}')\n",
"print(f'Unique HADM_ID in diag + proc : {len(hadm_id_diag_set)}')\n",
"print(f'Unique HADM_ID in disch & (diag+proc): {len(hadm_id_disch_set & hadm_id_diag_set)}')"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "352fa52a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" \n",
"Name: ___ Unit No: ___\n",
" \n",
"Admission Date: ___ Discharge Date: ___\n",
" \n",
"Date of Birth: ___ Sex: M\n",
" \n",
"Service: MEDICINE\n",
" \n",
"Allergies: \n",
"No Known Allergies / Adverse Drug Reactions\n",
" \n",
"Attending: ___\n",
" \n",
"Chief Complaint:\n",
"shortness of breath\n",
" \n",
"Major Surgical or Invasive Procedure:\n",
"Large volume paracentesis 2x/week with interventional radiology\n",
" \n",
"History of Present Illness:\n",
"___ man Child B HCV/EtOH cirrhosis (decompensated by\n",
"varices s/p TIPS in ___, ascites, HE, SBP; followed at ___ but\n",
"lost to f/u), Afib (not on AC), COPD, fragmented care, presents\n",
"with dyspnea. \n",
"\n",
" ___ states that he was in ___ near ___\n",
"when he developed abdominal swelling and shortness of breath \n",
"much\n",
"worse over the last week. Around ___ he was slightly short\n",
"of breath walking around, noticed he could not swim across the\n",
"pool like he normally could and had abdominal distention. This\n",
"was rapidly progressive over that week to the point that when he\n",
"was flying back the day of admission, he requested O2 from the\n",
"flight attendant which he states relieved his shortness of\n",
"breath. He was met by EMS at disembarkation, his O2 sat per him\n",
"was 79%, he was placed on nonrebreather and brought to ___ ED.\n",
"\n",
" He states that while he normally follows a low salt diet, he\n",
"\"cheated a little bit down in ___.. while the cats are \n",
"away,\n",
"mice must play\" he had bbq, drank more fluids even 1 beer while\n",
"waiting for his flight home after delayed for three hours. He\n",
"reports bowel movements three times per day only since adding\n",
"miralax to his lactulose. He has been taking Tylenol ___ QID\n",
"for back pain for the last two weeks (prior was 625mg x6 per\n",
"day). He denies fever, cough, dysuria, diarrhea. He does endorse\n",
"abdominal pain. \n",
"\n",
"In the ED initial vitals: T: 98.3 HR: 65 BP: 151/62 RR: 24\n",
"SO2: \n",
"100% Non-Rebreather \n",
"- Exam notable for: \n",
" Morbidly obese man with significant abdominal distention and\n",
"edema and positive fluid wave\n",
" No scleral icterus normocephalic atraumatic\n",
" Neck is supple\n",
" Tachycardic regular rhythm\n",
" Rhonchorous breath sounds in bilateral lobes with diffuse\n",
"expiratory wheezes\n",
" Significant lower extremity swelling bilaterally\n",
" Large amount of ascites on bedside fast\n",
" Small frequency flapping with asterixis\n",
"- Labs notable for:\n",
"CBC: WBC: 14.2 Hgb: 8.4 Plt: 225 \n",
"Chem7: Na: 125 Cl: 87 BUN: 11 \n",
" K: 5.9 HCO3: 25 Crt: 0.9 \n",
"LFTs: ALT: 20 AST: 112 AP: 120 Tbili: 1.9 Alb: 3.0 \n",
"Coags: ___: 15.6 PTT: 27.7 INR: 1.4 \n",
"Urine Sodium: Na:<20 osm: 252\n",
"Trop-T: <0.01 \n",
"Ascitic fluid: WBC 746 40% Poly\n",
"\n",
"- Imaging notable for: see below. Incompletely evaluated TIPS,\n",
"large volume ascites. \n",
"\n",
"- Consults: Hepatology was consulted. Recommended careful\n",
"diuresis as he had kidney injury last admission as well as\n",
"hyponatremia (discharge diuretic was 20mg of PO torsemide). \n",
"Evaluation for SBP. Lactulose for HE.\n",
"\n",
"- ___ was given: \n",
" ___ 06:24 IV Furosemide 80 mg \n",
" ___ 08:00 IV CefTRIAXone 2 g \n",
" ___ 10:12 NEB Ipratropium-Albuterol Neb \n",
" ___ 10:12 IH Albuterol 0.083% Neb Soln \n",
" ___ 10:12 PO/NG Diltiazem 60 mg \n",
" ___ 10:12 PO/NG Gabapentin 300 mg \n",
"\n",
"- Vitals on transfer: T: 98.3 HR: 100 BP: 109/61 RR: 20 SO2:\n",
"95% 4 liters \n",
" \n",
"Past Medical History:\n",
"1. Hypertension\n",
"2. A. fib\n",
"3. HCV(unclear if treated)\n",
"4. EtOH/HCV cirrhosis ( complicated by esophageal varices and \n",
"ascites) \n",
"5. status post TIPS\n",
"6. alcohol abuse\n",
"7. polysubstance abuse \n",
"8. emphysema\n",
" \n",
"Social History:\n",
"___\n",
"Family History:\n",
"Father with type 2 diabetes. Twin brother with alcoholic \n",
"cirrhosis.\n",
" \n",
"Physical Exam:\n",
"ADMISSION PHYSICAL EXAMINATION: \n",
"VS: ___ 1147 Temp: 98.4 PO BP: 102/60 HR: 95 RR: 20 O2 sat:\n",
"92% O2 delivery: 4LNC Admission Wt: 152 kg Discharge Wt (___):\n",
"283lb/128.37 kg \n",
"GENERAL: AOx3, NAD, obese man with several tattoos on both arms,\n",
"Pickwickian, distended abdomen \n",
"HEENT: MMM, PERRL, no scleral icterus but yellowing of buccal\n",
"mucosa \n",
"CV: Irregularly irregular rhythm, ___ SEM radiation to carotids\n",
"RESP: rhoncorous throughout prolonged expiration, cough with\n",
"forced expiration \n",
"ABD: Abdomen large but soft. Tympanic to percussion. No \n",
"rebound or guarding. \n",
"EXTR: brawny bilateral elephantiasis nostras verrucosa edema 2+\n",
"to thighs \n",
"NEURO: Awake, AAOx3, slight asterixis no hyperreflexia\n",
"SKIN: Venous stasis of lower abdomen, non-tender. Caput medusa. \n",
"\n",
"\n",
"DISCHARGE PHYSICAL EXAMINATION: \n",
"VITALS: T98.0 BP 127/68 HR 106 RR 18 94%Ra \n",
"GENERAL: awake, obese man, NAD\n",
"HEENT: mildly icteric sclera, MMM\n",
"CV: irregularly irregular rhythm, ___ SEM radiation to carotids\n",
"RESP: diffuse rhonchi w upper airway sounds transmitted, nl WOB \n",
"on RA\n",
"ABD: obese, distended, firm, NT, no rebound or guarding\n",
"GU: penoscrotal swelling decreased, no cyanosis \n",
"EXTR: bilateral elephantiasis nostras verrucosa, edema 2+ to \n",
"thighs \n",
"NEURO: AOx3, face symmetrical, moving all extremities, no \n",
"asterixis\n",
"SKIN: venous stasis of lower abdomen, non-tender, caput medusa\n",
"\n",
" \n",
"Pertinent Results:\n",
"ADMISSION LABS\n",
"___ 04:28AM BLOOD WBC-14.2* RBC-3.62* Hgb-8.4* Hct-29.6* \n",
"MCV-82 MCH-23.2* MCHC-28.4* RDW-21.5* RDWSD-62.8* Plt ___\n",
"___ 04:28AM BLOOD Neuts-81.8* Lymphs-6.1* Monos-10.8 \n",
"Eos-0.1* Baso-0.1 NRBC-0.1* Im ___ AbsNeut-11.64* \n",
"AbsLymp-0.87* AbsMono-1.53* AbsEos-0.02* AbsBaso-0.02\n",
"___ 04:28AM BLOOD ___ PTT-27.7 ___\n",
"___ 04:28AM BLOOD Glucose-120* UreaN-11 Creat-0.9 Na-125* \n",
"K-5.9* Cl-87* HCO3-25 AnGap-13\n",
"___ 04:28AM BLOOD ALT-20 AST-112* AlkPhos-120 TotBili-1.9*\n",
"___ 04:28AM BLOOD Lipase-41\n",
"___ 04:28AM BLOOD cTropnT-<0.01\n",
"___ 04:28AM BLOOD Albumin-3.0*\n",
"___ 06:00AM BLOOD Calcium-8.6 Phos-3.5 Mg-1.7\n",
"___ 06:00AM BLOOD Osmolal-266*\n",
"___ 06:00AM BLOOD Acetmnp-NEG\n",
"___ 04:38AM BLOOD ___ pO2-48* pCO2-49* pH-7.39 \n",
"calTCO2-31* Base XS-3\n",
"\n",
"___ 04:50AM ASCITES TNC-746* RBC-2749* Polys-40* Lymphs-38* \n",
"Monos-3* Mesothe-10* Macroph-9*\n",
"___ 04:50AM ASCITES TotPro-2.0 Glucose-133\n",
"\n",
"PERTINENT INTERVAL LABS\n",
"___ 06:37AM BLOOD calTIBC-263 VitB12-430 Folate->20 \n",
"Ferritn-24* TRF-202\n",
"___ 06:37AM BLOOD Iron-16*\n",
"___ 05:13AM BLOOD %HbA1c-5.0 eAG-97\n",
"___ 06:00AM BLOOD Osmolal-266*\n",
"\n",
"___ 06:17AM BLOOD Digoxin-0.5*\n",
"___ 06:21AM BLOOD Digoxin-0.6\n",
"___ 06:37AM BLOOD Digoxin-0.5*\n",
"___ 04:40AM BLOOD Digoxin-0.7\n",
"\n",
"___ 02:43PM BLOOD ___ pO2-41* pCO2-84* pH-7.26* \n",
"calTCO2-39* Base XS-6\n",
"___ 03:48PM BLOOD ___ pO2-50* pCO2-62* pH-7.39 \n",
"calTCO2-39* Base XS-9 Comment-GREEN TOP \n",
"\n",
"DISCHARGE LABS\n",
"--------------------\n",
"___ 04:26AM BLOOD WBC-9.8 RBC-3.07* Hgb-7.3* Hct-25.8* \n",
"MCV-84 MCH-23.8* MCHC-28.3* RDW-21.8* RDWSD-67.2* Plt ___\n",
"___ 04:26AM BLOOD Neuts-66.2 Lymphs-16.0* Monos-16.9* \n",
"Eos-0.1* Baso-0.3 Im ___ AbsNeut-6.49* AbsLymp-1.57 \n",
"AbsMono-1.66* AbsEos-0.01* AbsBaso-0.03\n",
"___ 04:26AM BLOOD ___ PTT-33.5 ___\n",
"___ 04:26AM BLOOD Glucose-100 UreaN-17 Creat-0.8 Na-133* \n",
"K-4.1 Cl-90* HCO3-32 AnGap-11\n",
"___ 04:26AM BLOOD ALT-10 AST-23 LD(LDH)-179 AlkPhos-94 \n",
"TotBili-1.5\n",
"___ 04:26AM BLOOD Albumin-2.8* Calcium-8.4 Phos-2.9 Mg-1.3*\n",
"___ 04:26AM BLOOD Digoxin-0.7\n",
"\n",
"MICROBIOLOGY\n",
"------------------\n",
"___ 01:05PM BLOOD HCV Ab-POS*\n",
"___ 01:05PM BLOOD HCV VL-5.1*\n",
"\n",
"BLOOD CULTURES NEGATIVE ON: ___\n",
"PERITONEAL FLUID CULTURES NEGATIVE ON: ___, \n",
"___\n",
"\n",
"URINE CULTURE POSITIVE ON: ___\n",
" ENTEROCOCCUS SP.. 10,000-100,000 CFU/mL. \n",
"\n",
" SENSITIVITIES: MIC expressed in \n",
"MCG/ML\n",
" \n",
"_________________________________________________________\n",
" ENTEROCOCCUS SP.\n",
" | \n",
"AMPICILLIN------------ <=2 S\n",
"NITROFURANTOIN-------- <=16 S\n",
"TETRACYCLINE---------- <=1 S\n",
"VANCOMYCIN------------ 1 S\n",
"\n",
"URINE CULTURE NEGATIVE ON: ___\n",
"\n",
"IMAGING AND STUDIES\n",
"___ CXR\n",
"1. Mild bilateral interstitial pulmonary edema.\n",
"2. Bilateral layering pleural effusions.\n",
" \n",
"___ ABD/PEL DUPLEX US\n",
"1. The study is markedly limited by ___ body habitus, as \n",
"described above.\n",
"2. The main portal vein was not definitively seen. There is \n",
"some flow in the proximal TIPS with flow velocity of 112 \n",
"centimeters/second. However flow is not definitively seen in the \n",
"mid and distal TIPSs. If there is persistent clinical concern \n",
"may consider repeat study.\n",
"3. Cirrhotic liver and ascites.\n",
"\n",
"___ CXR\n",
"Comparison to ___. The ___ has received the new \n",
"right-sided PICC line. The course of the line is unremarkable, \n",
"the tip of the line projects over the mid to lower SVC. No \n",
"complications, notably no pneumothorax. Otherwise the radiograph \n",
"is unchanged.\n",
"\n",
"___ CXR\n",
"Moderate right pleural effusion, mild pulmonary edema and severe \n",
"enlargement of the cardiac silhouette have all worsened since \n",
"___. No pneumothorax. Multiple healed right rib fractures \n",
"noted. Right PIC line ends close to the superior cavoatrial \n",
"junction.\n",
" \n",
"___ ABD/PEL DUPLEX US\n",
"Patent TIPS, appropriate velocities as reported above.\n",
"\n",
"___ CXR\n",
"Previous pulmonary and mediastinal vascular congestion have \n",
"resolved and cardiac silhouette though still moderately enlarged \n",
"is slightly smaller. Small right pleural effusion has \n",
"decreased. Probably still a moderate amount of left lower lobe \n",
"atelectasis. Fractures, lateral, right middle ribs, well-healed.\n",
"\n",
"___ CXR \n",
"Comparison to ___. The ___ has received a new \n",
"right PICC line. The course of the line is unremarkable, the \n",
"tip of the line projects over the lower SVC. No complications, \n",
"notably no pneumothorax. The radiograph appears otherwise \n",
"unchanged.\n",
"\n",
" \n",
"Brief Hospital Course:\n",
"___ SUMMARY\n",
"___ man Child B HCV/EtOH cirrhosis (decompensated by \n",
"varices s/p TIPS in ___, ascites, HE, SBP), Afib (not on AC), \n",
"COPD, fragmented care presented with volume overload and \n",
"hypoxemia after recent dietary indiscretion in ___.\n",
"\n",
"ACUTE ISSUES\n",
"# Decompensated cirrhosis with volume overload, hepatic \n",
"encephalopathy, hyponatremia, and spontaneous bacteria \n",
"peritonitis \n",
"___ presented from 2 weeks trip in ___ with reported \n",
"increased sodium intake grossly volume overloaded with new \n",
"oxygen requirement. Initial paracentesis showed SBP despite his \n",
"home ciprofloxacin ppx and he completed at 5d course of \n",
"ceftriaxone. He was also noted to have asterixis on admission \n",
"which improved with increasing his lactulose dosing frequency. \n",
"He was initially diuresed on the floor with IV furosemide \n",
"boluses then drip. Required transfer to MICU on ___ for \n",
"hypoxemic respiratory failure felt to be secondary to volume \n",
"overload as well as tachycardia (see elsewhere for MICU course). \n",
"After stabilization, he was transferred back to the floor and \n",
"continued on furosemide drip for several days. He developed \n",
"hyponatremia which did not improve with albumin. TIPS was \n",
"evaluated w US and patent. Furosemide was discontinued and he \n",
"was started on tolvaptan on ___. His sodium trended up and he \n",
"was restarted on IV furosemide then transitioned to PO torsemide \n",
"prior to discharge. He required bi-weekly paracentesis ___ \n",
"removed) with interventional radiology. For SBP prophylaxis, he \n",
"was started on Bactrim because he had developed SBP while on \n",
"ciprofloxacin ppx. He was not euvolemic at discharge, but was \n",
"felt to be stable and that diuresis could continue slowly at \n",
"home with close follow-up. Final diuresis plan: tolvaptan 30mg \n",
"daily, torsemide 40mg daily, and plan to set up weekly or \n",
"twice-weekly paracentesis outpatient through his new \n",
"hepatologist. \n",
"\n",
"# Paracentesis leak. ___ underwent LVP on ___, \n",
"complicated by persistent ascites leak. Pressure dressings were \n",
"applied without improvement in leaking site. A ___ was not \n",
"placed given this was unsuccessful during previous admission. \n",
"___ was advised to stay inpatient for monitoring of this \n",
"leaking paracentesis site. He deferred and was discharged to \n",
"home with ostomy bag covering paracentesis site. He understood \n",
"the risks of infection and need for re-admission if leak were to \n",
"continue. He will have ___ at home and will advise ___ to \n",
"monitor the area for infection. \n",
"\n",
"# Acute Hypoxemic & Chronic Hypercarbic Respiratory Failure\n",
"# COPD \n",
"___ came to MICU on ___ for hypoxemic respiratory failure \n",
"felt to be secondary to volume overload with poor lung reserves \n",
"from COPD. Stabilized on BIPAP and then weaned to nasal cannula \n",
"O2. Started on Lasix bolus and gtt with excellent diuresis. \n",
"Transitioned back to the floor and was weaned off of nasal \n",
"cannula several days prior to discharge. Interval CXR showed \n",
"improvement in pulmonary edema. He was continued on his CPAP at \n",
"night and COPD medications. Stable on room air at time of \n",
"discharge. \n",
"\n",
"# Atrial fibrillation with rapid ventricular response\n",
"Pt w known history of afib (CHADSVASC of 0, not on \n",
"anticoagulation), came in on diltiazem 60mg TID. Upon time of \n",
"transfer to MICU, ___ was in rapid A-fib with rates into the \n",
"120s-130s. Diltiazem was increased to 90 mg q.8H, which was well \n",
"tolerated and controlled rates well (90s-100s) when he was \n",
"transitioned back to the floor. Unfortunately hypotension became \n",
"an issues as his diuresis continued, and he was first switched \n",
"to metoprolol, also with continued hypotension of SBPs ___. \n",
"He was transitioned to digoxin as it was felt this would remove \n",
"any hypotensive side effects of rate-control medications and \n",
"allow for better diuresis. Levels were check and were 0.7 day \n",
"prior to discharge. He was discharged on 0.25mg daily with HRs \n",
"in 100s-110s. Plan for him to follow-up with a new cardiologist \n",
"at ___. He was counseled on s/sx of digoxin toxicity, and \n",
"that he was at increased risk for this due to other medications \n",
"he was on. He stated he will maintain a high suspicion for this \n",
"and seek medical attention if needed. \n",
"\n",
"# UTI, Enterococcal\n",
"Noted to be more somnolent on afternoon of ___ which did not \n",
"resolve despite increase in lactulose. Infectious work-up was \n",
"negative for SBP but did reveal enterococcus in urine. He was \n",
"treated broad-spectrium antibiotics that were eventually \n",
"narrowed to ampicillin for a total of 7d course. No dysuria or \n",
"other concerning symptoms at time of discharge. \n",
"\n",
"# Penoscrotal edema\n",
"___ noted this to develop during hospital course and was \n",
"painful. He did not have any difficulty urinating. He was seen \n",
"by urology team who attempted to reduce foreskin but were unable \n",
"as ___ did not tolerate the pain. Healthy appearing glans \n",
"without any concern for neurovascular compromise. Thought to be \n",
"due to total body volume overload, no concern for infection or \n",
"obstruction. Genitals were elevated and swelling diminished at \n",
"time of discharge. \n",
"\n",
"# Anemia\n",
"Appears to be combination of macro and microcytic. Some \n",
"contribution from iatrogenic phlebotomy in setting of poor \n",
"reserves so received 1u pRBCs during hospital course, no \n",
"evidence of bleeding. Iron levels were low, repletion should be \n",
"discussed with outpatient PCP. \n",
"\n",
"# Changes in care providers\n",
"___ requested that all of his care be changed from ___ to \n",
"___. We were unable to find PCP taking patients in a safe \n",
"interval for ___ to follow-up, so instead set up with PCP in \n",
"___. Unable to find hepatologist available per Care \n",
"Connection in ___ either, so was set up at ___ to follow \n",
"him outpatient. He was set up with a new cardiologist at \n",
"___. The importance of following up with providers was \n",
"discussed with the ___ to ensure safe coordination of his \n",
"outpatient care plan. \n",
"\n",
"CHRONIC ISSUES\n",
"# Hepatorenal syndrome\n",
"Continued on home Midodrine which was uptitrated to 15mg TID to \n",
"improve renal perfusion and aid in diuresis. Octreotide was not \n",
"used given history of bowel pseudo-obstruction. Discharged on \n",
"Midodrine 15mg TID. \n",
"\n",
"# Chronic pain\n",
"Largely in low back, likely exacerbated by diffuse fluid \n",
"overload. Maintained in home gabapentin, tramadol, and morphine \n",
"when appropriate. \n",
"\n",
"# Obstructive sleep apnea: Maintained on CPAP while sleeping. \n",
"\n",
"TRANSITIONAL ISSUES\n",
"----NEW MEDICATIONS----\n",
" * Tolvaptan for diuresis and hyponatremia \n",
" * Digoxin for afib rate control without blood pressure \n",
"side-effects\n",
" * Magnesium oxide for electrolyte repletion \n",
" * Bactrim for SBP prophylaxis as became infected while on Cipro \n",
"ppx \n",
"\n",
"----CHANGED MEDICATIONS----\n",
" * Midodrine dose increased\n",
" * Torsemide dose increased\n",
"\n",
"----STOPPED MEDICATIONS----\n",
" * Diltiazem stopped in favor of digoxin to decrease hypotensive \n",
"side effects\n",
" * Spironolactone stopped as unnecessary, can restart in future \n",
"if ___ requires more diuresis \n",
"\n",
"- It was recommended that ___ be discharged to rehab for \n",
"continued ___. ___ deferred and was able to state the risks \n",
"of returning home. \n",
"\n",
"___\n",
"- Please collect CBC and Chem10 on ___ and fax results to \n",
"attn: Dr. ___ at ___. \n",
"\n",
"[ ] Monitor volume status and adjust the dose of diuretics as \n",
"needed. \n",
"[ ] ___ sent home with ostomy bag over site of recent \n",
"paracentesis as he continued to leak fluid after therapeutic \n",
"paracentesis on ___. He was advised of the risks of infection \n",
"and ongoing fluid loss and stated his ___ would follow this up. \n",
"Please monitor site for infection and ongoing leakage. Per \n",
"interventional radiology, can try steristrips with tegaderm \n",
"(this was tried multiple times with leak soaking through). \n",
"[ ] Repeat BMP in one week after discharge and adjust diuretics \n",
"as needed. Be especially mindful of risk of developing \n",
"hypernatremia on tolvaptan and risk of medication toxicity (dig) \n",
"if there are changes to Cr. \n",
"[ ] Noted to have low ferratin and serum iron, consider starting \n",
"___ on repletion v. IV infusions as outpatient. \n",
"[ ] Monitor digoxin levels as appropriate.\n",
"[ ] Consider discontinuing tolvaptan once ___ is close to \n",
"euvolemia. \n",
"[ ] ___ decided to move all his care from ___ to ___, so \n",
"will follow up with new PCP, ___, and \n",
"outpatient cardiologist. \n",
"[ ] Will likely require ___ weekly paracentesis outpatient for \n",
"refractory ascites, to be coordinated in outpatient ___ \n",
"clinic. \n",
"\n",
"# DISCHARGE WEIGHT: 208 lbs\n",
"# DISCHARGE CREATININE: 0.9\n",
"# CODE STATUS: full\n",
"# CONTACT/HCP: ___ (wife) ___ \n",
"\n",
" \n",
"Medications on Admission:\n",
"The Preadmission Medication list is accurate and complete.\n",
"1. Albuterol 0.083% Neb Soln 1 NEB IH Q4H:PRN SOB \n",
"2. Diltiazem 60 mg PO TID \n",
"3. Fluorometholone 0.1% Ophth Susp. 1 DROP BOTH EYES BID \n",
"4. FoLIC Acid 1 mg PO DAILY \n",
"5. Gabapentin 300 mg PO BID \n",
"6. Pantoprazole 40 mg PO Q24H \n",
"7. Rifaximin 550 mg PO BID \n",
"8. Thiamine 100 mg PO DAILY \n",
"9. Midodrine 5 mg PO TID \n",
"10. Spironolactone 50 mg PO DAILY \n",
"11. Albuterol Inhaler 2 PUFF IH Q4H:PRN SOB \n",
"12. Dulera (mometasone-formoterol) 200-5 mcg/actuation \n",
"inhalation BID \n",
"13. Lactulose 30 mL PO TID \n",
"14. melatonin 3 mg oral QHS \n",
"15. Multivitamins 1 TAB PO DAILY \n",
"16. Tiotropium Bromide 1 CAP IH DAILY \n",
"17. TraMADol 50 mg PO Q6H:PRN Pain - Moderate \n",
"18. Sulfameth/Trimethoprim DS 1 TAB PO DAILY \n",
"19. Torsemide 20 mg PO DAILY \n",
"20. Morphine SR (MS ___ 15 mg PO Q12H \n",
"21. Polyethylene Glycol 17 g PO DAILY \n",
"\n",
" \n",
"Discharge Medications:\n",
"1. Digoxin 0.25 mg PO DAILY \n",
"RX *digoxin 250 mcg 1 tablet(s) by mouth once a day Disp #*30 \n",
"Tablet Refills:*0 \n",
"2. Magnesium Oxide 400 mg PO DAILY \n",
"RX *magnesium oxide 400 mg 1 tablet(s) by mouth once a day Disp \n",
"#*30 Tablet Refills:*0 \n",
"3. Midodrine 15 mg PO TID \n",
"RX *midodrine 10 mg 1.5 (One and a half) tablet(s) by mouth \n",
"three times a day Disp #*135 Tablet Refills:*0 \n",
"4. Multivitamins W/minerals 1 TAB PO DAILY \n",
"RX *multivitamin,tx-minerals [Vitamins and Minerals] 1 \n",
"tablet(s) by mouth once a day Disp #*30 Tablet Refills:*0 \n",
"5. Sulfameth/Trimethoprim DS 1 TAB PO DAILY \n",
"RX *sulfamethoxazole-trimethoprim 800 mg-160 mg 1 tablet(s) by \n",
"mouth once a day Disp #*30 Tablet Refills:*0 \n",
"6. Tolvaptan 30 mg PO DAILY \n",
"7. Torsemide 40 mg PO DAILY \n",
"RX *torsemide 20 mg 2 tablet(s) by mouth once a day Disp #*60 \n",
"Tablet Refills:*0 \n",
"8. Albuterol 0.083% Neb Soln 1 NEB IH Q4H:PRN SOB \n",
"9. Albuterol Inhaler 2 PUFF IH Q4H:PRN SOB \n",
"10. Dulera (mometasone-formoterol) 200-5 mcg/actuation \n",
"inhalation BID \n",
"11. Fluorometholone 0.1% Ophth Susp. 1 DROP BOTH EYES BID \n",
"12. FoLIC Acid 1 mg PO DAILY \n",
"13. Gabapentin 300 mg PO BID \n",
"14. Lactulose 30 mL PO TID \n",
"15. melatonin 3 mg oral QHS \n",
"16. Morphine SR (MS ___ 15 mg PO Q12H \n",
"17. Pantoprazole 40 mg PO Q24H \n",
"18. Polyethylene Glycol 17 g PO DAILY \n",
"19. Rifaximin 550 mg PO BID \n",
"20. Thiamine 100 mg PO DAILY \n",
"21. Tiotropium Bromide 1 CAP IH DAILY \n",
"22. TraMADol 50 mg PO Q6H:PRN Pain - Moderate \n",
"23. HELD- Spironolactone 50 mg PO DAILY This medication was \n",
"held. Do not restart Spironolactone until your liver doctor \n",
"tells you to. \n",
"24.Outpatient Lab Work\n",
"ICD 10: K70.31\n",
"Please collect CBC and Chem10 on ___ and fax results to attn: \n",
"Dr. ___ at ___ \n",
"\n",
" \n",
"Discharge Disposition:\n",
"Home With Service\n",
" \n",
"Facility:\n",
"___\n",
" \n",
"Discharge Diagnosis:\n",
"Primary Diagnoses\n",
"- Decompensated cirrhosis\n",
"- Spontaneous bacterial peritonitis \n",
"- Volume overload\n",
"- Hyponatremia\n",
"- Hepatic encephalopathy \n",
"- Acute hypoxemic on chronic hypercarbic respiratory failure \n",
"- Urinary tract infection \n",
"- Atrial fibrillation with rapid ventricular response \n",
"- Penoscrotal edema \n",
"\n",
"Secondary diagnoses\n",
"- Hepatorenal syndrome\n",
"- Chronic pain\n",
"- Chronic obstructive pulmonary disease\n",
"- Obstructive sleep apnea \n",
"\n",
" \n",
"Discharge Condition:\n",
"Mental Status: Clear and coherent.\n",
"Level of Consciousness: Alert and interactive.\n",
"Activity Status: Ambulatory - requires assistance or aid (walker \n",
"or cane).\n",
"\n",
" \n",
"Discharge Instructions:\n",
"Dear Mr. ___, \n",
"\n",
"WHY DID YOU COME TO THE HOSPITAL?\n",
" - You were having trouble breathing and extra swelling in your \n",
"legs after your trip to ___. \n",
"\n",
"WHAT HAPPENED TO YOU DURING YOUR HOSPITAL STAY?\n",
" - We gave you medication to help your kidneys get rid of the \n",
"extra fluid. \n",
" - We found that you have an infection in your bladder and gave \n",
"you antibiotics. \n",
" - We changed some of your heart and liver medications to \n",
"improve your health. \n",
"\n",
"WHAT SHOULD YOU DO WHEN YOU LEAVE THE HOSPITAL?\n",
"1) Please take your medications as below\n",
"----NEW MEDICATIONS----\n",
" * Digoxin (for atrial fibrillation/rapid heart rate) \n",
"\n",
"----CHANGED MEDICATIONS----\n",
" * \n",
"\n",
"----STOPPED MEDICATIONS----\n",
" * Diltiazem - this was for your atrial fibrillation however it \n",
"also caused you to have low blood pressure so it was stopped and \n",
"replaced with digoxin. \n",
" * Spironolactone - stopped for now, your liver doctor might \n",
"decide to restart it in the future if you need another \n",
"medication to help remove extra fluid. \n",
"\n",
"2) Attend all of your follow-up appointments with your doctors \n",
"as ___.\n",
"\n",
"3) Please weight yourself every morning and call your doctor if \n",
"your weight increases or decreases by more than 3 pounds per \n",
"day. Your weight on discharge is 308 lbs.\n",
"\n",
"4) You need to get a blood test next week to make sure your \n",
"electrolytes are normal. \n",
"\n",
"5) PLEASE QUIT SMOKING! You can call the ___ Smokers \n",
"Hotline at ___ or visit their \n",
"website at ___ for support including \n",
"counselors and free nicotine patches.\n",
"\n",
"We wish you the best in your recovery and it was a pleasure to \n",
"care for you. \n",
"\n",
" \n",
"Followup Instructions:\n",
"___\n",
"\n"
]
}
],
"source": [
"# Example note - admitted to ICU: print the full text of the first note stored for this admission.\n",
"print(df_disch.loc[df_disch['hadm_id'] == \"24903681\", 'text'].iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "44c03f40",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3460"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Whitespace-delimited token count of the first note stored for admission 24903681.\n",
"len(df_disch.loc[df_disch['hadm_id'] == \"24903681\", 'text'].iloc[0].split())"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "a3642528",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" \n",
"Name: ___ Unit No: ___\n",
" \n",
"Admission Date: ___ Discharge Date: ___\n",
" \n",
"Date of Birth: ___ Sex: F\n",
" \n",
"Service: MEDICINE\n",
" \n",
"Allergies: \n",
"Keflex / Avelox / Nifedipine / Pulmicort\n",
" \n",
"Attending: ___.\n",
" \n",
"Chief Complaint:\n",
"Progressive shortness of breath\n",
"\n",
" \n",
"Major Surgical or Invasive Procedure:\n",
"___ Diagnostic cardiac catheterization \n",
"1. Right dominant system demonstrated no obstructive disease. \n",
"The\n",
"LMCA, LAD, LCx, and RCA were all normal 2. Limited resting \n",
"hemodynamics revealed a normal right-sidedfilling pressure with \n",
"an RVEDP of 8mmHg and a borderline normal wedge pressure with a \n",
"mean PCWP of 11 mmHg. The PASP was moderately elevated at 50 \n",
"mmHg. The cardiac output was preserved with a CI of 2.8 \n",
"L/min/m2. 3. A supravalvular aortogram showed no aortic \n",
"regurgiation and wide\n",
"sinus of Valsalva widths. The distal aortogram showed minimal\n",
"obstructive disease \n",
"\n",
" \n",
"\n",
" \n",
"\n",
" \n",
"History of Present Illness:\n",
"___ yo woman with a history of hepatopulmonary syndrome who has \n",
"developed progressive shortness of breath due to critical aortic \n",
"stenosis. She currently reports exertional dyspnea at ___ blocks \n",
"that requires that she stop and rest. She does not have chest \n",
"pain and has not had any episodes of dizziness or syncope with \n",
"the episodes. She has become markedly limited in her activity \n",
"over the past several months. She is ___ Class III and has been \n",
"on supplememtal oxygen logstanding. She is referred for \n",
"evaluation for aortic valve replacement.\n",
" \n",
"\n",
" \n",
"Past Medical History:\n",
"Aortic stenosis \n",
"Hepatic cirrhosis \n",
"Hepatopulmonary syndrome\n",
"Pulmonary hypertension\n",
"Esophageal varices- s/p bleed ___\n",
"Prior ETOH use\n",
"Pulmonary nodules by chest CT stable since ___\n",
"Arthritis of spine and hips\n",
"Basal cell cancer (removed from lip) \n",
" \n",
"Social History:\n",
"___\n",
"Family History:\n",
"Father had emphysema. \n",
"Mother has alzheimer's and is ___\n",
" \n",
"Physical Exam:\n",
"Patient seen an examined upon arrival to ___ 4 pre cardiac \n",
"cathetrization\n",
"VS: 100-110/60-70 HR 80 SR RR 16 2L NP sats 96%\n",
"Lungs CTA \n",
"Heart RRR IV/VI SEM\n",
"ABD NT + BS\n",
"PV Fem 2+ no bruits\n",
" DPs 1+ b/l no edema\n",
" \n",
"Pertinent Results:\n",
"___ 12:30PM BLOOD WBC-4.9 RBC-4.52 Hgb-14.6 Hct-42.8 MCV-95 \n",
"MCH-32.4* MCHC-34.2 RDW-13.1 Plt ___\n",
"___ 12:30PM BLOOD Plt ___\n",
"___ 12:30PM BLOOD ___ PTT-27.4 ___\n",
"___ 12:30PM BLOOD Glucose-104* UreaN-12 Creat-0.7 Na-135 \n",
"K-4.0 Cl-101 HCO3-23 AnGap-15\n",
"___ 12:30PM BLOOD ALT-25 AST-44* AlkPhos-89 TotBili-1.9*\n",
"\n",
"Carotid artery duplex: No evidence of carotid artery stenosis \n",
"bilaterally. \n",
" \n",
"\n",
"Chest CT:IMPRESSION: \n",
"1. Dense atherosclerotic calcification of the aortic valve but \n",
"without annular calcifications. Mild calcifications of the \n",
"thoracic aorta which is of normal caliber. 2. Mild peripheral \n",
"interstitial abnormality, but no pulmonary nodules identified. \n",
"3. Nodular shrunken appearance to the liver compatible with \n",
"cirrhosis. 4. Hiatal hernia with thickening of the mucosa in the \n",
"region of the hernia. This could be explained by the patient's \n",
"history of varices, although gastritis or masses are not \n",
"excluded. If indicated, further evaluation could be performed \n",
"with endoscopy. \n",
"\n",
"Gallbladder/Liver ultrasound: 1. Non-occlusive thrombus in the \n",
"left portal vein. 2. Cirrhotic liver with lobulated contour and \n",
"heterogeneous echotexture. No evidence for focal liver masses. \n",
"3. Echogenic appearance of the renal cortex consistent with \n",
"medical renal disease. 4. Small adherent lesion to the \n",
"gallbladder wall may represent a small polyp versus an adherent \n",
"stone. Dr. ___ was paged after completion of the study to \n",
"discuss these findings. \n",
"\n",
"Duplex Abdomen: 1. Non-occlusive thrombus in the left portal \n",
"vein. \n",
"2. Cirrhotic liver with lobulated contour and heterogeneous \n",
"echotexture. No evidence for focal liver masses. 3. Echogenic \n",
"appearance of the renal cortex consistent with medical renal \n",
"disease. 4. Small adherent lesion to the gallbladder wall may \n",
"represent a small polyp versus an adherent stone. Dr. ___ was \n",
"paged after completion of the study to discuss these findings. \n",
"\n",
" \n",
"\n",
" \n",
"Brief Hospital Course:\n",
"___ yo female admitted for cardiac catheterization and evaluation \n",
"of her critical aortic stenosis. Patient was initially \n",
"consulted to cardiac surgery for evaluation for AVR however \n",
"declined to due history of hepatic cirrhosis with esophageal \n",
"varices. Patient was then seen by Dr. ___ evaluation of \n",
"CORE VALVE Trail/TAVI and workup was initiated (carotid duplex, \n",
"chest CT, abdominal ultrasound). Due to her esophageal varices \n",
"it was recommended by GI that she have EGD with possible banding \n",
"of varices dependent upon grade. She was originally scheduled \n",
"for EDG on ___ however due to overwhelming anxiety and \n",
"patients wish to go home for a few days the procedure was \n",
"postponed and will be arranged on an outpatient basis. Patient \n",
"remained hemodynamically stable throughout her hospitalization \n",
"and will be discharged to home with CT angio on ___ and \n",
"EGD/varice banding on ___ with GI. She will follow up Dr. \n",
"___ to complete Core Valve study evaluation.\n",
" \n",
"Medications on Admission:\n",
"ALBUTEROL SULFATE - 90 mcg HFA Aerosol Inhaler - one puf TID as \n",
"needed / dyspnea\n",
"FUROSEMIDE - 40 mg Tablet - one Tablet BID\n",
"POTASSIUM CHLORIDE [KLOR-CON M20] - 20 mEq Tablet - 2 Tablets \n",
"BID\n",
"SILDENAFIL [REVATIO] - 20 mg Tablet - one Tablet TID\n",
"SPIRONOLACTONE - 50 mg Tablet - one Tablet BID\n",
"ZOLPIDEM - 10 mg Tablet - one Tablet as needed\n",
"MULTIVITAMIN - 1 Tablet, Chewable - daily\n",
" \n",
"Discharge Medications:\n",
"1. furosemide 40 mg Tablet Sig: One (1) Tablet PO BID (2 times a \n",
"day). \n",
"2. spironolactone 50 mg Tablet Sig: One (1) Tablet PO twice a \n",
"day. \n",
"3. potassium chloride 20 mEq Packet Sig: Two (2) Packet PO BID \n",
"(2 times a day). \n",
"4. Revatio 20 mg Tablet Sig: One (1) Tablet PO three times a \n",
"day. \n",
"5. zolpidem 10 mg Tablet Sig: One (1) Tablet PO at bedtime. \n",
"6. multivitamin Tablet, Chewable Sig: One (1) Tablet, \n",
"Chewable PO once a day. \n",
"7. albuterol sulfate 90 mcg/Actuation HFA Aerosol Inhaler Sig: \n",
"One (1) puff Inhalation three times a day as needed for \n",
"shortness of breath or wheezing. \n",
"8. Ativan 0.5 mg Tablet Sig: ___ Tablet PO every twelve (12) \n",
"hours as needed for anxiety. \n",
"\n",
" \n",
"Discharge Disposition:\n",
"Home\n",
" \n",
"Discharge Diagnosis:\n",
"Aortic stenosis\n",
"Hepatic cirrhosis \n",
"Hepatopulmonary syndrome- pulmonary hypertension\n",
"Esophageal varices \n",
" - s/p bleed ___\n",
"Pulmonary nodules by chest CT stable since ___\n",
"Arthritis of spine and hips\n",
"Basal cell cancer removed from lip \n",
"Hemorrhoid surgery\n",
"\n",
" \n",
"Discharge Condition:\n",
"___ yo with a history of hepatopulmonary syndrome who has \n",
"developed progressive shortness of breath due to critical aortic \n",
"stenosis. Referred for cardiac cath and surgical eval for AVR. \n",
"Declined by ___ due to bleed risk in setting of liver disease. \n",
"Post cath recovery uneventful. She remained hemodynamically \n",
"stable with no changes made in meds or diuretic therapy. \n",
"Remainder of hospitalization for evaluation/diagnostic testing \n",
"pre TAVI by Dr ___. \n",
" \n",
"She is discharged on all current meds. Will return for CTA of \n",
"the torso. ___ will contact Mrs ___ after discharge in \n",
"ongoing coordination and scheduling for return date. She is \n",
"tentatively scheduled for esophageal banding on ___ with \n",
"Dr. ___ will call to confirm procedure date and time with \n",
"patient upon discharge. \n",
" \n",
"Cardiac cath: \n",
"1. Coronary angiography in this right dominant demonstrated no \n",
"obstructive disease. The LMCA, LAD, LCx, and RCA were all \n",
"normal. \n",
"2. Limited resting hemodynamics revealed a normal right-sided \n",
"filling pressure with an RVEDP of 8mmHg and a borderline normal \n",
"wedge \n",
"pressure with a mean PCWP of 11 mmHg. The PASP was moderately \n",
"elevated at 50 mmHg. The cardiac output was preserved with a CI \n",
"of 2.8 L/min/m2. \n",
"3. A supravalvular aortogram showed no aortic regurgiation and \n",
"wide \n",
"sinus of Valsalva widths. The distal aortogram showed minimal \n",
"obstructive disease. \n",
" \n",
" \n",
"Mental Status: Clear and coherent. Level of Consciousness: Alert \n",
"and interactive. Activity Status: Ambulatory - Independent. \n",
" \n",
"VS 118/65 HR ___ SR \n",
"Lungs CTA \n",
"Heart RRR IV/VI SEM \n",
"PV R fem access - hematoma or bruit \n",
"labs none at discharge \n",
"#1. Aortic Stenosis - critical AS by cath, declined by c-surg \n",
"a. No obstructive CAD by cath \n",
"b. Outpatient CT angio on ___ \n",
"c. Follow up with Dr. ___: ___ \n",
"#2. Esophageal Varices - stable \n",
"a. Outpatient EGD tentatively scheduled for ___ with Dr. ___ \n",
"___ possible banding as part of pre-op eval for TAVI \n",
"#3. CHF - NYHA class II-III/Stage C \n",
"a. Continue furosemide/aldactone/potassium \n",
"b. Daily weights, call PCP ___ 3 pound weight gain in 2 days or \n",
"5 pound weight gain in 5 days \n",
"c. Low sodium diet \n",
"#4. Pulmonary HTN -stable \n",
"a. Revatio 20 mg tid \n",
"Condition: Good \n",
"Disp: Home with Follow up with Dr. ___ to \n",
"arrange) and follow with GI/Liver on ___ (GI will call \n",
"patient with time). \n",
"\n",
" \n",
"Discharge Instructions:\n",
" \n",
" You were admitted for a cardiac catheterization as part of an \n",
"evaulation of your aortic stenosis. Due to the high surgical \n",
"risk for bleeding due to your liver disease you are not a \n",
"candidate for valve replacement by surgery. Dr ___ will \n",
"perform the aortic valve replacement by a nonsurgical approach \n",
"in the future. He has discussed this in detail with you. If you \n",
"have any questions you may reach Dr ___ at ___. Also \n",
"the nurse practitioner who coordinates this program - ___ \n",
"___ - may be reached at ___.\n",
" You have been scheduled for a CT angiogram of the \n",
"chest/abdomen ___ at 10AM\n",
" You have been tentatively scheduled for a procedure with Dr. \n",
"___ to evaluate and treat your esophagus. The date is \n",
"___ however someone will call to confirm this with you. \n",
" Continue all current medicines as prescribed. You may take \n",
"___ tablet of a 0.5 mg ativan every 12 hours as needed for \n",
"anxiety. Do not drive when taking ativan. \n",
" \n",
"Followup Instructions:\n",
"___\n",
"\n"
]
}
],
"source": [
"# Example note - not admitted to ICU: print the full text of the first note stored for this admission.\n",
"print(df_disch.loc[df_disch['hadm_id'] == \"24117249\", 'text'].iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "9ae3f01f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1534"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Whitespace-delimited token count of the first note stored for admission 24117249.\n",
"len(df_disch.loc[df_disch['hadm_id'] == \"24117249\", 'text'].iloc[0].split())"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "c47ba441",
"metadata": {},
"outputs": [],
"source": [
"# Per-note size features: length in characters and number of whitespace-delimited tokens.\n",
"# NOTE: this adds columns to df_disch in place; later cells read 'text_len' and 'word_cnt'.\n",
"note_texts = df_disch['text']\n",
"df_disch['text_len'] = note_texts.apply(len)\n",
"df_disch['word_cnt'] = note_texts.apply(lambda note: len(note.split()))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "c19b6cec",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" text_len word_cnt\n",
"count 331794.000000 331794.00000\n",
"mean 10550.969767 1600.27430\n",
"std 4452.120954 666.99493\n",
"min 353.000000 44.00000\n",
"25% 7462.000000 1135.00000\n",
"50% 9847.000000 1501.00000\n",
"75% 12831.000000 1952.00000\n",
"max 60381.000000 9026.00000\n"
]
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 864x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Summary statistics, then side-by-side histograms of note length in characters and in words.\n",
"length_cols = ['text_len', 'word_cnt']\n",
"print(df_disch[length_cols].describe())\n",
"fig, axes = plt.subplots(1, 2, figsize=(12, 4))\n",
"for ax, col in zip(axes, length_cols):\n",
"    sns.histplot(df_disch[col], ax=ax)\n",
"    ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "acb5edf5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"145915"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct subject_id values that appear in the discharge-note table.\n",
"total_note_subject_ids = {subject_id for subject_id in df_disch['subject_id']}\n",
"len(total_note_subject_ids)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "1e583d45",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"331794"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct hadm_id values that appear in the discharge-note table.\n",
"total_note_hadm_ids = {hadm_id for hadm_id in df_disch['hadm_id']}\n",
"len(total_note_hadm_ids)"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "84b18dec",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"66239"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct hadm_id values present in df_icu.\n",
"# NOTE(review): despite the 'icd' spelling, this set is built from df_icu and later cells use it\n",
"# as the ICU-admission set; the name is kept because other cells reference it.\n",
"icd_hadm_ids = {hadm_id for hadm_id in df_icu['hadm_id']}\n",
"len(icd_hadm_ids)"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "4db40658",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HADMs with discharge summary AND ICU admission : 65330\n",
"HADMs with discharge summary AND no ICU admission: 266464\n"
]
}
],
"source": [
"# Split the admissions that have a note by membership in icd_hadm_ids (the set built from df_icu).\n",
"icu_note_hadm_ids = total_note_hadm_ids.intersection(icd_hadm_ids)\n",
"noicd_note_hadm_ids = total_note_hadm_ids.difference(icd_hadm_ids)\n",
"print(f'HADMs with discharge summary AND ICU admission : {len(icu_note_hadm_ids)}')\n",
"print(f'HADMs with discharge summary AND no ICU admission: {len(noicd_note_hadm_ids)}')"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "9f690251",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['21203131', '28775682', '28244450', '26965345', '26065396']\n",
"['21753812', '25003596', '24663864', '27850472', '25603743']\n"
]
}
],
"source": [
"# Peek at a few IDs from each group.\n",
"# Sets have no stable iteration order (string hashing is randomized per interpreter start), so\n",
"# list(a_set)[:5] can print different IDs after every kernel restart. Sorting first makes the\n",
"# sample reproducible under Restart & Run All.\n",
"print(sorted(icu_note_hadm_ids)[:5])\n",
"print(sorted(noicd_note_hadm_ids)[:5])"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "ba722453",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>note_id</th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>note_type</th>\n",
" <th>note_seq</th>\n",
" <th>charttime</th>\n",
" <th>storetime</th>\n",
" <th>text</th>\n",
" <th>text_len</th>\n",
" <th>word_cnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>235463</th>\n",
" <td>17109926-DS-20</td>\n",
" <td>17109926</td>\n",
" <td>20671539</td>\n",
" <td>DS</td>\n",
" <td>20</td>\n",
" <td>2132-05-09 00:00:00</td>\n",
" <td>2132-05-09 11:09:00</td>\n",
" <td>Name: ___ Unit No: _...</td>\n",
" <td>6336</td>\n",
" <td>916</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" note_id subject_id hadm_id note_type note_seq \\\n",
"235463 17109926-DS-20 17109926 20671539 DS 20 \n",
"\n",
" charttime storetime \\\n",
"235463 2132-05-09 00:00:00 2132-05-09 11:09:00 \n",
"\n",
" text text_len word_cnt \n",
"235463 \n",
"Name: ___ Unit No: _... 6336 916 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the discharge-note row(s) for a single admission.\n",
"df_disch.loc[df_disch['hadm_id'].eq('20671539')]"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "aa4b2c97",
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>note_id</th>\n",
" <th>subject_id</th>\n",
" <th>hadm_id</th>\n",
" <th>note_type</th>\n",
" <th>note_seq</th>\n",
" <th>charttime</th>\n",
" <th>storetime</th>\n",
" <th>text</th>\n",
" <th>text_len</th>\n",
" <th>word_cnt</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>135697</th>\n",
" <td>14097607-DS-54</td>\n",
" <td>14097607</td>\n",
" <td>22963358</td>\n",
" <td>DS</td>\n",
" <td>54</td>\n",
" <td>2198-07-19 00:00:00</td>\n",
" <td>2198-07-19 15:22:00</td>\n",
" <td>Name: ___ Unit No: ___\n",
" \n",
"...</td>\n",
" <td>7738</td>\n",
" <td>1133</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" note_id subject_id hadm_id note_type note_seq \\\n",
"135697 14097607-DS-54 14097607 22963358 DS 54 \n",
"\n",
" charttime storetime \\\n",
"135697 2198-07-19 00:00:00 2198-07-19 15:22:00 \n",
"\n",
" text text_len word_cnt \n",
"135697 \n",
"Name: ___ Unit No: ___\n",
" \n",
"... 7738 1133 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the discharge-note row(s) for a single admission.\n",
"df_disch.loc[df_disch['hadm_id'].eq('22963358')]"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "87ad1359",
"metadata": {},
"outputs": [],
"source": [
"# Word counts of the notes in each group.\n",
"# Series.isin performs the membership test in one vectorized call instead of invoking a Python\n",
"# lambda once per row, and .loc[mask, 'word_cnt'] selects the column without first materializing\n",
"# a filtered copy of the whole frame (including the large 'text' column).\n",
"icu_note_word_cnt = df_disch.loc[df_disch['hadm_id'].isin(icu_note_hadm_ids), 'word_cnt']\n",
"noicu_note_word_cnt = df_disch.loc[df_disch['hadm_id'].isin(noicd_note_hadm_ids), 'word_cnt']"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "482a5cc3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ICU</th>\n",
" <th>NO ICU</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>65330.000000</td>\n",
" <td>266464.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1881.747528</td>\n",
" <td>1531.264430</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>780.948368</td>\n",
" <td>616.639497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>44.000000</td>\n",
" <td>63.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1341.000000</td>\n",
" <td>1096.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1755.000000</td>\n",
" <td>1446.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2281.000000</td>\n",
" <td>1871.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>9026.000000</td>\n",
" <td>7890.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ICU NO ICU\n",
"count 65330.000000 266464.000000\n",
"mean 1881.747528 1531.264430\n",
"std 780.948368 616.639497\n",
"min 44.000000 63.000000\n",
"25% 1341.000000 1096.000000\n",
"50% 1755.000000 1446.000000\n",
"75% 2281.000000 1871.000000\n",
"max 9026.000000 7890.000000"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Overlay the word-count densities of the two groups, then tabulate their summary statistics.\n",
"word_cnt_by_group = {'ICU': icu_note_word_cnt, 'NO ICU': noicu_note_word_cnt}\n",
"for group_label, group_word_cnt in word_cnt_by_group.items():\n",
"    sns.kdeplot(group_word_cnt, label=group_label)\n",
"plt.legend()\n",
"plt.grid()\n",
"\n",
"pd.DataFrame({group_label: group_word_cnt.describe() for group_label, group_word_cnt in word_cnt_by_group.items()})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf417420",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "70b42c84",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment