Skip to content

Instantly share code, notes, and snippets.

@fonnesbeck
Created July 30, 2015 19:18
Show Gist options
  • Save fonnesbeck/fa3f556df2b4c786d6f1 to your computer and use it in GitHub Desktop.
Save fonnesbeck/fa3f556df2b4c786d6f1 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from datetime import datetime\n",
"import seaborn as sb\n",
"import pymc as pm\n",
"sb.set_style(\"white\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.4/site-packages/pandas/io/parsers.py:1170: DtypeWarning: Columns (140,142,144,146,148,181,206,212,213,263,282,283,284,298,299) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" data = self._reader.read(nrows)\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>greater_48hrs</th>\n",
" <th>fever_neutropenia</th>\n",
" <th>never_left</th>\n",
" <th>written_consent</th>\n",
" <th>child_name</th>\n",
" <th>mother_name</th>\n",
" <th>mother_birth_date</th>\n",
" <th>mother_record</th>\n",
" <th>mother_nationality</th>\n",
" <th>other_mother_nationality</th>\n",
" <th>...</th>\n",
" <th>was_whole_blood_obtained_f</th>\n",
" <th>date</th>\n",
" <th>whole_blood_complete</th>\n",
" <th>age_months</th>\n",
" <th>length_of_stay</th>\n",
" <th>gest_age</th>\n",
" <th>death</th>\n",
" <th>hospitalized_vitamin_d</th>\n",
" <th>wheezing_ind</th>\n",
" <th>sex</th>\n",
" </tr>\n",
" <tr>\n",
" <th>case_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A0001</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Remas Mahmoud Jbarah</td>\n",
" <td>Huda Katalo</td>\n",
" <td>1976-01-21</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>40</td>\n",
" <td>False</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0002</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Majed Abdel Kareem Majed</td>\n",
" <td>Noor SHa'aban Mahmood</td>\n",
" <td>1989-09-09</td>\n",
" <td>NaN</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>40</td>\n",
" <td>False</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>M</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0003</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Rayyan Jamal Muhyi Al.Deen</td>\n",
" <td>SAra Hussein Muhyi Al.Deen</td>\n",
" <td>1965-01-01</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" <td>10</td>\n",
" <td>40</td>\n",
" <td>False</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0004</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Hanan Mohd Mustapha Abu Othman</td>\n",
" <td>Kawla Abu Shanab</td>\n",
" <td>1983-10-31</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>3</td>\n",
" <td>38</td>\n",
" <td>False</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>F</td>\n",
" </tr>\n",
" <tr>\n",
" <th>A0005</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yara Mahmoud Azmi Ismael</td>\n",
" <td>Suha Abdel Aziz</td>\n",
" <td>1986-02-28</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>39</td>\n",
" <td>False</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>F</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 414 columns</p>\n",
"</div>"
],
"text/plain": [
" greater_48hrs fever_neutropenia never_left written_consent \\\n",
"case_id \n",
"A0001 0 0 0 1 \n",
"A0002 0 0 0 1 \n",
"A0003 0 0 0 1 \n",
"A0004 0 0 0 1 \n",
"A0005 0 0 0 1 \n",
"\n",
" child_name mother_name \\\n",
"case_id \n",
"A0001 Remas Mahmoud Jbarah Huda Katalo \n",
"A0002 Majed Abdel Kareem Majed Noor SHa'aban Mahmood \n",
"A0003 Rayyan Jamal Muhyi Al.Deen SAra Hussein Muhyi Al.Deen \n",
"A0004 Hanan Mohd Mustapha Abu Othman Kawla Abu Shanab \n",
"A0005 Yara Mahmoud Azmi Ismael Suha Abdel Aziz \n",
"\n",
" mother_birth_date mother_record mother_nationality \\\n",
"case_id \n",
"A0001 1976-01-21 NaN 3 \n",
"A0002 1989-09-09 NaN 3 \n",
"A0003 1965-01-01 NaN 1 \n",
"A0004 1983-10-31 NaN 1 \n",
"A0005 1986-02-28 NaN 1 \n",
"\n",
" other_mother_nationality ... was_whole_blood_obtained_f date \\\n",
"case_id ... \n",
"A0001 NaN ... NaN NaN \n",
"A0002 NaN ... NaN NaN \n",
"A0003 NaN ... NaN NaN \n",
"A0004 NaN ... NaN NaN \n",
"A0005 NaN ... NaN NaN \n",
"\n",
" whole_blood_complete age_months length_of_stay gest_age death \\\n",
"case_id \n",
"A0001 0 1 6 40 False \n",
"A0002 0 1 5 40 False \n",
"A0003 0 11 10 40 False \n",
"A0004 0 7 3 38 False \n",
"A0005 0 2 1 39 False \n",
"\n",
" hospitalized_vitamin_d wheezing_ind sex \n",
"case_id \n",
"A0001 3 0 F \n",
"A0002 4 1 M \n",
"A0003 35 0 F \n",
"A0004 2 1 F \n",
"A0005 6 0 F \n",
"\n",
"[5 rows x 414 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0)\n",
"hospitalized.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Convert dates"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"hospitalized.child_birth_date = pd.to_datetime(hospitalized.child_birth_date)\n",
"hospitalized.enrollment_date = pd.to_datetime(hospitalized.enrollment_date)\n",
"hospitalized.admission_date = pd.to_datetime(hospitalized.admission_date)\n",
"hospitalized.discharge_date = pd.to_datetime(hospitalized.discharge_date)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Assign virus year"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2012 1191\n",
"2013 1179\n",
"2011 798\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hospitalized['virus_year'] = 2011\n",
"hospitalized.loc[(hospitalized.admission_date >= '2011-03-31') \n",
" & (hospitalized.admission_date <= '2012-03-31'), 'virus_year'] = 2012\n",
"hospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013\n",
"\n",
"hospitalized.virus_year.value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extract RSV subset"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"hospitalized['RSV'] = hospitalized['pcr_result___1']\n",
"RSV_subset = hospitalized[hospitalized.RSV==1]\n",
"RSV_subset.death.sum()"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.08110065170166546"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"RSV_subset.icu.mean()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"deaths = RSV_subset[RSV_subset.death==1]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dictionary to hold samples"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"random_sample = {2011: [], 2012: [], 2013: []}"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for i,d in deaths.iterrows():\n",
" random_sample[d.virus_year].append(i)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for year in random_sample:\n",
" \n",
" year_subset = RSV_subset[RSV_subset.virus_year==year]\n",
" \n",
" n_required = 31 - len(random_sample[year])\n",
" \n",
" # Determine number in each group\n",
" n_oxygen = int(n_required * 0.4)\n",
" n_vent_icu = int(n_required * 0.1)\n",
" n_none = n_required - n_oxygen - n_vent\n",
"\n",
" # Mechanical vent or ICU patients\n",
" random_sample[year] += year_subset[(year_subset.vent==1) | (year_subset.icu==1)].sample(n=n_vent).index.values.tolist()\n",
" # Oxygen patients\n",
" random_sample[year] += year_subset[(year_subset.oxygen==1) & (year_subset.vent==0)].sample(n=n_oxygen).index.values.tolist() \n",
" # No oxygen or ventilator\n",
" random_sample[year] += year_subset[(year_subset.oxygen==0) & (year_subset.vent==0)].sample(n=n_none).index.values.tolist()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here is the sample from each study year"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{2011: ['B1191',\n",
" 'D3040',\n",
" 'C2104',\n",
" 'B1146',\n",
" 'B1105',\n",
" 'A0077',\n",
" 'A0137',\n",
" 'D3097',\n",
" 'D3080',\n",
" 'A0024',\n",
" 'C2015',\n",
" 'A0155',\n",
" 'A0109',\n",
" 'C2001',\n",
" 'C2138',\n",
" 'A0104',\n",
" 'C2086',\n",
" 'D3128',\n",
" 'A0182',\n",
" 'B1160',\n",
" 'D3109',\n",
" 'A0121',\n",
" 'D3142',\n",
" 'C2022',\n",
" 'D3089',\n",
" 'B1134',\n",
" 'B1030',\n",
" 'B1172',\n",
" 'B1165',\n",
" 'A0125',\n",
" 'B1193'],\n",
" 2012: ['A0368',\n",
" 'B1515',\n",
" 'D3416',\n",
" 'A0366',\n",
" 'D3374',\n",
" 'A0456',\n",
" 'C2382',\n",
" 'A0391',\n",
" 'C2383',\n",
" 'C2363',\n",
" 'D3292',\n",
" 'C2448',\n",
" 'D3436',\n",
" 'C2357',\n",
" 'D3430',\n",
" 'D3354',\n",
" 'C2353',\n",
" 'B1396',\n",
" 'D3346',\n",
" 'B1375',\n",
" 'C2402',\n",
" 'A0371',\n",
" 'A0430',\n",
" 'B1522',\n",
" 'C2464',\n",
" 'B1451',\n",
" 'D3345',\n",
" 'B1380',\n",
" 'C2459',\n",
" 'D3449',\n",
" 'C2462'],\n",
" 2013: ['A0718',\n",
" 'B1594',\n",
" 'D3731',\n",
" 'D3539',\n",
" 'A0614',\n",
" 'B1831',\n",
" 'C2637',\n",
" 'B1559',\n",
" 'D3633',\n",
" 'D3762',\n",
" 'C2673',\n",
" 'D3677',\n",
" 'B1724',\n",
" 'A0753',\n",
" 'B1752',\n",
" 'A0675',\n",
" 'A0720',\n",
" 'B1554',\n",
" 'B1568',\n",
" 'A0515',\n",
" 'A0683',\n",
" 'B1767',\n",
" 'A0748',\n",
" 'D3714',\n",
" 'D3679',\n",
" 'D3629',\n",
" 'C2779',\n",
" 'D3667',\n",
" 'B1772',\n",
" 'C2708',\n",
" 'A0530']}"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"random_sample"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment