Skip to content

Instantly share code, notes, and snippets.

@fonnesbeck
Created January 1, 2017 22:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fonnesbeck/a6080ff94a62c2e72c5601b6045b1853 to your computer and use it in GitHub Desktop.
Save fonnesbeck/a6080ff94a62c2e72c5601b6045b1853 to your computer and use it in GitHub Desktop.
Jordan Randomization.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "import pandas as pd\nimport numpy as np\nfrom datetime import datetime",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "hospitalized = pd.read_csv('data/hospitalized.csv', index_col=0)\n#hospitalized.head()",
"execution_count": 33,
"outputs": [
{
"output_type": "stream",
"text": "/Users/fonnescj/anaconda3/envs/dev/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (140,142,144,146,148,181,206,212,213,262,281,282,283,297,298) have mixed types. Specify dtype option on import or set low_memory=False.\n interactivity=interactivity, compiler=compiler, result=result)\n",
"name": "stderr"
}
]
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "hospitalized.shape",
"execution_count": 10,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "(3168, 407)"
},
"metadata": {},
"execution_count": 10
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Assing study year"
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "hospitalized['virus_year'] = 2011\nhospitalized.loc[(hospitalized.admission_date >= '2011-03-31') \n & (hospitalized.admission_date <= '2012-03-31'), 'virus_year'] = 2012\nhospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013\n\nhospitalized.virus_year.value_counts()",
"execution_count": 14,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "2012 1191\n2013 1179\n2011 798\nName: virus_year, dtype: int64"
},
"metadata": {},
"execution_count": 14
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Filter out CT<30 and blood/saliva"
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "conditions = (hospitalized.rsv_count<30) & ((hospitalized.blood_culture==1) | (hospitalized.saliva_swab==1))\ndata_subset = hospitalized[conditions]",
"execution_count": 15,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "data_subset.shape",
"execution_count": 16,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": "(636, 408)"
},
"metadata": {},
"execution_count": 16
}
]
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Draw random samples:"
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "n = 92\nn_ox = int(n*.4)\nn_noox = n - n_ox",
"execution_count": 23,
"outputs": []
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "rsv_random_sample = []\n\nfor virus_year in (2011, 2012, 2013):\n \n year_subset = data_subset[data_subset.virus_year==virus_year]\n \n # Oxygen flag\n on_oxygen = year_subset.oxygen==1\n\n oxygen_subset = year_subset[on_oxygen]\n no_oxygen_subset = year_subset[~on_oxygen]\n \n rsv_random_sample.append(oxygen_subset.sample(n=n_ox))\n rsv_random_sample.append(no_oxygen_subset.sample(n=n_noox))\n ",
"execution_count": 25,
"outputs": []
},
{
"metadata": {
"trusted": true,
"collapsed": true
},
"cell_type": "code",
"source": "rsv_random_sample = pd.concat(rsv_random_sample)",
"execution_count": 26,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Ensure sample size:"
},
{
"metadata": {
"trusted": true,
"collapsed": false
},
"cell_type": "code",
"source": "assert len(rsv_random_sample) == n*3",
"execution_count": 28,
"outputs": []
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "pd.crosstab(rsv_random_sample.virus_year, rsv_random_sample.oxygen)",
"execution_count": 29,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>oxygen</th>\n <th>0.0</th>\n <th>1.0</th>\n </tr>\n <tr>\n <th>virus_year</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2011</th>\n <td>56</td>\n <td>36</td>\n </tr>\n <tr>\n <th>2012</th>\n <td>56</td>\n <td>36</td>\n </tr>\n <tr>\n <th>2013</th>\n <td>56</td>\n <td>36</td>\n </tr>\n </tbody>\n</table>\n</div>",
"text/plain": "oxygen 0.0 1.0\nvirus_year \n2011 56 36\n2012 56 36\n2013 56 36"
},
"metadata": {},
"execution_count": 29
}
]
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "assert not (rsv_random_sample.rsv_count>30).sum()",
"execution_count": 31,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Export the samples:"
},
{
"metadata": {
"collapsed": false,
"trusted": true
},
"cell_type": "code",
"source": "rsv_random_sample.to_csv(\"rsv_random_sample.csv\")",
"execution_count": 32,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"codemirror_mode": {
"version": 3,
"name": "ipython"
},
"mimetype": "text/x-python",
"nbconvert_exporter": "python",
"file_extension": ".py",
"name": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"gist": {
"id": "",
"data": {
"description": "Jordan Randomization.ipynb",
"public": true
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment