fonnesbeck/Jordan Randomization.ipynb

## Jordan Randomization.ipynb
{
  "cells": [
    {
      "metadata": {
        "collapsed": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "import pandas as pd\nimport numpy as np\nfrom datetime import datetime",
      "execution_count": 1,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "# low_memory=False infers each column's dtype from the whole file, avoiding the\n# mixed-type DtypeWarning that chunked parsing raises for this wide table\nhospitalized = pd.read_csv('data/hospitalized.csv', index_col=0, low_memory=False)",
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "stream",
          "text": "/Users/fonnescj/anaconda3/envs/dev/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (140,142,144,146,148,181,206,212,213,262,281,282,283,297,298) have mixed types. Specify dtype option on import or set low_memory=False.\n  interactivity=interactivity, compiler=compiler, result=result)\n",
          "name": "stderr"
        }
      ]
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "hospitalized.shape",
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "(3168, 407)"
          },
          "metadata": {},
          "execution_count": 10
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Assign study year"
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "hospitalized['virus_year'] = 2011\nhospitalized.loc[(hospitalized.admission_date >= '2011-03-31') \n                 & (hospitalized.admission_date <= '2012-03-31'), 'virus_year'] = 2012\nhospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013\n\nhospitalized.virus_year.value_counts()",
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "2012    1191\n2013    1179\n2011     798\nName: virus_year, dtype: int64"
          },
          "metadata": {},
          "execution_count": 14
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Keep only cases with CT<30 that have a blood culture or saliva swab"
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "conditions = (hospitalized.rsv_count<30) & ((hospitalized.blood_culture==1) | (hospitalized.saliva_swab==1))\ndata_subset = hospitalized[conditions]",
      "execution_count": 15,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "data_subset.shape",
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": "(636, 408)"
          },
          "metadata": {},
          "execution_count": 16
        }
      ]
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Draw random samples:"
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "n = 92\nn_ox = int(n*.4)\nn_noox = n - n_ox",
      "execution_count": 23,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Seeded generator so the randomization can be reproduced and audited\nrng = np.random.RandomState(42)\n\nrsv_random_sample = []\n\nfor virus_year in (2011, 2012, 2013):\n\n    year_subset = data_subset[data_subset.virus_year==virus_year]\n\n    # Stratify on oxygen status with explicit comparisons, so rows with a\n    # missing oxygen value are excluded instead of counted as no-oxygen\n    oxygen_subset = year_subset[year_subset.oxygen==1]\n    no_oxygen_subset = year_subset[year_subset.oxygen==0]\n\n    # Draw n_ox on-oxygen and n_noox no-oxygen cases for this year\n    rsv_random_sample.append(oxygen_subset.sample(n=n_ox, random_state=rng))\n    rsv_random_sample.append(no_oxygen_subset.sample(n=n_noox, random_state=rng))",
      "execution_count": 25,
      "outputs": []
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": true
      },
      "cell_type": "code",
      "source": "rsv_random_sample = pd.concat(rsv_random_sample)",
      "execution_count": 26,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Ensure sample size:"
    },
    {
      "metadata": {
        "trusted": true,
        "collapsed": false
      },
      "cell_type": "code",
      "source": "assert len(rsv_random_sample) == n*3",
      "execution_count": 28,
      "outputs": []
    },
    {
      "metadata": {
        "collapsed": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "pd.crosstab(rsv_random_sample.virus_year, rsv_random_sample.oxygen)",
      "execution_count": 29,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th>oxygen</th>\n      <th>0.0</th>\n      <th>1.0</th>\n    </tr>\n    <tr>\n      <th>virus_year</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2011</th>\n      <td>56</td>\n      <td>36</td>\n    </tr>\n    <tr>\n      <th>2012</th>\n      <td>56</td>\n      <td>36</td>\n    </tr>\n    <tr>\n      <th>2013</th>\n      <td>56</td>\n      <td>36</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "oxygen      0.0  1.0\nvirus_year          \n2011         56   36\n2012         56   36\n2013         56   36"
          },
          "metadata": {},
          "execution_count": 29
        }
      ]
    },
    {
      "metadata": {
        "collapsed": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "# Mirror the inclusion criterion (rsv_count<30) exactly, so a value of 30 or a\n# missing value in the sample fails the check\nassert (rsv_random_sample.rsv_count<30).all()",
      "execution_count": 31,
      "outputs": []
    },
    {
      "metadata": {},
      "cell_type": "markdown",
      "source": "Export the samples:"
    },
    {
      "metadata": {
        "collapsed": false,
        "trusted": true
      },
      "cell_type": "code",
      "source": "rsv_random_sample.to_csv(\"rsv_random_sample.csv\")",
      "execution_count": 32,
      "outputs": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3",
      "language": "python"
    },
    "language_info": {
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      },
      "mimetype": "text/x-python",
      "nbconvert_exporter": "python",
      "file_extension": ".py",
      "name": "python",
      "pygments_lexer": "ipython3",
      "version": "3.5.2"
    },
    "gist": {
      "id": "",
      "data": {
        "description": "Jordan Randomization.ipynb",
        "public": true
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
	{
	"cells": [
	{
	"metadata": {
	"collapsed": false,
	"trusted": true
	},
	"cell_type": "code",
	"source": "import pandas as pd\nimport numpy as np\nfrom datetime import datetime",
	"execution_count": 1,
	"outputs": []
	},
	{
	"metadata": {
	"collapsed": false,
	"trusted": true
	},
	"cell_type": "code",
	"source": "# low_memory=False infers each column's dtype from the whole file, avoiding the\n# mixed-type DtypeWarning that chunked parsing raises for this wide table\nhospitalized = pd.read_csv('data/hospitalized.csv', index_col=0, low_memory=False)",
	"execution_count": 33,
	"outputs": [
	{
	"output_type": "stream",
	"text": "/Users/fonnescj/anaconda3/envs/dev/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (140,142,144,146,148,181,206,212,213,262,281,282,283,297,298) have mixed types. Specify dtype option on import or set low_memory=False.\n interactivity=interactivity, compiler=compiler, result=result)\n",
	"name": "stderr"
	}
	]
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "hospitalized.shape",
	"execution_count": 10,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "(3168, 407)"
	},
	"metadata": {},
	"execution_count": 10
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Assign study year"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "hospitalized['virus_year'] = 2011\nhospitalized.loc[(hospitalized.admission_date >= '2011-03-31') \n & (hospitalized.admission_date <= '2012-03-31'), 'virus_year'] = 2012\nhospitalized.loc[hospitalized.admission_date > '2012-03-31', 'virus_year'] = 2013\n\nhospitalized.virus_year.value_counts()",
	"execution_count": 14,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "2012 1191\n2013 1179\n2011 798\nName: virus_year, dtype: int64"
	},
	"metadata": {},
	"execution_count": 14
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Keep only cases with CT<30 that have a blood culture or saliva swab"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "conditions = (hospitalized.rsv_count<30) & ((hospitalized.blood_culture==1) | (hospitalized.saliva_swab==1))\ndata_subset = hospitalized[conditions]",
	"execution_count": 15,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "data_subset.shape",
	"execution_count": 16,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "(636, 408)"
	},
	"metadata": {},
	"execution_count": 16
	}
	]
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Draw random samples:"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "n = 92\nn_ox = int(n*.4)\nn_noox = n - n_ox",
	"execution_count": 23,
	"outputs": []
	},
	{
	"metadata": {
	"collapsed": false,
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Seeded generator so the randomization can be reproduced and audited\nrng = np.random.RandomState(42)\n\nrsv_random_sample = []\n\nfor virus_year in (2011, 2012, 2013):\n\n    year_subset = data_subset[data_subset.virus_year==virus_year]\n\n    # Stratify on oxygen status with explicit comparisons, so rows with a\n    # missing oxygen value are excluded instead of counted as no-oxygen\n    oxygen_subset = year_subset[year_subset.oxygen==1]\n    no_oxygen_subset = year_subset[year_subset.oxygen==0]\n\n    # Draw n_ox on-oxygen and n_noox no-oxygen cases for this year\n    rsv_random_sample.append(oxygen_subset.sample(n=n_ox, random_state=rng))\n    rsv_random_sample.append(no_oxygen_subset.sample(n=n_noox, random_state=rng))",
	"execution_count": 25,
	"outputs": []
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": true
	},
	"cell_type": "code",
	"source": "rsv_random_sample = pd.concat(rsv_random_sample)",
	"execution_count": 26,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Ensure sample size:"
	},
	{
	"metadata": {
	"trusted": true,
	"collapsed": false
	},
	"cell_type": "code",
	"source": "assert len(rsv_random_sample) == n*3",
	"execution_count": 28,
	"outputs": []
	},
	{
	"metadata": {
	"collapsed": false,
	"trusted": true
	},
	"cell_type": "code",
	"source": "pd.crosstab(rsv_random_sample.virus_year, rsv_random_sample.oxygen)",
	"execution_count": 29,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/html": "<div>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th>oxygen</th>\n <th>0.0</th>\n <th>1.0</th>\n </tr>\n <tr>\n <th>virus_year</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2011</th>\n <td>56</td>\n <td>36</td>\n </tr>\n <tr>\n <th>2012</th>\n <td>56</td>\n <td>36</td>\n </tr>\n <tr>\n <th>2013</th>\n <td>56</td>\n <td>36</td>\n </tr>\n </tbody>\n</table>\n</div>",
	"text/plain": "oxygen 0.0 1.0\nvirus_year \n2011 56 36\n2012 56 36\n2013 56 36"
	},
	"metadata": {},
	"execution_count": 29
	}
	]
	},
	{
	"metadata": {
	"collapsed": false,
	"trusted": true
	},
	"cell_type": "code",
	"source": "# Mirror the inclusion criterion (rsv_count<30) exactly, so a value of 30 or a\n# missing value in the sample fails the check\nassert (rsv_random_sample.rsv_count<30).all()",
	"execution_count": 31,
	"outputs": []
	},
	{
	"metadata": {},
	"cell_type": "markdown",
	"source": "Export the samples:"
	},
	{
	"metadata": {
	"collapsed": false,
	"trusted": true
	},
	"cell_type": "code",
	"source": "rsv_random_sample.to_csv(\"rsv_random_sample.csv\")",
	"execution_count": 32,
	"outputs": []
	}
	],
	"metadata": {
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3",
	"language": "python"
	},
	"language_info": {
	"codemirror_mode": {
	"version": 3,
	"name": "ipython"
	},
	"mimetype": "text/x-python",
	"nbconvert_exporter": "python",
	"file_extension": ".py",
	"name": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.2"
	},
	"gist": {
	"id": "",
	"data": {
	"description": "Jordan Randomization.ipynb",
	"public": true
	}
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}