Skip to content

Instantly share code, notes, and snippets.

Created February 8, 2016 00:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/5efa74c0b897167ee6f9 to your computer and use it in GitHub Desktop.
Save anonymous/5efa74c0b897167ee6f9 to your computer and use it in GitHub Desktop.
Doctors code
{
"cells": [
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\n",
"import random\n",
"from tabulate import tabulate"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('Y: total, num recovered', 1000, 493.0)\n"
]
}
],
"source": [
"def sim_data(N, D, p_vl=0.01, p_l=0.1, p_h=0.9, p_skill=0.9):\n",
" #N: num patients\n",
" #D: num doctors\n",
" #p_vl, p_l, p_h: very low, low, and high probability (of recovery)\n",
" #p_skill: probability of patient being assigned a doctor with skill matching severity\n",
" \n",
" #split doctors into two sets and make exactly half of them highly skilled\n",
" beta = np.zeros(D)\n",
" D_sk, D_nsk = D/2, D - D/2 #num skilled v.s. not skilled doctors, account for rounding error when D is odd\n",
" beta[random.sample(range(D),D_sk)] = 1.\n",
" \n",
" #split patients into two sets and make exactly half of them highly severe\n",
" theta = np.zeros(N)\n",
" theta[random.sample(range(N),N/2)] = 1.\n",
" \n",
" S = np.zeros(N) #skill level for each patient\n",
" Z = np.zeros((N,D)) #patient-doctor assignment matrix\n",
" Y = np.zeros(N) #outcome: did the patient recover?\n",
" \n",
" for n in range(N):\n",
" #for each patient choose the skill level of their doctor\n",
" S[n] = np.random.binomial(1, theta[n]*p_skill + (1-theta[n])*(1-p_skill))\n",
" #choose doctor assignment to patient n\n",
" Z[n,:] = np.random.multinomial(1, beta*S[n]/float(D_sk) + (1-beta)*(1-S[n])/float(D_nsk))\n",
" d_n = Z[n,:].argmax()\n",
" p_recovery = S[n]*theta[n]*p_l + (1-S[n])*theta[n]*p_vl + (1-theta[n])*p_h\n",
" Y[n] = np.random.binomial(1, p_recovery)\n",
" \n",
" return Y, Z, theta, beta\n",
"\n",
"N, D = 1000, 10\n",
"Y, Z, theta, beta = sim_data(N, D)\n",
"print('Y: total, num recovered',N,Y.sum())\n"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<table>\n",
"<tr><th style=\"text-align: right;\"> doctor id</th><th style=\"text-align: right;\"> num patients</th><th style=\"text-align: right;\"> survival rate</th></tr>\n",
"<tr><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 117</td><td style=\"text-align: right;\"> 0.205</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 85</td><td style=\"text-align: right;\"> 0.812</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 2</td><td style=\"text-align: right;\"> 93</td><td style=\"text-align: right;\"> 0.129</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 88</td><td style=\"text-align: right;\"> 0.216</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 98</td><td style=\"text-align: right;\"> 0.153</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 5</td><td style=\"text-align: right;\"> 112</td><td style=\"text-align: right;\"> 0.830</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 89</td><td style=\"text-align: right;\"> 0.809</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 109</td><td style=\"text-align: right;\"> 0.138</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 8</td><td style=\"text-align: right;\"> 102</td><td style=\"text-align: right;\"> 0.824</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 9</td><td style=\"text-align: right;\"> 107</td><td style=\"text-align: right;\"> 0.841</td></tr>\n",
"</table>\n"
]
}
],
"source": [
"tbl_naive = [(d, int(Z[:,d].sum()), (Y*Z[:,d]).sum()/Z[:,d].sum()) for d in range(D)]\n",
"print tabulate(tbl_naive,headers=['doctor id', 'num patients', 'survival rate'],floatfmt=\".3f\", tablefmt=\"html\")"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<table>\n",
"<tr><th style=\"text-align: right;\"> doctor id</th><th style=\"text-align: right;\"> num severe patients</th><th style=\"text-align: right;\"> recovery rate | non-severe</th><th style=\"text-align: right;\"> recovery rate | severe</th></tr>\n",
"<tr><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 103</td><td style=\"text-align: right;\"> 0.857</td><td style=\"text-align: right;\"> 0.117</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 0.885</td><td style=\"text-align: right;\"> 0.000</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 2</td><td style=\"text-align: right;\"> 87</td><td style=\"text-align: right;\"> 0.833</td><td style=\"text-align: right;\"> 0.080</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 78</td><td style=\"text-align: right;\"> 0.900</td><td style=\"text-align: right;\"> 0.128</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 92</td><td style=\"text-align: right;\"> 1.000</td><td style=\"text-align: right;\"> 0.098</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 5</td><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 0.877</td><td style=\"text-align: right;\"> 0.000</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 11</td><td style=\"text-align: right;\"> 0.923</td><td style=\"text-align: right;\"> 0.000</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 97</td><td style=\"text-align: right;\"> 0.917</td><td style=\"text-align: right;\"> 0.041</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 8</td><td style=\"text-align: right;\"> 11</td><td style=\"text-align: right;\"> 0.923</td><td style=\"text-align: right;\"> 0.000</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 9</td><td style=\"text-align: right;\"> 8</td><td style=\"text-align: right;\"> 0.909</td><td style=\"text-align: right;\"> 0.000</td></tr>\n",
"</table>\n"
]
}
],
"source": [
"def stratify_recovery(Y,Z,N,D,theta):\n",
" tau_strat = np.zeros((2,D))\n",
" tau_strat[0,:] = np.array([((1-theta)*Y*Z[:,d]).sum()/((1-theta)*Z[:,d]).sum() for d in range(D)])\n",
" tau_strat[1,:] = np.array([((theta)*Y*Z[:,d]).sum()/((theta)*Z[:,d]).sum() for d in range(D)])\n",
" return tau_strat\n",
"\n",
"tau_strat = stratify_recovery(Y,Z,N,D,theta)\n",
"Nds = map(int,np.dot(theta, Z)) #num severely ill people treated by each doctor\n",
"print tabulate(zip(range(D),Nds,tau_strat[0,:],tau_strat[1,:]),\n",
" headers=['doctor id', 'num severe patients', 'recovery rate | non-severe', 'recovery rate | severe'],\n",
" floatfmt=\".3f\", tablefmt=\"html\")"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def patient_match(Y, Z, N, D, theta):\n",
" #pick the patients of each doctor and match them randomly with another patient of the same severity (high or low)\n",
" #who had another doctor, then find average outcome difference\n",
" #question: they're all getting \"treated\" there are no controls? Then does this still make sense?\n",
"\n",
" tau = np.zeros(N) #noisy treatment effect for each patient\n",
" \n",
" for n in range(N):\n",
" #find patients of same severity who were treated by a different doctor:\n",
" dn = Z[n,:].argmax() #doctor of patient n\n",
" g = (theta*theta[n] + (1-theta)*(1-theta[n]))*(1-Z[:,dn])\n",
" #pick one at random uniformly\n",
" match = np.random.multinomial(1, g/g.sum()).argmax()\n",
" #confirm that they have the same severity and have different doctors (this should always be true)\n",
" assert theta[match] == theta[n], 'patients not matched by severity'\n",
" assert Z[match,:].argmax() != Z[n,:].argmax(), 'patients have same doctor (%i, %i)' % (match, n)\n",
" #calculate difference in outcomes:\n",
" tau[n] = Y[n] - Y[match]\n",
" \n",
" #calculate average difference in outcomes over matches for each doctor:\n",
" tau_doctor = np.array([(tau*Z[:,d]).sum()/Z[:,d].sum() for d in range(D)])\n",
" \n",
" return tau_doctor\n",
"\n",
"tau_doctor = patient_match(Y, Z, N, D, theta)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('ground truth skill of doctors', array([ 1., 0., 1., 1., 1., 0., 0., 1., 0., 0.]))\n"
]
}
],
"source": [
"print('ground truth skill of doctors',beta)"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<table>\n",
"<tr><th style=\"text-align: right;\"> doctor id</th><th style=\"text-align: right;\"> avg treatment effect</th><th style=\"text-align: right;\"> doctor skill</th><th style=\"text-align: right;\"> num treated</th><th style=\"text-align: right;\"> avg recovery rate</th></tr>\n",
"<tr><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 0.000</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 117</td><td style=\"text-align: right;\"> 0.205</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> -0.024</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 85</td><td style=\"text-align: right;\"> 0.812</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 2</td><td style=\"text-align: right;\"> 0.000</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 93</td><td style=\"text-align: right;\"> 0.129</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 0.023</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 88</td><td style=\"text-align: right;\"> 0.216</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> -0.020</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 98</td><td style=\"text-align: right;\"> 0.153</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 5</td><td style=\"text-align: right;\"> -0.080</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 112</td><td style=\"text-align: right;\"> 0.830</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 0.034</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 89</td><td style=\"text-align: right;\"> 0.809</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> -0.064</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 109</td><td style=\"text-align: right;\"> 0.138</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 8</td><td style=\"text-align: right;\"> 0.010</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 102</td><td style=\"text-align: right;\"> 0.824</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 9</td><td style=\"text-align: right;\"> 0.037</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 107</td><td style=\"text-align: right;\"> 0.841</td></tr>\n",
"</table>\n"
]
}
],
"source": [
"header_match = ['doctor id','avg treatment effect', 'doctor skill', 'num treated', 'avg recovery rate']\n",
"tbl_list_match = [(d,t,int(b),int(Z[:,d].sum()),(Y*Z[:,d]).sum()/Z[:,d].sum()) for t,b,d in zip(tau_doctor, beta, range(D))]\n",
"print tabulate(tbl_list_match, headers=header_match,floatfmt=\".3f\", tablefmt=\"html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('propensity', array([[ 0.028, 0.156, 0.012, 0.02 , 0.012, 0.212, 0.156, 0.024,\n",
" 0.182, 0.198],\n",
" [ 0.206, 0.014, 0.174, 0.156, 0.184, 0.012, 0.022, 0.194,\n",
" 0.022, 0.016]]))\n",
"<table>\n",
"<tr><th>unit covariate </th><th style=\"text-align: right;\"> doc_0</th><th style=\"text-align: right;\"> doc_1</th><th style=\"text-align: right;\"> doc_2</th><th style=\"text-align: right;\"> doc_3</th><th style=\"text-align: right;\"> doc_4</th></tr>\n",
"<tr><td>non-severe </td><td style=\"text-align: right;\"> 0.028</td><td style=\"text-align: right;\"> 0.156</td><td style=\"text-align: right;\"> 0.012</td><td style=\"text-align: right;\"> 0.020</td><td style=\"text-align: right;\"> 0.012</td></tr>\n",
"<tr><td>severe </td><td style=\"text-align: right;\"> 0.206</td><td style=\"text-align: right;\"> 0.014</td><td style=\"text-align: right;\"> 0.174</td><td style=\"text-align: right;\"> 0.156</td><td style=\"text-align: right;\"> 0.184</td></tr>\n",
"</table>\n",
"<table>\n",
"<tr><th>unit covariate </th><th style=\"text-align: right;\"> doc_5</th><th style=\"text-align: right;\"> doc_6</th><th style=\"text-align: right;\"> doc_7</th><th style=\"text-align: right;\"> doc_8</th><th style=\"text-align: right;\"> doc_9</th></tr>\n",
"<tr><td>non-severe </td><td style=\"text-align: right;\"> 0.212</td><td style=\"text-align: right;\"> 0.156</td><td style=\"text-align: right;\"> 0.024</td><td style=\"text-align: right;\"> 0.182</td><td style=\"text-align: right;\"> 0.198</td></tr>\n",
"<tr><td>severe </td><td style=\"text-align: right;\"> 0.012</td><td style=\"text-align: right;\"> 0.022</td><td style=\"text-align: right;\"> 0.194</td><td style=\"text-align: right;\"> 0.022</td><td style=\"text-align: right;\"> 0.016</td></tr>\n",
"</table>\n",
"<table>\n",
"<tr><th style=\"text-align: right;\"> doctor id</th><th style=\"text-align: right;\"> doctor skill</th><th style=\"text-align: right;\"> treated recovery</th><th style=\"text-align: right;\"> untreated recovery</th><th style=\"text-align: right;\"> avg treatment effect</th></tr>\n",
"<tr><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 0.487</td><td style=\"text-align: right;\"> 0.489</td><td style=\"text-align: right;\"> -0.003</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 0.442</td><td style=\"text-align: right;\"> 0.495</td><td style=\"text-align: right;\"> -0.053</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 2</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 0.457</td><td style=\"text-align: right;\"> 0.494</td><td style=\"text-align: right;\"> -0.037</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 3</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 0.514</td><td style=\"text-align: right;\"> 0.489</td><td style=\"text-align: right;\"> 0.025</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 4</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 0.549</td><td style=\"text-align: right;\"> 0.491</td><td style=\"text-align: right;\"> 0.058</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 5</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 0.439</td><td style=\"text-align: right;\"> 0.497</td><td style=\"text-align: right;\"> -0.058</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 6</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 0.462</td><td style=\"text-align: right;\"> 0.492</td><td style=\"text-align: right;\"> -0.030</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 7</td><td style=\"text-align: right;\"> 1</td><td style=\"text-align: right;\"> 0.479</td><td style=\"text-align: right;\"> 0.498</td><td style=\"text-align: right;\"> -0.019</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 8</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 0.462</td><td style=\"text-align: right;\"> 0.492</td><td style=\"text-align: right;\"> -0.030</td></tr>\n",
"<tr><td style=\"text-align: right;\"> 9</td><td style=\"text-align: right;\"> 0</td><td style=\"text-align: right;\"> 0.455</td><td style=\"text-align: right;\"> 0.493</td><td style=\"text-align: right;\"> -0.038</td></tr>\n",
"</table>\n"
]
}
],
"source": [
"def propensity_score(Z, N, D, theta):\n",
" p = np.zeros((2,D))\n",
" p[0,:] = [(Z[:,d]*(1-theta)).sum()/(1-theta).sum() for d in range(D)]\n",
" p[1,:] = [(Z[:,d]*(theta)).sum()/(theta).sum() for d in range(D)]\n",
" return p\n",
"\n",
"p = propensity_score(Z, N, D, theta)\n",
"print('propensity',p)\n",
"\n",
"def avg_tau_propensity_weighted(Y, Z, p, N, D, theta):\n",
" tau = np.zeros(D)\n",
" itheta = map(int, theta)\n",
" #calculate the weighted effect on the treated (by doctor d)\n",
" eft = np.array([Y*Z[:,d]/p[itheta,d] for d in range(D)])\n",
" #calculate the weighted effect on the untreated (by doctor d)\n",
" efu = np.array([Y*(1-Z[:,d])/(1-p[itheta,d]) for d in range(D)])\n",
" return eft.mean(axis=1), efu.mean(axis=1), (eft - efu).mean(axis=1)\n",
" \n",
"eft, efu, tau_pw = avg_tau_propensity_weighted(Y, Z, p, N, D, theta)\n",
"print tabulate([['non-severe']+list(p[0,:5]),['severe']+list(p[1,:5])],\n",
" headers=['unit covariate']+['doc_%i'%d for d in range(5)],\n",
" floatfmt=\".3f\", tablefmt=\"html\")\n",
"print tabulate([['non-severe']+list(p[0,5:]),['severe']+list(p[1,5:])],\n",
" headers=['unit covariate']+['doc_%i'%d for d in range(5,D)],\n",
" floatfmt=\".3f\", tablefmt=\"html\")\n",
"print tabulate(zip(range(D),map(int,beta),eft,efu,tau_pw),\n",
" headers=['doctor id','doctor skill','treated recovery','untreated recovery','avg treatment effect'],\n",
" floatfmt=\".3f\", tablefmt=\"html\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment