Skip to content

Instantly share code, notes, and snippets.

@gcalmettes
Last active December 24, 2015 13:09
Show Gist options
  • Save gcalmettes/6802424 to your computer and use it in GitHub Desktop.
Save gcalmettes/6802424 to your computer and use it in GitHub Desktop.
HHURP analysis
{
"metadata": {
"name": ""
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Setting the environment and importing the libraries"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%pylab inline"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from scipy import stats\n",
"\n",
"from code import bootstrap_routines as bt"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Loading the data (+ info and stats)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ctr_data = pd.read_excel('control-full-database.xls', 'Sheet1')\n",
"hhurp_data = pd.read_excel('hhurp-full-database.xlsx', 'Sheet1')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 117
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"minimum GPA in the HHURP group: %.3f\" %hhurp_data.gpa_at_application.min()\n",
"print \"minimum GPA in the control group: %.3f\" %ctr_data.gpa_at_application.min()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"minimum GPA in the HHURP group: 3.148\n",
"minimum GPA in the control group: 2.267\n"
]
}
],
"prompt_number": 118
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"Statistical difference:\"\n",
"print \"- Boostrap: %.9f\" %bt.bootpv(hhurp_data.gpa_at_application, ctr_data.gpa_at_application, printout=False)\n",
"print \"- Mann Whitney U: %.9f\" %stats.mannwhitneyu(hhurp_data.gpa_at_application, ctr_data.gpa_at_application)[1]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Statistical difference:\n",
"- Boostrap: 0.000200000"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"- Mann Whitney U: 0.000017109\n"
]
}
],
"prompt_number": 121
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Filtering the control group to match the GPA of the HHURP group"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Thresholding GPA at 3.54 for the control group (bootstrap: no significant difference / Mann-Whitney U: significant difference)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"Statistical difference:\"\n",
"print \"- Boostrap: %.9f\" %bt.bootpv(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.54], printout=False)\n",
"print \"- Mann Whitney U: %.9f\" %stats.mannwhitneyu(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.54])[1]\n",
"print \"\"\n",
"print \"Size of the control group: %i\" %len(ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.54])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Statistical difference:\n",
"- Boostrap: 0.051800000"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"- Mann Whitney U: 0.006164596\n",
"\n",
"Size of the control group: 147\n"
]
}
],
"prompt_number": 122
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Thresholding GPA at 3.625 for the control group (bootstrap: no significant difference / Mann-Whitney U: no significant difference)"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"Statistical difference:\"\n",
"print \"- Boostrap: %.9f\" %bt.bootpv(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.625], printout=False)\n",
"print \"- Mann Whitney U: %.9f\" %stats.mannwhitneyu(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.625])[1]\n",
"print \"\"\n",
"print \"Size of the control group: %i\" %len(ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.625])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Statistical difference:\n",
"- Boostrap: 0.486800000"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"- Mann Whitney U: 0.051752804\n",
"\n",
"Size of the control group: 121\n"
]
}
],
"prompt_number": 123
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"New sub-groups for the control group"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ctr_data_sup354 = ctr_data[ctr_data.gpa_at_application>3.54]\n",
"ctr_data_sup3625 = ctr_data[ctr_data.gpa_at_application>3.625]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 124
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Career choice"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students\"\n",
"print \"- PHD: %.4f\" %(len(hhurp_data[hhurp_data.career=='PHD'])/np.float(len(hhurp_data)))\n",
"print \"- MD/PHD: %.4f\" %(len(hhurp_data[hhurp_data.career=='MD/PHD'])/np.float(len(hhurp_data)))\n",
"print \"- MD: %.4f\" %(len(hhurp_data[hhurp_data.career=='MD'])/np.float(len(hhurp_data)))\n",
"print \"- Other: %.4f\" %(len(hhurp_data[hhurp_data.career!='MD'][hhurp_data.career!='MD/PHD'][hhurp_data.career!='PHD'])/np.float(len(hhurp_data)))\n",
"print \"\"\n",
"\n",
"print \"Control students (GPA>3.54)\"\n",
"print \"- PHD: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career=='PHD'])/np.float(len(ctr_data_sup354)))\n",
"print \"- MD/PHD: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career=='MD/PHD'])/np.float(len(ctr_data_sup354)))\n",
"print \"- MD: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career=='MD'])/np.float(len(ctr_data_sup354)))\n",
"print \"- Other: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career!='MD'][ctr_data_sup354.career!='MD/PHD'][ctr_data_sup354.career!='PHD'])/np.float(len(ctr_data_sup354)))\n",
"print \"\"\n",
"\n",
"print \"Control students (GPA>3.625)\"\n",
"print \"- PHD: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career=='PHD'])/np.float(len(ctr_data_sup3625)))\n",
"print \"- MD/PHD: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD/PHD'])/np.float(len(ctr_data_sup3625)))\n",
"print \"- MD: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD'])/np.float(len(ctr_data_sup3625)))\n",
"print \"- Other: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career!='MD'][ctr_data_sup3625.career!='MD/PHD'][ctr_data_sup3625.career!='PHD'])/np.float(len(ctr_data_sup3625)))\n",
"print \"\"\n",
"\n",
"\n",
"print \"Career with PHD component\"\n",
"print \"- HHURP: %.4f\" %(len(hhurp_data[hhurp_data.career.isin(['MD/PHD', 'PHD'])])/np.float(len(hhurp_data)))\n",
"print \"- Control (GPA>3.54): %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup354)))\n",
"print \"- Control (GPA>3.625): %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup3625)))\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students\n",
"- PHD: 0.2647\n",
"- MD/PHD: 0.2647\n",
"- MD: 0.4118\n",
"- Other: 0.0588\n",
"\n",
"Control students (GPA>3.54)\n",
"- PHD: 0.1088\n",
"- MD/PHD: 0.0476\n",
"- MD: 0.5238\n",
"- Other: 0.3197\n",
"\n",
"Control students (GPA>3.625)\n",
"- PHD: 0.1074\n",
"- MD/PHD: 0.0496\n",
"- MD: 0.5537\n",
"- Other: 0.2893\n",
"\n",
"Career with PHD component\n",
"- HHURP: 0.5294\n",
"- Control (GPA>3.54): 0.1565\n",
"- Control (GPA>3.625): 0.1570\n"
]
}
],
"prompt_number": 157
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Fraction of UG/Grad students publishing"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Fraction of UG students publishing"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students UG\"\n",
"print \"ALL: %.5f\" %(len(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0])/np.float(len(hhurp_data)))\n",
"print \"All PhD: %.5f\" %(len(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])])/np.float(len(hhurp_data[hhurp_data.career.isin(['MD/PHD', 'PHD'])])))\n",
"print \"MD: %.5f\" %(len(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'])/np.float(len(hhurp_data[hhurp_data.career=='MD'])))\n",
"print \"\"\n",
"\n",
"print \"Control students UG (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %(len(ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0])/np.float(len(ctr_data_sup354)))\n",
"print \"All PhD: %.5f\" %(len(ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup354[ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])))\n",
"print \"MD: %.5f\" %(len(ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'])/np.float(len(ctr_data_sup354[ctr_data_sup354.career=='MD'])))\n",
"print \"\"\n",
"\n",
"print \"Control students UG (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %(len(ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0])/np.float(len(ctr_data_sup3625)))\n",
"print \"All PhD: %.5f\" %(len(ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])))\n",
"print \"MD: %.5f\" %(len(ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD'])))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students UG\n",
"ALL: 0.61765\n",
"All PhD: 0.61111\n",
"MD: 0.64286\n",
"\n",
"Control students UG (GPA>3.54)\n",
"ALL: 0.44218\n",
"All PhD: 0.56522\n",
"MD: 0.48052\n",
"\n",
"Control students UG (GPA>3.625)\n",
"ALL: 0.47107\n",
"All PhD: 0.57895\n",
"MD: 0.52239\n"
]
}
],
"prompt_number": 125
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Fraction of Grad students publishing"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students Grad\"\n",
"print \"ALL: %.5f\" %(len(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0])/np.float(len(hhurp_data)))\n",
"print \"All PhD: %.5f\" %(len(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])])/np.float(len(hhurp_data[hhurp_data.career.isin(['MD/PHD', 'PHD'])])))\n",
"print \"MD: %.5f\" %(len(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'])/np.float(len(hhurp_data[hhurp_data.career=='MD'])))\n",
"print \"\"\n",
"\n",
"print \"Control students Grad (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %(len(ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0])/np.float(len(ctr_data_sup354)))\n",
"print \"All PhD: %.5f\" %(len(ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup354[ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])))\n",
"print \"MD: %.5f\" %(len(ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'])/np.float(len(ctr_data_sup354[ctr_data_sup354.career=='MD'])))\n",
"print \"\"\n",
"\n",
"print \"Control students Grad (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %(len(ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0])/np.float(len(ctr_data_sup3625)))\n",
"print \"All PhD: %.5f\" %(len(ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])))\n",
"print \"MD: %.5f\" %(len(ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD'])))\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students Grad\n",
"ALL: 0.50000\n",
"All PhD: 0.66667\n",
"MD: 0.28571\n",
"\n",
"Control students Grad (GPA>3.54)\n",
"ALL: 0.23129\n",
"All PhD: 0.52174\n",
"MD: 0.28571\n",
"\n",
"Control students Grad (GPA>3.625)\n",
"ALL: 0.22314\n",
"All PhD: 0.52632\n",
"MD: 0.25373\n"
]
}
],
"prompt_number": 126
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Number of papers per publishing undergraduate student"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Comparison of MEANs"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students UG\"\n",
"print \"ALL: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0].mean()\n",
"print \"All PhD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].mean()\n",
"print \"MD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'].mean()\n",
"print \"\"\n",
"\n",
"print \"Control students UG (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0].mean()\n",
"print \"All PhD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].mean()\n",
"print \"MD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'].mean()\n",
"print \"\"\n",
"\n",
"print \"Control students UG (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0].mean()\n",
"print \"All PhD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].mean()\n",
"print \"MD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'].mean()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students UG\n",
"ALL: 1.61905\n",
"All PhD: 1.63636\n",
"MD: 1.55556\n",
"\n",
"Control students UG (GPA>3.54)\n",
"ALL: 1.60000\n",
"All PhD: 1.84615\n",
"MD: 1.62162\n",
"\n",
"Control students UG (GPA>3.625)\n",
"ALL: 1.59649\n",
"All PhD: 1.81818\n",
"MD: 1.62857\n"
]
}
],
"prompt_number": 127
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Comparison of MEDIANs"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students UG\"\n",
"print \"ALL: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0].median()\n",
"print \"All PhD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].median()\n",
"print \"MD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'].median()\n",
"print \"\"\n",
"\n",
"print \"Control students UG (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0].median()\n",
"print \"All PhD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].median()\n",
"print \"MD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'].median()\n",
"print \"\"\n",
"\n",
"print \"Control students UG (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0].median()\n",
"print \"All PhD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].median()\n",
"print \"MD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'].median()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students UG\n",
"ALL: 1.00000\n",
"All PhD: 1.00000\n",
"MD: 1.00000\n",
"\n",
"Control students UG (GPA>3.54)\n",
"ALL: 1.00000\n",
"All PhD: 2.00000\n",
"MD: 1.00000\n",
"\n",
"Control students UG (GPA>3.625)\n",
"ALL: 1.00000\n",
"All PhD: 2.00000\n",
"MD: 1.00000\n"
]
}
],
"prompt_number": 129
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Stats"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"Bootstrap, HHURP vs Control (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0], printout=False)\n",
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])], printout=False)\n",
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'], printout=False)\n",
"print \"\"\n",
"\n",
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0], printout=False)\n",
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])], printout=False)\n",
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'], printout=False)\n",
"print \"\"\n",
"\n",
"print \"Mann Whitney U, HHURP vs Control (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0])[1]\n",
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])[1]\n",
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'])[1]\n",
"print \"\"\n",
"\n",
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0])[1]\n",
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])[1]\n",
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'])[1]\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Bootstrap, HHURP vs Control (GPA>3.54)\n",
"ALL: 0.46070"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"All PhD: 0.28290"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"MD: 0.46600"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Bootstrap, HHURP vs Control (GPA>3.625)\n",
"ALL: 0.44230"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"All PhD: 0.34320"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"MD: 0.47040"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Mann Whitney U, HHURP vs Control (GPA>3.54)\n",
"ALL: 0.48632\n",
"All PhD: 0.29610\n",
"MD: 0.24183\n",
"\n",
"Mann Whitney U, HHURP vs Control (GPA>3.625)\n",
"ALL: 0.48723\n",
"All PhD: 0.32118\n",
"MD: 0.24592\n"
]
}
],
"prompt_number": 140
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Number of papers per publishing graduate student"
]
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Comparison of MEANs"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students Grad\"\n",
"print \"ALL: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0].mean()\n",
"print \"All PhD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].mean()\n",
"print \"MD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'].mean()\n",
"print \"\"\n",
"\n",
"print \"Control students Grad (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0].mean()\n",
"print \"All PhD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].mean()\n",
"print \"MD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'].mean()\n",
"print \"\"\n",
"\n",
"print \"Control students Grad (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0].mean()\n",
"print \"All PhD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].mean()\n",
"print \"MD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'].mean()\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students Grad\n",
"ALL: 2.00000\n",
"All PhD: 1.66667\n",
"MD: 2.75000\n",
"\n",
"Control students Grad (GPA>3.54)\n",
"ALL: 3.00000\n",
"All PhD: 3.91667\n",
"MD: 2.50000\n",
"\n",
"Control students Grad (GPA>3.625)\n",
"ALL: 3.29630\n",
"All PhD: 3.90000\n",
"MD: 2.94118\n"
]
}
],
"prompt_number": 135
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Comparison of MEDIANs"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"HHURP students Grad\"\n",
"print \"ALL: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0].median()\n",
"print \"All PhD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].median()\n",
"print \"MD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'].median()\n",
"print \"\"\n",
"\n",
"print \"Control students Grad (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0].median()\n",
"print \"All PhD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].median()\n",
"print \"MD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'].median()\n",
"print \"\"\n",
"\n",
"print \"Control students Grad (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0].median()\n",
"print \"All PhD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].median()\n",
"print \"MD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'].median()\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"HHURP students Grad\n",
"ALL: 2.00000\n",
"All PhD: 1.50000\n",
"MD: 2.00000\n",
"\n",
"Control students Grad (GPA>3.54)\n",
"ALL: 1.50000\n",
"All PhD: 3.50000\n",
"MD: 1.00000\n",
"\n",
"Control students Grad (GPA>3.625)\n",
"ALL: 2.00000\n",
"All PhD: 3.50000\n",
"MD: 1.00000\n"
]
}
],
"prompt_number": 134
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Stats"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print \"Bootstrap, HHURP vs Control (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0], printout=False)\n",
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])], printout=False)\n",
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'], printout=False)\n",
"print \"\"\n",
"\n",
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0], printout=False)\n",
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])], printout=False)\n",
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'], printout=False)\n",
"print \"\"\n",
"\n",
"print \"Mann Whitney U, HHURP vs Control (GPA>3.54)\"\n",
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0])[1]\n",
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])[1]\n",
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'])[1]\n",
"print \"\"\n",
"\n",
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n",
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0])[1]\n",
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])[1]\n",
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'])[1]\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Bootstrap, HHURP vs Control (GPA>3.54)\n",
"ALL: 0.10290"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"All PhD: 0.00760"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"MD: 0.32720"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Bootstrap, HHURP vs Control (GPA>3.625)\n",
"ALL: 0.06880"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"All PhD: 0.01130"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"MD: 0.54540"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"\n",
"Mann Whitney U, HHURP vs Control (GPA>3.54)\n",
"ALL: 0.27138\n",
"All PhD: 0.01074\n",
"MD: 0.28477\n",
"\n",
"Mann Whitney U, HHURP vs Control (GPA>3.625)\n",
"ALL: 0.16680\n",
"All PhD: 0.02922\n",
"MD: 0.42309\n"
]
}
],
"prompt_number": 158
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment