Last active
December 24, 2015 13:09
-
-
Save gcalmettes/6802424 to your computer and use it in GitHub Desktop.
HHURP analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Setting the environment and importing the libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"%pylab inline" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Populating the interactive namespace from numpy and matplotlib\n" | |
] | |
} | |
], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"from scipy import stats\n", | |
"\n", | |
"from code import bootstrap_routines as bt" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Loading the data (+ info and stats)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ctr_data = pd.read_excel('control-full-database.xls', 'Sheet1')\n", | |
"hhurp_data = pd.read_excel('hhurp-full-database.xlsx', 'Sheet1')" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 117 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"minimum GPA in the HHURP group: %.3f\" %hhurp_data.gpa_at_application.min()\n", | |
"print \"minimum GPA in the control group: %.3f\" %ctr_data.gpa_at_application.min()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"minimum GPA in the HHURP group: 3.148\n", | |
"minimum GPA in the control group: 2.267\n" | |
] | |
} | |
], | |
"prompt_number": 118 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"Statistical difference:\"\n", | |
"print \"- Boostrap: %.9f\" %bt.bootpv(hhurp_data.gpa_at_application, ctr_data.gpa_at_application, printout=False)\n", | |
"print \"- Mann Whitney U: %.9f\" %stats.mannwhitneyu(hhurp_data.gpa_at_application, ctr_data.gpa_at_application)[1]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Statistical difference:\n", | |
"- Bootstrap: 0.000200000" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"- Mann Whitney U: 0.000017109\n" | |
] | |
} | |
], | |
"prompt_number": 121 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Filtering the control group to match the GPA of the HHURP group" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Thresholding GPA at 3.54 for the control group (bootstrap: no difference / Mann-Whitney U: difference)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"Statistical difference:\"\n", | |
"print \"- Boostrap: %.9f\" %bt.bootpv(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.54], printout=False)\n", | |
"print \"- Mann Whitney U: %.9f\" %stats.mannwhitneyu(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.54])[1]\n", | |
"print \"\"\n", | |
"print \"Size of the control group: %i\" %len(ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.54])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Statistical difference:\n", | |
"- Bootstrap: 0.051800000" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"- Mann Whitney U: 0.006164596\n", | |
"\n", | |
"Size of the control group: 147\n" | |
] | |
} | |
], | |
"prompt_number": 122 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Thresholding GPA at 3.625 for the control group (bootstrap: no difference / Mann-Whitney U: no difference)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"Statistical difference:\"\n", | |
"print \"- Boostrap: %.9f\" %bt.bootpv(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.625], printout=False)\n", | |
"print \"- Mann Whitney U: %.9f\" %stats.mannwhitneyu(hhurp_data.gpa_at_application, ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.625])[1]\n", | |
"print \"\"\n", | |
"print \"Size of the control group: %i\" %len(ctr_data.gpa_at_application[ctr_data.gpa_at_application>3.625])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Statistical difference:\n", | |
"- Bootstrap: 0.486800000" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"- Mann Whitney U: 0.051752804\n", | |
"\n", | |
"Size of the control group: 121\n" | |
] | |
} | |
], | |
"prompt_number": 123 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"New sub-groups for the control group" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ctr_data_sup354 = ctr_data[ctr_data.gpa_at_application>3.54]\n", | |
"ctr_data_sup3625 = ctr_data[ctr_data.gpa_at_application>3.625]" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 124 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Career choice" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students\"\n", | |
"print \"- PHD: %.4f\" %(len(hhurp_data[hhurp_data.career=='PHD'])/np.float(len(hhurp_data)))\n", | |
"print \"- MD/PHD: %.4f\" %(len(hhurp_data[hhurp_data.career=='MD/PHD'])/np.float(len(hhurp_data)))\n", | |
"print \"- MD: %.4f\" %(len(hhurp_data[hhurp_data.career=='MD'])/np.float(len(hhurp_data)))\n", | |
"print \"- Other: %.4f\" %(len(hhurp_data[hhurp_data.career!='MD'][hhurp_data.career!='MD/PHD'][hhurp_data.career!='PHD'])/np.float(len(hhurp_data)))\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students (GPA>3.54)\"\n", | |
"print \"- PHD: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career=='PHD'])/np.float(len(ctr_data_sup354)))\n", | |
"print \"- MD/PHD: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career=='MD/PHD'])/np.float(len(ctr_data_sup354)))\n", | |
"print \"- MD: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career=='MD'])/np.float(len(ctr_data_sup354)))\n", | |
"print \"- Other: %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career!='MD'][ctr_data_sup354.career!='MD/PHD'][ctr_data_sup354.career!='PHD'])/np.float(len(ctr_data_sup354)))\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students (GPA>3.625)\"\n", | |
"print \"- PHD: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career=='PHD'])/np.float(len(ctr_data_sup3625)))\n", | |
"print \"- MD/PHD: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD/PHD'])/np.float(len(ctr_data_sup3625)))\n", | |
"print \"- MD: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD'])/np.float(len(ctr_data_sup3625)))\n", | |
"print \"- Other: %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career!='MD'][ctr_data_sup3625.career!='MD/PHD'][ctr_data_sup3625.career!='PHD'])/np.float(len(ctr_data_sup3625)))\n", | |
"print \"\"\n", | |
"\n", | |
"\n", | |
"print \"Career with PHD component\"\n", | |
"print \"- HHURP: %.4f\" %(len(hhurp_data[hhurp_data.career.isin(['MD/PHD', 'PHD'])])/np.float(len(hhurp_data)))\n", | |
"print \"- Control (GPA>3.54): %.4f\" %(len(ctr_data_sup354[ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup354)))\n", | |
"print \"- Control (GPA>3.625): %.4f\" %(len(ctr_data_sup3625[ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup3625)))\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students\n", | |
"- PHD: 0.2647\n", | |
"- MD/PHD: 0.2647\n", | |
"- MD: 0.4118\n", | |
"- Other: 0.0588\n", | |
"\n", | |
"Control students (GPA>3.54)\n", | |
"- PHD: 0.1088\n", | |
"- MD/PHD: 0.0476\n", | |
"- MD: 0.5238\n", | |
"- Other: 0.3197\n", | |
"\n", | |
"Control students (GPA>3.625)\n", | |
"- PHD: 0.1074\n", | |
"- MD/PHD: 0.0496\n", | |
"- MD: 0.5537\n", | |
"- Other: 0.2893\n", | |
"\n", | |
"Career with PHD component\n", | |
"- HHURP: 0.5294\n", | |
"- Control (GPA>3.54): 0.1565\n", | |
"- Control (GPA>3.625): 0.1570\n" | |
] | |
} | |
], | |
"prompt_number": 157 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Fraction of UG/Grad students publishing" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Fraction of UG students publishing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students UG\"\n", | |
"print \"ALL: %.5f\" %(len(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0])/np.float(len(hhurp_data)))\n", | |
"print \"All PhD: %.5f\" %(len(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])])/np.float(len(hhurp_data[hhurp_data.career.isin(['MD/PHD', 'PHD'])])))\n", | |
"print \"MD: %.5f\" %(len(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'])/np.float(len(hhurp_data[hhurp_data.career=='MD'])))\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students UG (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %(len(ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0])/np.float(len(ctr_data_sup354)))\n", | |
"print \"All PhD: %.5f\" %(len(ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup354[ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])))\n", | |
"print \"MD: %.5f\" %(len(ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'])/np.float(len(ctr_data_sup354[ctr_data_sup354.career=='MD'])))\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students UG (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %(len(ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0])/np.float(len(ctr_data_sup3625)))\n", | |
"print \"All PhD: %.5f\" %(len(ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])))\n", | |
"print \"MD: %.5f\" %(len(ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD'])))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students UG\n", | |
"ALL: 0.61765\n", | |
"All PhD: 0.61111\n", | |
"MD: 0.64286\n", | |
"\n", | |
"Control students UG (GPA>3.54)\n", | |
"ALL: 0.44218\n", | |
"All PhD: 0.56522\n", | |
"MD: 0.48052\n", | |
"\n", | |
"Control students UG (GPA>3.625)\n", | |
"ALL: 0.47107\n", | |
"All PhD: 0.57895\n", | |
"MD: 0.52239\n" | |
] | |
} | |
], | |
"prompt_number": 125 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Fraction of Grad students publishing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students Grad\"\n", | |
"print \"ALL: %.5f\" %(len(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0])/np.float(len(hhurp_data)))\n", | |
"print \"All PhD: %.5f\" %(len(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])])/np.float(len(hhurp_data[hhurp_data.career.isin(['MD/PHD', 'PHD'])])))\n", | |
"print \"MD: %.5f\" %(len(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'])/np.float(len(hhurp_data[hhurp_data.career=='MD'])))\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students Grad (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %(len(ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0])/np.float(len(ctr_data_sup354)))\n", | |
"print \"All PhD: %.5f\" %(len(ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup354[ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])))\n", | |
"print \"MD: %.5f\" %(len(ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'])/np.float(len(ctr_data_sup354[ctr_data_sup354.career=='MD'])))\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students Grad (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %(len(ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0])/np.float(len(ctr_data_sup3625)))\n", | |
"print \"All PhD: %.5f\" %(len(ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])))\n", | |
"print \"MD: %.5f\" %(len(ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'])/np.float(len(ctr_data_sup3625[ctr_data_sup3625.career=='MD'])))\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students Grad\n", | |
"ALL: 0.50000\n", | |
"All PhD: 0.66667\n", | |
"MD: 0.28571\n", | |
"\n", | |
"Control students Grad (GPA>3.54)\n", | |
"ALL: 0.23129\n", | |
"All PhD: 0.52174\n", | |
"MD: 0.28571\n", | |
"\n", | |
"Control students Grad (GPA>3.625)\n", | |
"ALL: 0.22314\n", | |
"All PhD: 0.52632\n", | |
"MD: 0.25373\n" | |
] | |
} | |
], | |
"prompt_number": 126 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Number of papers per Undergraduate students publishing" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Comparison of MEANs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students UG\"\n", | |
"print \"ALL: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0].mean()\n", | |
"print \"All PhD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].mean()\n", | |
"print \"MD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'].mean()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students UG (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0].mean()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].mean()\n", | |
"print \"MD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'].mean()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students UG (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0].mean()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].mean()\n", | |
"print \"MD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'].mean()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students UG\n", | |
"ALL: 1.61905\n", | |
"All PhD: 1.63636\n", | |
"MD: 1.55556\n", | |
"\n", | |
"Control students UG (GPA>3.54)\n", | |
"ALL: 1.60000\n", | |
"All PhD: 1.84615\n", | |
"MD: 1.62162\n", | |
"\n", | |
"Control students UG (GPA>3.625)\n", | |
"ALL: 1.59649\n", | |
"All PhD: 1.81818\n", | |
"MD: 1.62857\n" | |
] | |
} | |
], | |
"prompt_number": 127 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Comparison of MEDIANs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students UG\"\n", | |
"print \"ALL: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0].median()\n", | |
"print \"All PhD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].median()\n", | |
"print \"MD: %.5f\" %hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'].median()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students UG (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0].median()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].median()\n", | |
"print \"MD: %.5f\" %ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'].median()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students UG (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0].median()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].median()\n", | |
"print \"MD: %.5f\" %ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'].median()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students UG\n", | |
"ALL: 1.00000\n", | |
"All PhD: 1.00000\n", | |
"MD: 1.00000\n", | |
"\n", | |
"Control students UG (GPA>3.54)\n", | |
"ALL: 1.00000\n", | |
"All PhD: 2.00000\n", | |
"MD: 1.00000\n", | |
"\n", | |
"Control students UG (GPA>3.625)\n", | |
"ALL: 1.00000\n", | |
"All PhD: 2.00000\n", | |
"MD: 1.00000\n" | |
] | |
} | |
], | |
"prompt_number": 129 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Stats" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"Bootstrap, HHURP vs Control (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0], printout=False)\n", | |
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])], printout=False)\n", | |
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'], printout=False)\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0], printout=False)\n", | |
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])], printout=False)\n", | |
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'], printout=False)\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Mann Whitney U, HHURP vs Control (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0])[1]\n", | |
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])[1]\n", | |
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.ug_pubs[ctr_data_sup354.ug_pubs>0][ctr_data_sup354.career=='MD'])[1]\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0])[1]\n", | |
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])[1]\n", | |
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.ug_pubs[hhurp_data.ug_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.ug_pubs[ctr_data_sup3625.ug_pubs>0][ctr_data_sup3625.career=='MD'])[1]\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Bootstrap, HHURP vs Control (GPA>3.54)\n", | |
"ALL: 0.46070" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"All PhD: 0.28290" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"MD: 0.46600" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"\n", | |
"Bootstrap, HHURP vs Control (GPA>3.625)\n", | |
"ALL: 0.44230" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"All PhD: 0.34320" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"MD: 0.47040" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"\n", | |
"Mann Whitney U, HHURP vs Control (GPA>3.54)\n", | |
"ALL: 0.48632\n", | |
"All PhD: 0.29610\n", | |
"MD: 0.24183\n", | |
"\n", | |
"Mann Whitney U, HHURP vs Control (GPA>3.625)\n", | |
"ALL: 0.48723\n", | |
"All PhD: 0.32118\n", | |
"MD: 0.24592\n" | |
] | |
} | |
], | |
"prompt_number": 140 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Number of papers per Graduate students publishing" | |
] | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Comparison of MEANs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students Grad\"\n", | |
"print \"ALL: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0].mean()\n", | |
"print \"All PhD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].mean()\n", | |
"print \"MD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'].mean()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students Grad (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0].mean()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].mean()\n", | |
"print \"MD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'].mean()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students Grad (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0].mean()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].mean()\n", | |
"print \"MD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'].mean()\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students Grad\n", | |
"ALL: 2.00000\n", | |
"All PhD: 1.66667\n", | |
"MD: 2.75000\n", | |
"\n", | |
"Control students Grad (GPA>3.54)\n", | |
"ALL: 3.00000\n", | |
"All PhD: 3.91667\n", | |
"MD: 2.50000\n", | |
"\n", | |
"Control students Grad (GPA>3.625)\n", | |
"ALL: 3.29630\n", | |
"All PhD: 3.90000\n", | |
"MD: 2.94118\n" | |
] | |
} | |
], | |
"prompt_number": 135 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Comparison of MEDIANs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"HHURP students Grad\"\n", | |
"print \"ALL: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0].median()\n", | |
"print \"All PhD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])].median()\n", | |
"print \"MD: %.5f\" %hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'].median()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students Grad (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0].median()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])].median()\n", | |
"print \"MD: %.5f\" %ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'].median()\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Control students Grad (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0].median()\n", | |
"print \"All PhD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])].median()\n", | |
"print \"MD: %.5f\" %ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'].median()\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"HHURP students Grad\n", | |
"ALL: 2.00000\n", | |
"All PhD: 1.50000\n", | |
"MD: 2.00000\n", | |
"\n", | |
"Control students Grad (GPA>3.54)\n", | |
"ALL: 1.50000\n", | |
"All PhD: 3.50000\n", | |
"MD: 1.00000\n", | |
"\n", | |
"Control students Grad (GPA>3.625)\n", | |
"ALL: 2.00000\n", | |
"All PhD: 3.50000\n", | |
"MD: 1.00000\n" | |
] | |
} | |
], | |
"prompt_number": 134 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Stats" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print \"Bootstrap, HHURP vs Control (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0], printout=False)\n", | |
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])], printout=False)\n", | |
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'], printout=False)\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0], printout=False)\n", | |
"print \"All PhD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])], printout=False)\n", | |
"print \"MD: %.5f\" %bt.bootpv(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'], printout=False)\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Mann Whitney U, HHURP vs Control (GPA>3.54)\"\n", | |
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0])[1]\n", | |
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career.isin(['MD/PHD', 'PHD'])])[1]\n", | |
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup354.grad_pubs[ctr_data_sup354.grad_pubs>0][ctr_data_sup354.career=='MD'])[1]\n", | |
"print \"\"\n", | |
"\n", | |
"print \"Bootstrap, HHURP vs Control (GPA>3.625)\"\n", | |
"print \"ALL: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0])[1]\n", | |
"print \"All PhD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career.isin(['MD/PHD', 'PHD'])], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career.isin(['MD/PHD', 'PHD'])])[1]\n", | |
"print \"MD: %.5f\" %stats.mannwhitneyu(hhurp_data.grad_pubs[hhurp_data.grad_pubs>0][hhurp_data.career=='MD'], ctr_data_sup3625.grad_pubs[ctr_data_sup3625.grad_pubs>0][ctr_data_sup3625.career=='MD'])[1]\n" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"Bootstrap, HHURP vs Control (GPA>3.54)\n", | |
"ALL: 0.10290" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"All PhD: 0.00760" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"MD: 0.32720" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"\n", | |
"Bootstrap, HHURP vs Control (GPA>3.625)\n", | |
"ALL: 0.06880" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"All PhD: 0.01130" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"MD: 0.54540" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\n", | |
"\n", | |
"Mann Whitney U, HHURP vs Control (GPA>3.54)\n", | |
"ALL: 0.27138\n", | |
"All PhD: 0.01074\n", | |
"MD: 0.28477\n", | |
"\n", | |
"Mann Whitney U, HHURP vs Control (GPA>3.625)\n", | |
"ALL: 0.16680\n", | |
"All PhD: 0.02922\n", | |
"MD: 0.42309\n" | |
] | |
} | |
], | |
"prompt_number": 158 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment