Skip to content

Instantly share code, notes, and snippets.

@vals
Created January 17, 2013 10:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vals/4554986 to your computer and use it in GitHub Desktop.
Save vals/4554986 to your computer and use it in GitHub Desktop.
{
"metadata": {
"name": "Lab 1"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ef = pd.ExcelFile('lab1.xls')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ef.sheet_names"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 3,
"text": [
"[u'log']"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = ef.parse('log')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 6,
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 5000 entries, 0 to 4999\n",
"Data columns:\n",
"PEAKGROUP_ID 5000 non-null values\n",
"MZ_IDX 5000 non-null values\n",
"MZ 5000 non-null values\n",
"START_RT_IDX 5000 non-null values\n",
"STOP_RT_IDX 5000 non-null values\n",
"START_RT 5000 non-null values\n",
"STOP_RT 5000 non-null values\n",
"LENGTH 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_02.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_03.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_04.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_05.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_02.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_03.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_04.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_05.msmat 5000 non-null values\n",
"P-VALUE 0 non-null values\n",
"FDR 0 non-null values\n",
"PI0 0 non-null values\n",
"FDR with PI0 0 non-null values\n",
"dtypes: float64(20)"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from scipy.stats import ttest_ind"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x8 = df.ix[0,8:12]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"x1 = df.ix[0,12:16]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ttest_ind(x8, x1)[1]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 35,
"text": [
"0.0028980776133482574"
]
}
],
"prompt_number": 35
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ttest_ind(df.ix[:,8:12], df.ix[:,12:16], 1)[1]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 39,
"text": [
"array([ 2.89807761e-03, 3.06661273e-03, 2.38479414e-01, ...,\n",
" 5.26971375e-01, 6.44149364e-01, 1.02842933e-06])"
]
}
],
"prompt_number": 39
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df[\"P-VALUE\"] = ttest_ind(df.ix[:,8:12], df.ix[:,12:16], 1)[1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 40
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 41,
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 5000 entries, 0 to 4999\n",
"Data columns:\n",
"PEAKGROUP_ID 5000 non-null values\n",
"MZ_IDX 5000 non-null values\n",
"MZ 5000 non-null values\n",
"START_RT_IDX 5000 non-null values\n",
"STOP_RT_IDX 5000 non-null values\n",
"START_RT 5000 non-null values\n",
"STOP_RT 5000 non-null values\n",
"LENGTH 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_02.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_03.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_04.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_05.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_02.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_03.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_04.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_05.msmat 5000 non-null values\n",
"P-VALUE 5000 non-null values\n",
"FDR 0 non-null values\n",
"PI0 0 non-null values\n",
"FDR with PI0 0 non-null values\n",
"dtypes: float64(20)"
]
}
],
"prompt_number": 41
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df.sort(\"P-VALUE\", ascending=True, inplace=True)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 52,
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 5000 entries, 2015 to 2990\n",
"Data columns:\n",
"PEAKGROUP_ID 5000 non-null values\n",
"MZ_IDX 5000 non-null values\n",
"MZ 5000 non-null values\n",
"START_RT_IDX 5000 non-null values\n",
"STOP_RT_IDX 5000 non-null values\n",
"START_RT 5000 non-null values\n",
"STOP_RT 5000 non-null values\n",
"LENGTH 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_02.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_03.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_04.msmat 5000 non-null values\n",
"8x:sg_w11_o2_spike9.20090723-EC_8X_05.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_02.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_03.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_04.msmat 5000 non-null values\n",
"1x:sg_w11_o2_spike9.20090723-EC_1X_05.msmat 5000 non-null values\n",
"P-VALUE 5000 non-null values\n",
"FDR 0 non-null values\n",
"PI0 0 non-null values\n",
"FDR with PI0 0 non-null values\n",
"dtypes: float64(20)"
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df[\"P-VALUE\"].head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 53,
"text": [
"2015 5.467817e-14\n",
"1371 9.705029e-14\n",
"743 2.491953e-13\n",
"1663 7.942569e-13\n",
"1917 5.461348e-12\n",
"Name: P-VALUE"
]
}
],
"prompt_number": 53
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_fdr = np.zeros(5001)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 55
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_fdr[-1] = 1.0"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 56
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pvalues = np.array(df[\"P-VALUE\"])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 62
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for i in range(len(pvalues))[::-1]:\n",
" a_fdr[i] = min(pvalues[i] * 5000 / (i + 1), a_fdr[i + 1])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 66
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_fdr"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 67,
"text": [
"array([ 2.42625720e-10, 2.42625720e-10, 4.15325469e-10, ...,\n",
" 9.98650903e-01, 9.98925630e-01, 1.00000000e+00])"
]
}
],
"prompt_number": 67
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df[\"FDR\"] = a_fdr[:-1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 70
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Answer for Question 1\n",
"\n",
"The $q$ value shouls reflect the expected portion of false positives when we consider a feature 'significant'. By taking the minimum of the previous, we will always have it so that it reflects the expected proportion of false positives of _all_ features considered _more_ 'extreme' than the one we are looking at"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fdr = df[\"FDR\"]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 73
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(fdr[fdr <= 0.01])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 76,
"text": [
"1348"
]
}
],
"prompt_number": 76
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Answer for Question 2\n",
"\n",
"$ 1348 $"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_pi0 = np.zeros(5000)\n",
"\n",
"for i, pval in enumerate(pvalues):\n",
" a_pi0[i] = len(pvalues[pvalues > pval]) / (5000 * (1 - pval))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 88
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_pi0"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 89,
"text": [
"array([ 0.9998 , 0.9996 , 0.9994 , ..., 0.18216429,\n",
" 0.12913 , 0. ])"
]
}
],
"prompt_number": 89
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df[\"PI0\"] = a_pi0"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 116
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pvalues"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 90,
"text": [
"array([ 5.46781744e-14, 9.70502882e-14, 2.49195281e-13, ...,\n",
" 9.97804180e-01, 9.98451173e-01, 9.98925630e-01])"
]
}
],
"prompt_number": 90
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"plt.scatter(pvalues, a_pi0, alpha=0.5, edgecolor='none');\n",
"plt.xlabel(\"$\\lambda$\");\n",
"plt.ylabel(\"${\\pi}_o (\\lambda)$\");\n",
"\n",
"pi0_line = 0.3\n",
"\n",
"plt.plot([0,1], [pi0_line, pi0_line], 'r');\n",
"plt.annotate('pi0 = {}'.format(pi0_line), [0.6, 0.2], color='r');"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "display_data",
"png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEOCAYAAABM5Pr8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtUVNe9B/DvIGhAKOCL6Ay5KIM8BawgPqoZtFSLSlM1\nCbGxBl3E60qamGblpre2N5DcKiZrtbWhbbRRkyZKTG1abEV6q3VijEFs9EqjXkOshmGiRhRKFBEY\n9v1jh/cAwxw45wx8P2vNKjNzOPMb05kv+3H2NgghBIiIiBTw0roAIiLyfAwTIiJSjGFCRESKMUyI\niEgxhgkRESnGMCEiIsV0FyarV69GSEgIpkyZ4vT5Xbt2ISEhAfHx8Zg9ezbKyspUrpCIiDrTXZhk\nZWWhuLi42+cnTZqEI0eOoKysDD/+8Y/x6KOPqlgdERE5o7swmTNnDoKDg7t9fubMmQgMDAQApKSk\noLKyUq3SiIioG7oLk77Yvn070tPTtS6DiGjI89a6AHcdPnwYO3bswPvvv+/0eYPBoHJFRESDgzur\nbHlky6SsrAzZ2dnYt29fj11iQgiPvT333HOa18D6ta+D9XvezZNrF8L9pRo9LkwqKiqwdOlSvPnm\nmzCbzVqXQ0RE0GE310MPPYR3330XVVVVCA0NRW5uLhobGwEAa9euxfPPP4/q6mqsW7cOAODj44PS\n0lItSyYiGvJ0FyYFBQU9Pv/qq6/i1VdfVaka7VgsFq1LUIT1a4v1a8eTa1fCIJR0kumYwWBQ1P9H\nRDQUufvd6XFjJkREpD8MEyIiUoxhQkREijFMiIhIMYYJEREpxjAhIiLFGCZERKQYw4SIiBRjmBAR\nkWIMEyIiUoxhQkREijFMiIhIMYYJEREpxjAhIiLFGCZERKQYw4SIiBRjmBARkWIMEyIiUoxhQkRE\nijFMiIhIMYYJEREpxjAhIiLFdBcmq1evRkhICKZMmdLtMU888QQiIiKQkJCAU6dOqVgdERE5o7sw\nycrKQnFxcbfPFxUV4ZNPPkF5eTm2bduGdevWqVjdwLt8GRg+HDAYgDlztK6GiMg1uguTOXPmIDg4\nuNvn9+3bh1WrVgEAUlJSUFNTg6tXr6pV3oCbMAFobJQ/Hz0KTJ+ubT1ERK7w1rqAvrLb7QgNDW29\nbzKZUFlZiZCQkC7H5uTktP5ssVhgsVhUqLB/nTihdQVENJhZrVZYrVbF5/G4MAEAIUSH+waDwelx\n7cOEiIi66vyHdm5urlvn0V03V2+MRiNsNlvr/crKShiNRg0rIiIijwuTjIwM/Pa3vwUAlJSUICgo\nyGkXFxERqUd33VwPPfQQ3n33XVRVVSE0NBS5ublo/HJEeu3atUhPT0dRURHMZjNGjhyJnTt3alwx\nEREZROcBiEHCYDB0GVvxBM6GfzzwbRCRh3L3u9PjurmIiEh/GCZERKQYw4SIiBRjmBARkWIMEyIi\nUoxhQkREijFMiIhIMYYJEREpxjAhIiLFGCZERKQYw4SIiBRjmBARkWIMEyIiUoxhQkREijFMiIhI\nMYYJEREpxjAhIiLFGCZERKQYw4SIiBRjmBARkWIMEyIiUoxhQkREiukuTIqLixEVFYWIiAhs3ry5\ny/NVVVVYuHAhEhMTERcXh9dee039IomIqAODEEJoXUQLh8OByMhIHDx4EEajEcnJySgoKEB0dHTr\nMTk5Obhz5w42bdqEqqoqREZG4urVq/D29u5wLoPBAB29NZcZDF0f88C3QUQeyt3vTl21TEpLS2E2\nmxEWFgYfHx9kZmaisLCwwzHjx49HbW0tAKC2thajR4/uEiRERKQuXX0L2+12hIaGtt43mUw4fvx4\nh2Oys7Mxb948TJgwAV988QXefvtttcskIqJOdBUmBmd9PJ1s3LgRiYmJsFqtuHDhAtLS0nD69GkE\nBAR0OTYnJ6f1Z4vFAovF0o/VEhF5PqvVCqvVqvg8ugoTo9EIm83Wet9ms8FkMnU45tixY9iwYQMA\nIDw8HBMnTsT58+eRlJTU5Xztw4SIiLrq/Id2bm6uW+fR1ZhJUlISysvLcenSJTQ0NGDPnj3IyMjo\ncExUVBQOHjwIALh69SrOnz+PSZMmaVEuERF9SVctE29vb+Tn52PBggVwOBxYs2YNoqOjsXXrVgDA\n2rVr8cMf/hBZWVlISEhAc3MzXnzxRYwaNUrjyomIhjZdTQ3uT5waTETUd4NiajAREXkmhgkRESnG\nMCEiIsUYJkREpBjDxAN8/rnWFRAR9Yxh4gFmz9a6AiKinnFqsM50t6KMB74VIvJAnBpMRESaYZjo\nTEKC1hUQEfUdu7l0prERGD686+Me+FaIyAOxm2uQ8PHRugIior5jmHiIqiqtKyAi6h67uXSIM7qI\nSCvs5iIiIs0wTHSoqEjrCoiI+obdXDrFfU2ISAvs5hoC/P21roCIyDm2THSKg/BEpAW2TAaZ7dud\nP+5wqFsHEZEr2DLRMbZOiEhtbJkQEZFmGCY6Vl/v/PHuWixERFphmOjYiBFaV0BE5BrdhUlxcTGi\noqIQERGBzZs3Oz3GarVi6tSpiIuLg8ViUbdAlb3yivPHk5LUrYOIqCe6GoB3OByIjIzEwYMHYTQa\nkZycjIKCAkRHR7ceU1NTg9mzZ+Mvf/kLTCYTqqqqMGbMmC7nGgwD8C04EE9EahkUA/ClpaUwm80I\nCwuDj48PMjMzUVhY2OGY3bt3Y9myZTCZTADgNEgGm+nTta6AiKhnugoTu92O0NDQ1vsmkwl2u73D\nMeXl5bhx4wZSU1ORlJSEN954Q+0yVXf8uPPHd+9Wtw4iou54a11AewYXpik1Njbi5MmTOHToEOrq\n6jBz5kzMmDEDERERXY7Nyclp/dlisQy68ZXvfAdYsULrKojIk1mtVlitVsXn0VWYGI1G2Gy21vs2\nm621O6tFaGgoxowZA19fX/j6+mLu3Lk4ffp0r2Hi6UaPBq5f7/q4wcCxEyJyX+c/tHNzc906j666\nuZKSklBeXo5Lly6hoaEBe/bsQUZGRodjvvWtb+Ho0aNwOByoq6vD8ePHERMTo1HF6ulpp8XZs9Wr\ng4jIGV21TLy9vZGfn48FCxbA4XBgzZo1iI6OxtatWwEAa9euRVRUFBYuXIj4+Hh4eXkhOzt7SIQJ\nAIwbB3z+edfHjx0D/vAH4NvfVr8mIiJAZ1OD+9NgmhrcXk/DSoPw7RKRygbF1GDqXU//jbnMChFp\nxeVurtu3b6OgoAD/+Mc/0NTUhLq6Onh5eSEgIAApKSm4//774eXFbFKDEN0Hx9y5wJEj6tZDRORS\nN9fBgwdx9uxZLFq0COHh4R2eE0KgrKwMhw4dwvz585GQkDBgxfbFYO3marF5M/CDHzh/bhC/bSIa\nYO5+d/YaJvX19aisrITZbO71ZGfOnEFsbGyfixgIgz1MAI6fEFH/G7Aw6UlDQwOKiopw3333uXuK\nATMUwgRgoBBR/1JtAL62thYFBQV48MEHce+99+LMmTN9flHqP3/6U/fPGQzASy+pVwsRDV0utUxu\n3LiBvXv3orCwELdu3YK/vz9eeOEFTJ06VY0a3TJUWiaA3PekoaH75xcv7jl0iIhaDGg314oVKzBv\n3jwsWbIEISEhqK+vR05ODlauXKmbMZLOhlKYAK5NCx5C/xxE5KYBDZPq6moEBwd3eMzhcODpp5/G\ntGnTsHLlyj6/8EAbamECAMnJwN//3vMxQUFAdbU69RCR5xnQMZPOQQIAw4YNw89//nP4+vr2+UVp\nYJw4AXzySc/H1NTIVoyT/6RERG7rNUzu3LmDqh5WGVy+fHnrzxUVFf1TFbktPBxobu79uJZQ2bJl\n4GsiosGv1zAZMWIESkpKsHv3bty+fdvpMdXV1di2bRs+/fTTfi+Q+q4vy9KvXy+P58rDRKSEy9eZ\nXL58GTt37sTnn3+O+vp6NDY2YtiwYfDz84PJZEJ2djYCAwMHul6XDcUxE2fOngX6MkfCYHCtZUNE\ng5MmFy3qGcOko6oqYOzYvv3OI48A27YBPj4DUhIR6RDDpBOGiXNvvQU89FDff2/UKOc7PRLR4KLZ\nEvRVVVUoLy9XehpSSWamHE9ZuLBvv3fjhuwCMxiAAwcGpjYi8lyKWya//vWv4XA4EBQUhIcffri/\n6lKMLRPXfPopEBbm/u8bDHKnxxkz+q0kItKQZi2TsWPH4rHHHnNpVWHSn3/7N9lScbdxKQQwc2Zb\nq+Vf/+rf+ojIM/Q5TFr2Y28xffp0rF+/HufOneu3okh9ZrMMBiGAP//Z/fMEBbUFi8EADB8O5OX1\nX51EpE997uZatGgR9u/fDwBoamrChQsXEBkZOSDFKcFurv5RXS0H3wdCQADwxhvAt741MOcnor5T\nbTbXjBkz8OSTT+LatWu4cOECxo0bhw0bNvT5hQcaw6R/NTYC990HFBWp83q+vnKtse3bZauJiNSh\nWpgkJyfjqaeeQmBgIGbNmuV03S49YJgMvMWLgS8bqaq46y45WeDxx4HHHlPvdYmGEtXCZMWKFdi9\nezfOnz+PXbt2wWazYefOnX1+4YHGMFFXUhLw4Yfqv+499wBnzgD+/uq/NtFgpNpsrpdeegnHjh1D\nZGQknn/++X4PkuLiYkRFRSEiIgKbN2/u9rgTJ07A29sb77zzTr++Prnn739vG8BvueXnD/zrVlTI\nsZf2g/6db1/9KnDkyMDXQjSU6eoKeIfDgcjISBw8eBBGoxHJyckoKChAdHR0l+PS0tLg5+eHrKws\nLFu2rMu52DLRrzt35FItb72ldSWSnx9w/jxgMgElJcA//wksXSq71YiGGs2uM+lPpaWlMJvNCAsL\ng4+PDzIzM1FYWNjluJdffhnLly/H2L4uNkW6MGIEUFDQsRXT3AzcvCnXENu6FYiIUK+eujogNFS2\nYmbOBL7zHTkBoKVlk5cnL8xsbFSvJiJPo6swsdvtCA0Nbb1vMplgt9u7HFNYWIh169YBkClKns9g\nAEaOBEaPBh59FPj4445B89//DaSmalPbf/6nXKJ/+PCuXWiBgUBWFtDQoE1tRHrhrXUB7bkSDOvX\nr0deXl5rU6yn5lhOTk7rzxaLBRaLpR+qJLUZDMCGDfLWorkZiI+Xg+9aqq0FXntN3gDg/vuBt9/W\nsiLSQl0d8Ic/AFeuABMnAhkZgLeuvl27Z7VaYbVaFZ9HV2MmJSUlyMnJQXFxMQBg06ZN8PLywrPP\nPtt6zKRJk1oDpKqqCn5+fvjNb36DjIyMDufimMnQ1rJEzPe+B/zP/2hXh78/8MADwO3bwOefy8dC\nQ2VdiYmAl676BsgddXXAO+903DLbYpE3TzQolqBvampCZGQkDh06hAkTJmD69OlOB+BbZGVlYcmS\nJVi6dGmX5xgm1JOGBtm6mTEDOH1a62rkBZp798rtlE2mgVt1gPrXgQOyVWq3y/9uUVHy8cREeZGv\nJxoUA/De3t7Iz8/HggULEBMTgwcffBDR0dHYunVrlzXBiJQYPlzO1vrf/+06pbnl9uKLctqxGk6c\nkItuJiTIcaOW8Zgf/1id16e+u3AB+Pd/ly3f06eBDz6QfwwYDEBMjNbVqU9XLZP+xJYJDQQh5NTm\n+no5tfk//gP44gv1Xn/WLODoUfmFRdratAnYuLFtlt+wYfLaqtRUZds6aG1QtEyI9M5gkC2aoCD5\nV2ltrQyYlutUBtqxY3Kcpf2MsrvuklOZH30UcDgGvgaS419Xr8p/7+ZmeQOAadM8O0iUYMuEaADc\nugWsWiW7P5qb25bmr611f++YvvL1BXbskCs/nz0LfP/7wN13y0AKDpYrA1Dfffwx8KMfAdeuyYtc\nHQ4Z8AEBcifTOXPkeMnw4VpX6p5BMQDfnxgm5AlqaoB584CPPtLuosiJE+W2zJ03NgsNlTtxskut\nTUMDkJYGHD8uuzsBGRr+/nLSRGoqMGGCvC4pLU3bWt3l7nenh8yEJhqcgoKAkyc7PuZwAOfOyZlm\nt24NfA0XLzp/3GZrm7q8ciWwbh0wZYps2Zw6BTQ1yckCdXWyFRYQIK+tGMzTnT/6qGOQADJgWoLY\nx0f+71DccZQtEyIPUFUFfO1rcmzGU6SkyNvvfie7hCZOlN1CQUFyKm1AgAyyZ5+Vkwpu35a/d9dd\nwNix8nc/+kiG1YoVcsBba7m5QLtroTvw8wPuvVfW/cADQDdXNOgeu7k6YZjQYHfsmGw9TJwIzJ8v\n1zYb7P74R9n9duyY7EZSssnrF1/IcPPzk63AYcN6/52sLLk7qLOJDsOHyzXlVq+WkzP8/NyvTUvs\n5iIaYmbNavu58/Tkl16SC1Tevi2/1P71L9kt5e0tvwg99e+s7i4EHDsW+MtfgKlT5f3r1+V79POT\n3VLnzsnN3O65Rz5/8yawaFHb1gmzZwMHD/b82s3Ncmxp+PC2VlR7TU2yi6+2FvjrX4fedtRsmRAN\nUUeOyL/0/f2Bb35T/nVuMMgvxXnzgPfe07rCvouOlt1in37q/PlvfEOu8fbii113Cf397+XWA+3d\nuQP85jcyjMeMAX7+c9kadDaW5eUlFytduVJ2dX33u/3zntTGbq5OGCZEyjQ0AP/1X/Kv7P/7P3mh\nJgB85SvyL/76enlMXV3bdRaeLCBAjt3cc48c1ykrk+NU7Vt9BoNs3TU3y26xYcM6tlK8vOQkhVdf\nlbuPeiKGSScMEyJtXLsmV829eFFe6zJrVtv+NZ2NHCn3kMnLk9e+nD8P/OMfwO7dcqq02Qzs26f+\ne7jrLnmrqXH+vJeXnAo8aRJQWtr2+LBh8rF33wXGj1en1v7GMOmEYUKkP83NfZ863NAgV+Vtv+KA\n1gwG4MEHZRgWFMjWWcvjs2bJbQgmTNC2RncxTDrx+DDhlWJEHqupUXjMfiadcW2uwaa7pWx54423\nLrf8lwViogVCxglY7hV4/TWB2n+1Pf/6awJ+vgIGyNtwH4EVDwkEBbY95uptxHCB13bK8zqaBCIn\nC3wlQMB/pMDoUQKLF3lukCjBlgkRDQnNzXJdLYdDXp/i7S1naVksciuC7nh5yUU8m5rk7p45OXK2\nVousLDmtuGXRx29/G/j1rwf63QwcdnN1wjAhIle99Za8EPKrX5UXMhYXy1UHRo8GnnkGWLu2bSZX\nZxUVwJNPyiVmQkLk9OOlS9uuefE0DJNOGCZEpJbf/U7OQmuZXDBp0tC7zoRjJkRECgUGdpylFhio\nXS1aGYLDRERE/ctikfvGXLokpwR76vLzSrCbi4iIWrGbi4h6l50t10YBgA8/lGt/RETIEeT+4Mo5\nS0vl6PTUqXJ61J49/fPapCm2TIiGqunTgfx8+b/p6cATTwALFw78OW/fBkaMkIMMV64AcXFyQ3VX\n1oCnAceWCRFJly4BUVHAww8DMTHA/fe3rUZoscitHS9flisYTp8uH//ud+USwkq4ek5f37bR6tu3\n5Wj1IAgSux344IPud64c7BgmRIPRxx8Djz0GnD0rl/n91a/k4y3L9Njt8kq8FkajfKwzq7WtS6r9\n7Wtf63qsq+cEZFdXbKy8/fSnbr1FPblwAdi+Xe6p8vrrPV8EOVjpMkyKi4sRFRWFiIgIbN68ucvz\nu3btQkJCAuLj4zF79myUlZVpUCWRjoWGyuV4AdlCOXq04/Ourv1mscir8TrfOp+vr6ZPB86cka2k\nJ5/0+E3TT5/uuAz/UAwT3U0NdjgcePzxx3Hw4EEYjUYkJycjIyMD0e02VJ40aRKOHDmCwMBAFBcX\n49FHH0VJSYmGVRPpTPuwEKJreEyYAFRWtt2vrJQtic4OHwa+//2uj/v5Ae+/3/Exo9G1c7YXFQWE\nhwOffAJMm9bzsToWENDz/aFAdy2T0tJSmM1mhIWFwcfHB5mZmSgsLOxwzMyZMxH45VVBKSkpqGz/\nf2Aikmt8tPyBtXs3MGdOx+fHj5fdX8ePy7B54w3ne+KmpjpvmXQOkr6c89IludAVILdELC+Xs788\n2Ny5wOTJgI+P3FxrwQKtK1Kf7lomdrsdoaGhrfdNJhOOHz/e7fHbt29Henq6GqUReY7ISOCXvwRW\nr5bjEuvWdT3mV78CHnlEDoKnpyufydXTOf/0J7nhem6u7CLLy5PfvD4+wLZtMoQ82IgRcp7DtWvy\nrfj7a12R+nQXJoY+7ONx+PBh7NixA+87+ysJQE5OTuvPFosFFotFYXVEHsLbW7YMOjt8uO3nadPk\nglL9qbtzLlkib4Acw3n44f59XY3dvCn3ir9wARg+XG7kNXmy1lW5xmq1wmq1Kj6P7sLEaDTCZrO1\n3rfZbDC1nyHypbKyMmRnZ6O4uBjBwcFOz9U+TIiGFG6upqqSEuBvf2vbL/6nPwVeeUXbmlzV+Q/t\n3Nxct86juzGTpKQklJeX49KlS2hoaMCePXuQkZHR4ZiKigosXboUb775Jsxms0aVEulUWBjAGY6q\narnEpkVlZdtWvkOF7lom3t7eyM/Px4IFC+BwOLBmzRpER0dj69atAIC1a9fi+eefR3V1NdZ92Q/s\n4+OD0tJSLcsmoiEsObltL/hhw+R8gqG22yKXUyEi6gd//rO8xtPXl5tjDSoMEyJS2507smXiya0S\nd787PfgtExHpy4gRWlegHd0NwBMRkedhmBARkWLs5iIi6gf//CdQXCwXfExNlQsPDCUMEyIiherr\ngbfeAhoa5P133pFraXZzPfWgxG4uIiKFbt1qCxIAcDiA2lrt6tECw4SISKHgYLlocougIODuu7Wr\nRwu8zoSIqB/U1wMnTshWybRpnrunCS9a7IRhQkTUd+5+d7Kbi4iIFGOYEBGRYgwTIiJSjGFCRESK\nMUyIiEgxhgkRESnGMCEiIsUYJkREpBjDhIiIFGOYEBGRYgwTIiJSjGFCRESKMUyIiEgx3YVJcXEx\noqKiEBERgc2bNzs95oknnkBERAQSEhJw6tQplSskIqLOdBUmDocDjz/+OIqLi3H27FkUFBTg3Llz\nHY4pKirCJ598gvLycmzbtg3r1q3TqFoioo7sduDCBaCxUetK1KerPeBLS0thNpsRFhYGAMjMzERh\nYSGio6Nbj9m3bx9WrVoFAEhJSUFNTQ2uXr2KkJAQLUomIgIA/O1vwJEj8ufx44HVqwEfH21rUpOu\nWiZ2ux2hoaGt900mE+x2e6/HVFZWqlYjEVFnDgfw3ntt9y9fBs6f164eLeiqZWIwGFw6rvMuYN39\nXk5OTuvPFosFFovF3dKIiLplMADDhgFNTW2PeUqrxGq1wmq1Kj6PrsLEaDTCZrO13rfZbDCZTD0e\nU1lZCaPR6PR87cOEiGigeHkBS5YA+/bJVkpsLDB5stZVuabzH9q5ublunUdXYZKUlITy8nJcunQJ\nEyZMwJ49e1BQUNDhmIyMDOTn5yMzMxMlJSUICgrieAkRaS4hAYiKAhoagIAAratRn67CxNvbG/n5\n+ViwYAEcDgfWrFmD6OhobN26FQCwdu1apKeno6ioCGazGSNHjsTOnTs1rpqISBoxQt6GIoPoPAAx\nSBgMhi5jK0RE1DN3vzt1NZuLiIg8E8OEiIgUY5gQEZFiDBMiIlKMYUJERIoxTIiISDGGCRERKcYw\nISIixRgmRESkGMOEiIgUY5gQEZFiDBMiIlKMYUJERIoxTIiISDGGCRERKcYwISIixRgmRESkGMOE\niIgUY5gQEZFiDBMiIlKMYUJERIoxTIiISDGGCRERKaarMLlx4wbS0tIwefJkfOMb30BNTU2XY2w2\nG1JTUxEbG4u4uDj84he/0KDSgWe1WrUuQRHWry3Wrx1Prl0JXYVJXl4e0tLS8PHHH2P+/PnIy8vr\ncoyPjw9+9rOf4cyZMygpKcEvf/lLnDt3ToNqB5an/x+S9WuL9WvHk2tXQldhsm/fPqxatQoAsGrV\nKvzxj3/scszdd9+NxMREAIC/vz+io6Px2WefqVonERF1pKswuXr1KkJCQgAAISEhuHr1ao/HX7p0\nCadOnUJKSooa5RERUTcMQgih5gumpaXhypUrXR7/yU9+glWrVqG6urr1sVGjRuHGjRtOz3Pz5k1Y\nLBb86Ec/wn333dfleYPB0H9FExENIe7EgvcA1NGjv/71r90+FxISgitXruDuu+/G5cuXMW7cOKfH\nNTY2YtmyZXj44YedBgng3j8GERG5R1fdXBkZGXj99dcBAK+//rrToBBCYM2aNYiJicH69evVLpGI\niJxQvZurJzdu3MADDzyAiooKhIWF4e2330ZQUBA+++wzZGdnY//+/Th69Cjmzp2L+Pj41q6sTZs2\nYeHChRpXT0Q0hIlB4vr16+LrX/+6iIiIEGlpaaK6urrLMRUVFcJisYiYmBgRGxsrtmzZokGlbQ4c\nOCAiIyOF2WwWeXl5To/53ve+J8xms4iPjxcnT55UucKe9Vb/m2++KeLj48WUKVPErFmzxOnTpzWo\nsnuu/PsLIURpaakYNmyY+P3vf69idb1zpf7Dhw+LxMREERsbK+699151C+xFb/Vfu3ZNLFiwQCQk\nJIjY2Fixc+dO9YvsRlZWlhg3bpyIi4vr9hg9f3Z7q9+dz+6gCZNnnnlGbN68WQghRF5ennj22We7\nHHP58mVx6tQpIYQQX3zxhZg8ebI4e/asqnW2aGpqEuHh4eLixYuioaFBJCQkdKll//794pvf/KYQ\nQoiSkhKRkpKiRalOuVL/sWPHRE1NjRBCfnF4Wv0tx6WmpopFixaJvXv3alCpc67UX11dLWJiYoTN\nZhNCyC9nvXCl/ueee0784Ac/EELI2keNGiUaGxu1KLeLI0eOiJMnT3b7Zaznz64QvdfvzmdXV2Mm\nSnjaNSqlpaUwm80ICwuDj48PMjMzUVhY2OGY9u8pJSUFNTU1vU6XVosr9c+cOROBgYEAZP2VlZVa\nlOqUK/UDwMsvv4zly5dj7NixGlTZPVfq3717N5YtWwaTyQQAGDNmjBalOuVK/ePHj0dtbS0AoLa2\nFqNHj4a3t+pzhpyaM2cOgoODu31ez59doPf63fnsDpow8bRrVOx2O0JDQ1vvm0wm2O32Xo/Ryxey\nK/W3t337dqSnp6tRmktc/fcvLCzEunXrAOhrurkr9ZeXl+PGjRtITU1FUlIS3njjDbXL7JYr9Wdn\nZ+PMmTMpWP03AAADp0lEQVSYMGECEhISsGXLFrXLdJueP7t95epnVx8x76KerlFpz2Aw9PjBv3nz\nJpYvX44tW7bA39+/3+t0hatfTKLT/Ai9fKH1pY7Dhw9jx44deP/99wewor5xpf7169cjLy8PBoMB\nQnYJq1CZa1ypv7GxESdPnsShQ4dQV1eHmTNnYsaMGYiIiFChwp65Uv/GjRuRmJgIq9WKCxcuIC0t\nDadPn0ZAQIAKFSqn189uX/Tls+tRYaLWNSpqMBqNsNlsrfdtNltrd0R3x1RWVsJoNKpWY09cqR8A\nysrKkJ2djeLi4h6b1Wpzpf4PP/wQmZmZAICqqiocOHAAPj4+yMjIULVWZ1ypPzQ0FGPGjIGvry98\nfX0xd+5cnD59Whdh4kr9x44dw4YNGwAA4eHhmDhxIs6fP4+kpCRVa3WHnj+7rurzZ7ffRnQ09swz\nz7TOCNm0aZPTAfjm5maxcuVKsX79erXL66KxsVFMmjRJXLx4Udy5c6fXAfgPPvhAV4N4rtT/6aef\nivDwcPHBBx9oVGX3XKm/vUceeURXs7lcqf/cuXNi/vz5oqmpSdy6dUvExcWJM2fOaFRxR67U/9RT\nT4mcnBwhhBBXrlwRRqNRXL9+XYtynbp48aJLA/B6++y26Kl+dz67gyZMrl+/LubPn99larDdbhfp\n6elCCCHee+89YTAYREJCgkhMTBSJiYniwIEDmtVcVFQkJk+eLMLDw8XGjRuFEEK88sor4pVXXmk9\n5rHHHhPh4eEiPj5efPjhh1qV6lRv9a9Zs0aMGjWq9d86OTlZy3K7cOXfv4XewkQI1+p/6aWXRExM\njIiLi9N8KnxnvdV/7do1sXjxYhEfHy/i4uLErl27tCy3g8zMTDF+/Hjh4+MjTCaT2L59u0d9dnur\n353Prq4uWiQiIs80aGZzERGRdhgmRESkGMOEiIgUY5gQEZFiDBMiIlKMYUJERIoxTIhUUl1djRUr\nVnS7FTWRJ2OYEKkkODgY8+bNw969e7UuhajfMUyIVLRkyRKnS90TeTqGCZGKQkJCcOvWrdZ9OogG\nC4YJkYrq6+vh7++P/fv3a10KUb9imBCpxOFwICcnBy+88ILTnUCJPBnDhEglTz/9NFauXImpU6ei\noqICDQ0NWpdE1G8YJkQq2Lt3L6ZNm4bY2FgAwOLFi1FUVKRxVUT9h0vQExGRYmyZEBGRYgwTIiJS\njGFCRESKMUyIiEgxhgkRESnGMCEiIsUYJkREpBjDhIiIFPt/nVtbxPwtFX4AAAAASUVORK5CYII=\n"
}
],
"prompt_number": 113
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Answer to Question 3\n",
"\n",
"The $\\pi_0$ value estimates the proportion of null $p$ values"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a_fdr2 = np.zeros(5001)\n",
"a_fdr2[-1] = 1.0"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 114
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"for i in range(len(pvalues))[::-1]:\n",
" a_fdr2[i] = min(a_pi0[i] * pvalues[i] * 5000 / (i + 1), a_fdr[i + 1])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 117
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df[\"FDR with PI0\"] = a_fdr2[:-1]"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 120
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fdr2 = df[\"FDR with PI0\"]\n",
"len(fdr2[fdr2 <= 0.01])"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 121,
"text": [
"1456"
]
}
],
"prompt_number": 121
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Answer to Question 4\n",
"\n",
"We now have 1456 significant features, this makes sense as estimating $\\pi_0(1)$ by 1 is usually more conservative than necessary."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment