Skip to content

Instantly share code, notes, and snippets.

@wiso
Last active August 29, 2015 14:23
Show Gist options
  • Save wiso/a1f8f1bdd4bd7def48fd to your computer and use it in GitHub Desktop.
Save wiso/a1f8f1bdd4bd7def48fd to your computer and use it in GitHub Desktop.
photon jet sample statistics
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import ROOT\n",
"import rootnotes"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"ENTRIES_MC_1000_2000_TOTAL = 102922\n",
"ENTRIES_MC_2000_4000_TOTAL = 1376"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Use as a minimal bin size 250 GeV, similarly to previous analysis"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"binsize = 250 # GeV\n",
"lefts = np.arange(2000, 7000, 250)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" M range events (1000-2000) events (2000-4000) fraction (1000-2000) fraction (2000-4000) \n",
"====================================================================================================\n",
"[ 2000 - 2250] 27206.000 4.000 26.434% 0.291%\n",
"[ 2250 - 2500] 21828.000 2.000 21.208% 0.145%\n",
"[ 2500 - 2750] 14010.000 8.000 13.612% 0.581%\n",
"[ 2750 - 3000] 8427.000 14.000 8.188% 1.017%\n",
"[ 3000 - 3250] 4923.000 37.000 4.783% 2.689%\n",
"[ 3250 - 3500] 3140.000 44.000 3.051% 3.198%\n",
"[ 3500 - 3750] 1893.000 66.000 1.839% 4.797%\n",
"[ 3750 - 4000] 1074.000 130.000 1.044% 9.448%\n",
"[ 4000 - 4250] 564.000 267.000 0.548% 19.404%\n",
"[ 4250 - 4500] 305.000 242.000 0.296% 17.587%\n",
"[ 4500 - 4750] 202.000 185.000 0.196% 13.445%\n",
"[ 4750 - 5000] 104.000 118.000 0.101% 8.576%\n",
"[ 5000 - 5250] 55.000 89.000 0.053% 6.468%\n",
"[ 5250 - 5500] 37.000 61.000 0.036% 4.433%\n",
"[ 5500 - 5750] 19.000 32.000 0.018% 2.326%\n",
"[ 5750 - 6000] 12.000 17.000 0.012% 1.235%\n",
"[ 6000 - 6250] 6.000 14.000 0.006% 1.017%\n",
"[ 6250 - 6500] 0.000 11.000 0.000% 0.799%\n",
"[ 6500 - 6750] 0.000 6.000 0.000% 0.436%\n",
"[ 6750 - 7000] 1.000 4.000 0.001% 0.291%\n"
]
}
],
"source": [
"print \"{:^18s} {:^22s} {:^22s} {:^22s} {:^22s}\".format('M range', 'events (1000-2000)', 'events (2000-4000)', 'fraction (1000-2000)', 'fraction (2000-4000)')\n",
"print \"=\" * 100\n",
"events_1000_2000 = [27206, 21828, 14010, 8427, 4923, 3140, 1893, 1074, 564, 305, 202, 104, 55, 37, 19, 12, 6, 0, 0, 1]\n",
"events_2000_4000 = [4, 2, 8, 14, 37, 44, 66, 130, 267, 242, 185, 118, 89, 61, 32, 17, 14, 11, 6, 4]\n",
"for l, e1, e2 in zip(lefts, events_1000_2000, events_2000_4000):\n",
" fraction_events_1000_2000 = float(e1) / ENTRIES_MC_1000_2000_TOTAL\n",
" fraction_events_2000_4000 = float(e2) / ENTRIES_MC_2000_4000_TOTAL\n",
" print \"[{:5.0f} - {:5.0f}] {:20.3f} {:20.3f} {:20.3%} {:20.3%}\".format(l, l + binsize, e1, e2, fraction_events_1000_2000, fraction_events_2000_4000)\n",
"events_1000_2000 = np.array(events_1000_2000, dtype=float)\n",
"events_2000_4000 = np.array(events_2000_4000, dtype=float)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Minimum events needed to have small statistical error"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" statistical error minimum events\n",
"================================================================================\n",
" 30% 11.11\n",
" 10% 100.00\n",
" 5% 400.00\n"
]
}
],
"source": [
"statistical_errors = np.array([30., 10., 5.]) / 100.\n",
"minimum_events = 1. / statistical_errors ** 2\n",
"print \" statistical error minimum events\"\n",
"print \"=\" * 80\n",
"for s, m in zip(statistical_errors, minimum_events):\n",
" print \"{:10.0%} {:5.2f}\".format(s, m)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ratio wrt present simulation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many events do we need in a bin to get a target statistical error wrt the present simulation? How much do we need to increase the statistics?"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python2.7/site-packages/IPython/kernel/__main__.py:1: RuntimeWarning: divide by zero encountered in divide\n",
" if __name__ == '__main__':\n"
]
}
],
"source": [
"ratios_1000_2000 = minimum_events / events_1000_2000[:, np.newaxis]\n",
"ratios_2000_4000 = minimum_events / events_2000_4000[:, np.newaxis]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample 1000-2000"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 30.00% 10.00% 5.00%\n",
"================================================================================\n",
"[ 2000 - 2250] 0.00 0.00 0.01\n",
"[ 2250 - 2500] 0.00 0.00 0.02\n",
"[ 2500 - 2750] 0.00 0.01 0.03\n",
"[ 2750 - 3000] 0.00 0.01 0.05\n",
"[ 3000 - 3250] 0.00 0.02 0.08\n",
"[ 3250 - 3500] 0.00 0.03 0.13\n",
"[ 3500 - 3750] 0.01 0.05 0.21\n",
"[ 3750 - 4000] 0.01 0.09 0.37\n",
"[ 4000 - 4250] 0.02 0.18 0.71\n",
"[ 4250 - 4500] 0.04 0.33 1.31\n",
"[ 4500 - 4750] 0.06 0.50 1.98\n",
"[ 4750 - 5000] 0.11 0.96 3.85\n",
"[ 5000 - 5250] 0.20 1.82 7.27\n",
"[ 5250 - 5500] 0.30 2.70 10.81\n",
"[ 5500 - 5750] 0.58 5.26 21.05\n",
"[ 5750 - 6000] 0.93 8.33 33.33\n",
"[ 6000 - 6250] 1.85 16.67 66.67\n",
"[ 6250 - 6500] inf inf inf\n",
"[ 6500 - 6750] inf inf inf\n",
"[ 6750 - 7000] 11.11 100.00 400.00\n"
]
}
],
"source": [
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n",
"print \"=\" * 80\n",
"for l, row in zip(lefts, ratios_1000_2000):\n",
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.2f}\".format(_) for _ in row])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample 2000-4000"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 30.00% 10.00% 5.00%\n",
"================================================================================\n",
"[ 2000 - 2250] 2.78 25.00 100.00\n",
"[ 2250 - 2500] 5.56 50.00 200.00\n",
"[ 2500 - 2750] 1.39 12.50 50.00\n",
"[ 2750 - 3000] 0.79 7.14 28.57\n",
"[ 3000 - 3250] 0.30 2.70 10.81\n",
"[ 3250 - 3500] 0.25 2.27 9.09\n",
"[ 3500 - 3750] 0.17 1.52 6.06\n",
"[ 3750 - 4000] 0.09 0.77 3.08\n",
"[ 4000 - 4250] 0.04 0.37 1.50\n",
"[ 4250 - 4500] 0.05 0.41 1.65\n",
"[ 4500 - 4750] 0.06 0.54 2.16\n",
"[ 4750 - 5000] 0.09 0.85 3.39\n",
"[ 5000 - 5250] 0.12 1.12 4.49\n",
"[ 5250 - 5500] 0.18 1.64 6.56\n",
"[ 5500 - 5750] 0.35 3.12 12.50\n",
"[ 5750 - 6000] 0.65 5.88 23.53\n",
"[ 6000 - 6250] 0.79 7.14 28.57\n",
"[ 6250 - 6500] 1.01 9.09 36.36\n",
"[ 6500 - 6750] 1.85 16.67 66.67\n",
"[ 6750 - 7000] 2.78 25.00 100.00\n"
]
}
],
"source": [
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n",
"print \"=\" * 80\n",
"for l, row in zip(lefts, ratios_2000_4000):\n",
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.2f}\".format(_) for _ in row])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of events we want (after selection)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample 1000-2000"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 30.00% 10.00% 5.00%\n",
"================================================================================\n",
"[ 2000 - 2250] 42 378 1513\n",
"[ 2250 - 2500] 52 472 1886\n",
"[ 2500 - 2750] 82 735 2939\n",
"[ 2750 - 3000] 136 1221 4885\n",
"[ 3000 - 3250] 232 2091 8363\n",
"[ 3250 - 3500] 364 3278 13111\n",
"[ 3500 - 3750] 604 5437 21748\n",
"[ 3750 - 4000] 1065 9583 38332\n",
"[ 4000 - 4250] 2028 18249 72994\n",
"[ 4250 - 4500] 3749 33745 134980\n",
"[ 4500 - 4750] 5661 50951 203806\n",
"[ 4750 - 5000] 10996 98963 395854\n",
"[ 5000 - 5250] 20792 187131 748524\n",
"[ 5250 - 5500] 30908 278168 1112670\n",
"[ 5500 - 5750] 60188 541695 2166779\n",
"[ 5750 - 6000] 95298 857683 3430733\n",
"[ 6000 - 6250] 190596 1715367 6861467\n",
"[ 6250 - 6500] inf inf inf\n",
"[ 6500 - 6750] inf inf inf\n",
"[ 6750 - 7000] 1143578 10292200 41168800\n"
]
}
],
"source": [
"events_desidered = ratios_1000_2000 * ENTRIES_MC_1000_2000_TOTAL\n",
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n",
"print \"=\" * 80\n",
"for l, row in zip(lefts, events_desidered):\n",
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.0f}\".format(_) for _ in row])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Sample 2000-4000"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 30.00% 10.00% 5.00%\n",
"================================================================================\n",
"[ 2000 - 2250] 3822 34400 137600\n",
"[ 2250 - 2500] 7644 68800 275200\n",
"[ 2500 - 2750] 1911 17200 68800\n",
"[ 2750 - 3000] 1092 9829 39314\n",
"[ 3000 - 3250] 413 3719 14876\n",
"[ 3250 - 3500] 347 3127 12509\n",
"[ 3500 - 3750] 232 2085 8339\n",
"[ 3750 - 4000] 118 1058 4234\n",
"[ 4000 - 4250] 57 515 2061\n",
"[ 4250 - 4500] 63 569 2274\n",
"[ 4500 - 4750] 83 744 2975\n",
"[ 4750 - 5000] 130 1166 4664\n",
"[ 5000 - 5250] 172 1546 6184\n",
"[ 5250 - 5500] 251 2256 9023\n",
"[ 5500 - 5750] 478 4300 17200\n",
"[ 5750 - 6000] 899 8094 32376\n",
"[ 6000 - 6250] 1092 9829 39314\n",
"[ 6250 - 6500] 1390 12509 50036\n",
"[ 6500 - 6750] 2548 22933 91733\n",
"[ 6750 - 7000] 3822 34400 137600\n"
]
}
],
"source": [
"events_desidered = ratios_2000_4000 * ENTRIES_MC_2000_4000_TOTAL\n",
"print \" \" + \" \".join([\"{:>10.2%}\".format(_) for _ in statistical_errors])\n",
"print \"=\" * 80\n",
"for l, row in zip(lefts, events_desidered):\n",
" print \"[{:5.0f} - {:5.0f}]\".format(l, l + binsize) + \" \".join([\"{:>10.0f}\".format(_) for _ in row])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment