Last active
August 29, 2015 14:06
-
-
Save quaquel/95f798a417f170f5d56b to your computer and use it in GitHub Desktop.
f prim
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:3be6924ae61470a8a34595f3db6c93f05b64ad6be9c7479a60f529de67adc5fb" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from analysis import prim\n", | |
"from expWorkbench import load_results, ema_logging\n", | |
"\n", | |
"class fPrim(prim.Prim):\n", | |
" '''\n", | |
" This is a small extension to the normal prim. This \n", | |
" extension adds functionality for automatically\n", | |
" selecting a specific box on the peeling_trajectory\n", | |
" found by normal prim. In the literature, this is\n", | |
" known as fPrim. \n", | |
" \n", | |
" The automatic selection is based on making a \n", | |
" tradeoff between coverage and density. More \n", | |
" specifically, the user specifies an f_value (between 0 and 1)\n", | |
" that determines the weight of coverage, the weight\n", | |
" of density then becomes 1-f_value. \n", | |
" \n", | |
" The box on the peeling trajectory that is automatically chosen\n", | |
" is the box that has the maximum score on the objective function.\n", | |
" \n", | |
" Outside of the automatic selection of a box, this extension has\n", | |
" all the functionality of normal prim. \n", | |
" \n", | |
" \n", | |
" '''\n", | |
" \n", | |
" \n", | |
" def __init__(self, \n", | |
" results,\n", | |
" classify, \n", | |
" f_value,\n", | |
" obj_function=prim.DEFAULT, \n", | |
" peel_alpha=0.05, \n", | |
" paste_alpha=0.05,\n", | |
" mass_min=0.05, \n", | |
" threshold=None, \n", | |
" threshold_type=prim.ABOVE,\n", | |
" incl_unc=[]):\n", | |
" self.f_value = f_value\n", | |
" \n", | |
" super(fPrim, self).__init__(results, \n", | |
" classify,\n", | |
" obj_function=obj_function, \n", | |
" peel_alpha=peel_alpha, \n", | |
" paste_alpha=paste_alpha,\n", | |
" mass_min=mass_min, \n", | |
" threshold=threshold, \n", | |
" threshold_type=threshold_type,\n", | |
" incl_unc=incl_unc)\n", | |
" \n", | |
" \n", | |
" \n", | |
" def find_box(self):\n", | |
" box = super(fPrim, self).find_box()\n", | |
" \n", | |
" # here the f prim part should go\n", | |
" obj = self.f_value *box.peeling_trajectory['coverage'] + (1-self.f_value)*box.peeling_trajectory['density']\n", | |
" i = np.where(obj==np.max(obj))[0][0]\n", | |
" \n", | |
" box.select(i)\n", | |
" \n", | |
" return box\n", | |
" " | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 15 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ema_logging.log_to_stderr(ema_logging.INFO)\n", | |
"\n", | |
"def classify(data):\n", | |
" #get the output for deceased population\n", | |
" result = data['deceased population region 1']\n", | |
" \n", | |
" #make an empty array of length equal to number of cases \n", | |
" classes = np.zeros(result.shape[0])\n", | |
" \n", | |
" #if deceased population is higher then 1.000.000 people, classify as 1 \n", | |
" classes[result[:, -1] > 1000000] = 1\n", | |
" \n", | |
" return classes\n", | |
"\n", | |
"#load data\n", | |
"fn = r'/Domain/tudelft.net/Users/jhkwakkel/EMAworkbench/src/examples/data/1000 flu cases.tar.gz'\n", | |
"results = load_results(fn)\n", | |
"experiments, results = results\n", | |
"\n", | |
"#extract results for 1 policy\n", | |
"logical = experiments['policy'] == 'no policy'\n", | |
"new_experiments = experiments[ logical ]\n", | |
"new_results = {}\n", | |
"for key, value in results.items():\n", | |
" new_results[key] = value[logical]\n", | |
"\n", | |
"results = (new_experiments, new_results)\n", | |
"\n", | |
"#perform prim on modified results tuple\n", | |
"\n", | |
"prim = fPrim(results, classify, f_value=0.5, threshold=0.8, threshold_type=1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"[INFO/MainProcess] results loaded succesfully from /Domain/tudelft.net/Users/jhkwakkel/EMAworkbench/src/examples/data/1000 flu cases.tar.gz\n" | |
] | |
} | |
], | |
"prompt_number": 16 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"box1 = prim.find_box()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"[INFO/MainProcess] 1000 points remaining, containing 369 cases of interest\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"[INFO/MainProcess] mean: 0.994652406417, mass: 0.187, coverage: 0.50406504065, density: 0.994652406417 restricted_dimensions: 9.0\n" | |
] | |
} | |
], | |
"prompt_number": 17 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"box1.inspect()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"box mean mass coverage density res dim\n", | |
"18 0.79 0.39 0.83 0.79 4\n", | |
"\n", | |
"uncertainty box 18 \n", | |
" min max qp values\n", | |
"infection ratio region 1 0.05 - 0.15 2.33e-20\n", | |
"normal contact rate region 1 34.22 - 99.93 4.16e-11\n", | |
"recovery time region 1 0.22 - 0.75 2.54e-06\n", | |
"fatality ratio region 1 0.00 - 0.10 3.55e-02\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 18 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from operator import itemgetter\n", | |
"import pandas as pd\n", | |
"\n", | |
"def inspect(box, i=None):\n", | |
" '''\n", | |
"\n", | |
" Write the stats and box limits of the user specified box to standard \n", | |
" out. if i is not provided, the last box will be printed\n", | |
"\n", | |
" '''\n", | |
"\n", | |
" if i == None:\n", | |
" i = box._cur_box\n", | |
"\n", | |
" stats = box.peeling_trajectory.iloc[i].to_dict()\n", | |
" stats['restricted_dim'] = stats['res dim']\n", | |
"\n", | |
" qp_values = box._calculate_quasi_p(i)\n", | |
" uncs = [(key, value) for key, value in qp_values.iteritems()]\n", | |
" uncs.sort(key=itemgetter(1))\n", | |
" uncs = [uncs[0] for uncs in uncs]\n", | |
" \n", | |
" \n", | |
" #make the descriptive statistics for the box\n", | |
" print box.peeling_trajectory.iloc[i]\n", | |
" print \n", | |
" \n", | |
" # make the box definition\n", | |
" columns = pd.MultiIndex.from_product([['box {}'.format(i)],['min', 'max', 'qp values']])\n", | |
" box_lim = pd.DataFrame(np.zeros((len(uncs), 3)), index=uncs, columns=columns)\n", | |
" \n", | |
" for unc in uncs:\n", | |
" values = box.box_lim[unc][:]\n", | |
" box_lim.loc[unc] = [values[0], values[1], qp_values[unc]]\n", | |
" \n", | |
" print box_lim\n", | |
"\n", | |
"inspect(box1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"coverage 0.975000\n", | |
"density 0.722222\n", | |
"mass 0.620690\n", | |
"mean 0.722222\n", | |
"res dim 2.000000\n", | |
"Name: 8, dtype: float64\n", | |
"\n", | |
" box 8 \n", | |
" min max qp values\n", | |
"shareemp 0.573356 0.914554 0.000214\n", | |
"sharemanu 0.071305 0.459816 0.330173\n" | |
] | |
} | |
], | |
"prompt_number": 70 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Question" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import pandas as pd\n", | |
"import numpy.lib.recfunctions as recfunctions\n", | |
"\n", | |
"def load_data(folder,countrycode):\n", | |
" fn_exp = r'{}experiments{}.csv'.format(folder,countrycode)\n", | |
"\n", | |
" experiments = pd.io.parsers.read_table(fn_exp, sep=',')\n", | |
" experiments = experiments.to_records()\n", | |
" experiments = recfunctions.drop_fields(experiments, 'index')\n", | |
"\n", | |
" fn_out = r'{}outcomes{}.csv'.format(folder,countrycode)\n", | |
" outcomes = pd.io.parsers.read_table(fn_out, sep=',')\n", | |
" outcome_names = list(outcomes.columns.values)\n", | |
"\n", | |
" outcomes = {name:np.asarray(outcomes[name]) for name in outcome_names}\n", | |
" return experiments, outcomes" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 62 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"fn = r'./data/'\n", | |
"cc = 'THA'\n", | |
"\n", | |
"results = load_data(fn,cc)\n", | |
"experiments, outcomes = results" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 63 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#extract results for 1 policy\n", | |
"logical = experiments['ssp'] == 'ssp5'\n", | |
"new_experiments = experiments[ logical ]\n", | |
"new_results = {}\n", | |
"for key, value in outcomes.items():\n", | |
" new_results[key] = value[logical]\n", | |
"\n", | |
"results = (new_experiments, new_results)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 64 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"ema_logging.log_to_stderr(ema_logging.INFO)\n", | |
"\n", | |
"def threshold(new_results,variable):\n", | |
" th=np.percentile(new_results[variable],[50])\n", | |
" return th\n", | |
"\n", | |
"th_poor=threshold(new_results,'below1.25')\n", | |
"th_nearpoor=threshold(new_results,'below4')\n", | |
"th_bottom=threshold(new_results,'quintile2')\n", | |
"\n", | |
"def classify(data):\n", | |
" #get the output for poorpeople\n", | |
" poor = data['below1.25']\n", | |
" \n", | |
" #make an empty array of length equal to number of cases \n", | |
" classes = np.zeros(poor.shape[0])\n", | |
" \n", | |
" #if poor people are below the 2nd quartile, classify as 1 \n", | |
" keep=(data['below1.25']<th_poor)&(data['quintile2']>th_bottom)\n", | |
" classes[keep] = 1\n", | |
" \n", | |
" return classes" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 65 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"#perform prim on modified results tuple\n", | |
"\n", | |
"prim = fPrim(results, classify, f_value=0.8, threshold=0.8, threshold_type=1)\n", | |
"box1 = prim.find_box()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"[INFO/MainProcess] 87 points remaining, containing 40 cases of interest\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stderr", | |
"text": [ | |
"[INFO/MainProcess] mean: 0.916666666667, mass: 0.413793103448, coverage: 0.825, density: 0.916666666667 restricted_dimensions: 5.0\n" | |
] | |
} | |
], | |
"prompt_number": 66 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"box1.inspect()" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"box mean mass coverage density res dim\n", | |
"8 0.72 0.62 0.97 0.72 2\n", | |
"\n", | |
"uncertainty box 8 \n", | |
" min max qp values\n", | |
"shareemp 0.57 - 0.91 2.14e-04\n", | |
"sharemanu 0.07 - 0.46 3.30e-01\n", | |
"\n", | |
"\n" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"inspect(box1)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"coverage 0.975000\n", | |
"density 0.722222\n", | |
"mass 0.620690\n", | |
"mean 0.722222\n", | |
"res dim 2.000000\n", | |
"Name: 8, dtype: float64\n", | |
"\n", | |
" box 8 \n", | |
" min max qp values\n", | |
"shareemp 0.573356 0.914554 0.000214\n", | |
"sharemanu 0.071305 0.459816 0.330173\n" | |
] | |
} | |
], | |
"prompt_number": 69 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment