Skip to content

Instantly share code, notes, and snippets.

@quaquel
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quaquel/95f798a417f170f5d56b to your computer and use it in GitHub Desktop.
Save quaquel/95f798a417f170f5d56b to your computer and use it in GitHub Desktop.
f prim
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:3be6924ae61470a8a34595f3db6c93f05b64ad6be9c7479a60f529de67adc5fb"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from analysis import prim\n",
"from expWorkbench import load_results, ema_logging\n",
"\n",
"class fPrim(prim.Prim):\n",
" '''\n",
" This is a small extension to the normal prim. This \n",
" extension adds functionality for automatically\n",
" selecting a specific box on the peeling_trajectory\n",
" found by normal prim. In the literature, this is\n",
" known as fPrim. \n",
" \n",
" The automatic selection is based on making a \n",
" tradeoff between coverage and density. More \n",
" specifically, the user specifies an f_value (between 0 and 1)\n",
" that determines the weight of coverage, the weight\n",
" of density then becomes 1-f_value. \n",
" \n",
" The box on the peeling trajectory that is automatically chosen\n",
" is the box that has the maximum score on the objective function.\n",
" \n",
" Outside of the automatic selection of a box, this extension has\n",
" all the functionality of normal prim. \n",
" \n",
" \n",
" '''\n",
" \n",
" \n",
" def __init__(self, \n",
" results,\n",
" classify, \n",
" f_value,\n",
" obj_function=prim.DEFAULT, \n",
" peel_alpha=0.05, \n",
" paste_alpha=0.05,\n",
" mass_min=0.05, \n",
" threshold=None, \n",
" threshold_type=prim.ABOVE,\n",
" incl_unc=[]):\n",
" self.f_value = f_value\n",
" \n",
" super(fPrim, self).__init__(results, \n",
" classify,\n",
" obj_function=obj_function, \n",
" peel_alpha=peel_alpha, \n",
" paste_alpha=paste_alpha,\n",
" mass_min=mass_min, \n",
" threshold=threshold, \n",
" threshold_type=threshold_type,\n",
" incl_unc=incl_unc)\n",
" \n",
" \n",
" \n",
" def find_box(self):\n",
" box = super(fPrim, self).find_box()\n",
" \n",
" # here the f prim part should go\n",
" obj = self.f_value *box.peeling_trajectory['coverage'] + (1-self.f_value)*box.peeling_trajectory['density']\n",
" i = np.where(obj==np.max(obj))[0][0]\n",
" \n",
" box.select(i)\n",
" \n",
" return box\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ema_logging.log_to_stderr(ema_logging.INFO)\n",
"\n",
"def classify(data):\n",
" #get the output for deceased population\n",
" result = data['deceased population region 1']\n",
" \n",
" #make an empty array of length equal to number of cases \n",
" classes = np.zeros(result.shape[0])\n",
" \n",
" #if deceased population is higher then 1.000.000 people, classify as 1 \n",
" classes[result[:, -1] > 1000000] = 1\n",
" \n",
" return classes\n",
"\n",
"#load data\n",
"fn = r'/Domain/tudelft.net/Users/jhkwakkel/EMAworkbench/src/examples/data/1000 flu cases.tar.gz'\n",
"results = load_results(fn)\n",
"experiments, results = results\n",
"\n",
"#extract results for 1 policy\n",
"logical = experiments['policy'] == 'no policy'\n",
"new_experiments = experiments[ logical ]\n",
"new_results = {}\n",
"for key, value in results.items():\n",
" new_results[key] = value[logical]\n",
"\n",
"results = (new_experiments, new_results)\n",
"\n",
"#perform prim on modified results tuple\n",
"\n",
"prim = fPrim(results, classify, f_value=0.5, threshold=0.8, threshold_type=1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"[INFO/MainProcess] results loaded succesfully from /Domain/tudelft.net/Users/jhkwakkel/EMAworkbench/src/examples/data/1000 flu cases.tar.gz\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"box1 = prim.find_box()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"[INFO/MainProcess] 1000 points remaining, containing 369 cases of interest\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"[INFO/MainProcess] mean: 0.994652406417, mass: 0.187, coverage: 0.50406504065, density: 0.994652406417 restricted_dimensions: 9.0\n"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"box1.inspect()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"box mean mass coverage density res dim\n",
"18 0.79 0.39 0.83 0.79 4\n",
"\n",
"uncertainty box 18 \n",
" min max qp values\n",
"infection ratio region 1 0.05 - 0.15 2.33e-20\n",
"normal contact rate region 1 34.22 - 99.93 4.16e-11\n",
"recovery time region 1 0.22 - 0.75 2.54e-06\n",
"fatality ratio region 1 0.00 - 0.10 3.55e-02\n",
"\n",
"\n"
]
}
],
"prompt_number": 18
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from operator import itemgetter\n",
"import pandas as pd\n",
"\n",
"def inspect(box, i=None):\n",
" '''\n",
"\n",
" Write the stats and box limits of the user specified box to standard \n",
" out. if i is not provided, the last box will be printed\n",
"\n",
" '''\n",
"\n",
" if i == None:\n",
" i = box._cur_box\n",
"\n",
" stats = box.peeling_trajectory.iloc[i].to_dict()\n",
" stats['restricted_dim'] = stats['res dim']\n",
"\n",
" qp_values = box._calculate_quasi_p(i)\n",
" uncs = [(key, value) for key, value in qp_values.iteritems()]\n",
" uncs.sort(key=itemgetter(1))\n",
" uncs = [uncs[0] for uncs in uncs]\n",
" \n",
" \n",
" #make the descriptive statistics for the box\n",
" print box.peeling_trajectory.iloc[i]\n",
" print \n",
" \n",
" # make the box definition\n",
" columns = pd.MultiIndex.from_product([['box {}'.format(i)],['min', 'max', 'qp values']])\n",
" box_lim = pd.DataFrame(np.zeros((len(uncs), 3)), index=uncs, columns=columns)\n",
" \n",
" for unc in uncs:\n",
" values = box.box_lim[unc][:]\n",
" box_lim.loc[unc] = [values[0], values[1], qp_values[unc]]\n",
" \n",
" print box_lim\n",
"\n",
"inspect(box1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"coverage 0.975000\n",
"density 0.722222\n",
"mass 0.620690\n",
"mean 0.722222\n",
"res dim 2.000000\n",
"Name: 8, dtype: float64\n",
"\n",
" box 8 \n",
" min max qp values\n",
"shareemp 0.573356 0.914554 0.000214\n",
"sharemanu 0.071305 0.459816 0.330173\n"
]
}
],
"prompt_number": 70
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Question"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import pandas as pd\n",
"import numpy.lib.recfunctions as recfunctions\n",
"\n",
"def load_data(folder,countrycode):\n",
" fn_exp = r'{}experiments{}.csv'.format(folder,countrycode)\n",
"\n",
" experiments = pd.io.parsers.read_table(fn_exp, sep=',')\n",
" experiments = experiments.to_records()\n",
" experiments = recfunctions.drop_fields(experiments, 'index')\n",
"\n",
" fn_out = r'{}outcomes{}.csv'.format(folder,countrycode)\n",
" outcomes = pd.io.parsers.read_table(fn_out, sep=',')\n",
" outcome_names = list(outcomes.columns.values)\n",
"\n",
" outcomes = {name:np.asarray(outcomes[name]) for name in outcome_names}\n",
" return experiments, outcomes"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 62
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"fn = r'./data/'\n",
"cc = 'THA'\n",
"\n",
"results = load_data(fn,cc)\n",
"experiments, outcomes = results"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 63
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#extract results for 1 policy\n",
"logical = experiments['ssp'] == 'ssp5'\n",
"new_experiments = experiments[ logical ]\n",
"new_results = {}\n",
"for key, value in outcomes.items():\n",
" new_results[key] = value[logical]\n",
"\n",
"results = (new_experiments, new_results)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 64
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"ema_logging.log_to_stderr(ema_logging.INFO)\n",
"\n",
"def threshold(new_results,variable):\n",
" th=np.percentile(new_results[variable],[50])\n",
" return th\n",
"\n",
"th_poor=threshold(new_results,'below1.25')\n",
"th_nearpoor=threshold(new_results,'below4')\n",
"th_bottom=threshold(new_results,'quintile2')\n",
"\n",
"def classify(data):\n",
" #get the output for poorpeople\n",
" poor = data['below1.25']\n",
" \n",
" #make an empty array of length equal to number of cases \n",
" classes = np.zeros(poor.shape[0])\n",
" \n",
" #if poor people are below the 2nd quartile, classify as 1 \n",
" keep=(data['below1.25']<th_poor)&(data['quintile2']>th_bottom)\n",
" classes[keep] = 1\n",
" \n",
" return classes"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 65
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#perform prim on modified results tuple\n",
"\n",
"prim = fPrim(results, classify, f_value=0.8, threshold=0.8, threshold_type=1)\n",
"box1 = prim.find_box()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
"[INFO/MainProcess] 87 points remaining, containing 40 cases of interest\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"[INFO/MainProcess] mean: 0.916666666667, mass: 0.413793103448, coverage: 0.825, density: 0.916666666667 restricted_dimensions: 5.0\n"
]
}
],
"prompt_number": 66
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"box1.inspect()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"box mean mass coverage density res dim\n",
"8 0.72 0.62 0.97 0.72 2\n",
"\n",
"uncertainty box 8 \n",
" min max qp values\n",
"shareemp 0.57 - 0.91 2.14e-04\n",
"sharemanu 0.07 - 0.46 3.30e-01\n",
"\n",
"\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"inspect(box1)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"coverage 0.975000\n",
"density 0.722222\n",
"mass 0.620690\n",
"mean 0.722222\n",
"res dim 2.000000\n",
"Name: 8, dtype: float64\n",
"\n",
" box 8 \n",
" min max qp values\n",
"shareemp 0.573356 0.914554 0.000214\n",
"sharemanu 0.071305 0.459816 0.330173\n"
]
}
],
"prompt_number": 69
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment