Skip to content

Instantly share code, notes, and snippets.

@mtzl
Created October 11, 2016 17:19
Show Gist options
  • Save mtzl/904f26090dcc636d4cf49ec6576367db to your computer and use it in GitHub Desktop.
Save mtzl/904f26090dcc636d4cf49ec6576367db to your computer and use it in GitHub Desktop.
Orca PID
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from km3pipe.io import H5Chain\n",
"\n",
"prefix = '/home/mlotze/km3net/orca-9m/uncut/'\n",
"n_events = {\n",
" prefix + 'numuon_cc.h5': 2400,\n",
" prefix + 'nuelec_cc.h5': 1200,\n",
" prefix + 'nuelec_nc.h5': 1200,\n",
" prefix + 'atmuon.h5': 2400,\n",
"}\n",
"files = list(n_events.keys())\n",
"with H5Chain(files) as c:\n",
" df = c(n_events)['mva']"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"plt.style.use('foursigma')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = df.fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from km3pipe.io import guess_mc_feats\n",
"\n",
"mc_feats = guess_mc_feats(df)\n",
"reco_feats = [feat for feat in df.columns if feat not in mc_feats]\n",
"reco_feats\n",
"X = df[reco_feats]\n",
"mc_info = df[mc_feats]\n",
"del(df)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from trawler import TargetGen\n",
"\n",
"mc_type = mc_info['dusj_MCtype']\n",
"is_cc = np.array([True] * mc_type.shape[0])\n",
"tg = TargetGen(is_cc, mc_type)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Counter({'cscd': 2400, 'atmu': 2400, 'track': 2400})\n",
"Counter({0: 2400, 1: 2400, 2: 2400})\n",
"['atmu' 'cscd' 'track']\n"
]
}
],
"source": [
"from collections import Counter \n",
"from sklearn.preprocessing import LabelEncoder\n",
"\n",
"target = tg.atmu_track_cscd\n",
"le = LabelEncoder()\n",
"y = le.fit_transform(target)\n",
"\n",
"print(Counter(target))\n",
"print(Counter(y))\n",
"print(le.classes_)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'atmu': 0, 'cscd': 1, 'track': 2}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"num2tag = {num: le.inverse_transform(num) for num in np.unique(y)}\n",
"tag2num = {num2tag[num]: num for num, tag in num2tag.items()}\n",
"tag2num"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# make energy binning\n",
"e_min = 0\n",
"e_max = 100\n",
"nbins = 25\n",
"\n",
"binlims = np.linspace(e_min, e_max, nbins+1)\n",
"log_binlims = np.logspace(0, 2, nbins+1)\n",
"energy = mc_info['dusj_MCen']\n",
"log_energy = np.log10(energy) \n",
"r = np.sqrt(np.power(X['tf_r_x'], 2) + np.power(X['tf_r_y'], 2))\n",
"e_rec_cscd = X['dusj_d_E']\n",
"e_rec_trak = X['tf_r_NuE']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['EventID',\n",
" 'dusj_EventID',\n",
" 'dusj_GlobalWeight',\n",
" 'dusj_LivetimeSec',\n",
" 'dusj_MCCh',\n",
" 'dusj_MC_phi',\n",
" 'dusj_MC_theta',\n",
" 'dusj_MC_x',\n",
" 'dusj_MC_y',\n",
" 'dusj_MC_z',\n",
" 'dusj_MCby',\n",
" 'dusj_MCen',\n",
" 'dusj_MCpr',\n",
" 'dusj_MCtype',\n",
" 'dusj_NEvents',\n",
" 'dusj_OneWeight',\n",
" 'tf_EventID',\n",
" 'tf_MCen']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted(mc_info.columns)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"tm = y == tag2num['track']\n",
"cm = y == tag2num['cscd']\n",
"mm = y == tag2num['atmu']"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lab = {'y': y}\n",
"lab['w2'] = mc_info['dusj_OneWeight']\n",
"lab['n_evts'] = mc_info['dusj_NEvents']\n",
"lab['livetime'] = mc_info['dusj_LivetimeSec']\n",
"lab['target'] = target\n",
"lab['mc_bjorken_y'] = mc_info['dusj_MCby']\n",
"lab['mc_energy'] = mc_info['dusj_MCen']\n",
"lab['log_mc_energy'] = np.log10(mc_info['dusj_MCen'])\n",
"lab = pd.DataFrame.from_dict(lab)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from honda_2015 import HondaFlux\n",
"zenith = mc_info['dusj_MC_theta']\n",
"hf = HondaFlux('pkg/honda_2015/honda_2015/honda2015_frejus_solarmin.h5')\n",
"# TODO: currently, atmu weights are = 1\n",
"wgt_honda = np.ones_like(lab['mc_energy'])\n",
"wgt_honda[tm] = hf('nu_mu', zenith[tm], energy[tm]) * lab['w2'][tm] / lab['n_evts'][tm]\n",
"wgt_honda[cm] = hf('nu_e', zenith[cm], energy[cm]) * lab['w2'][cm] / lab['n_evts'][cm]\n",
"wgt_honda[mm] = 1 / lab['livetime'][mm]\n",
"lab['honda'] = wgt_honda"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"for target in ['track', 'atmu', 'cscd']:\n",
" plt.hist('mc_energy', weights='honda', data=lab[lab.target == target], histtype='stepfilled', alpha=.5, bins=log_binlims, label=target, )\n",
"plt.legend()\n",
"#plt.yscale('log')\n",
"#plt.xscale('log')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lab['mc_energy_bin'] = pd.cut(lab['mc_energy'], binlims, labels=False)\n",
"lab['log_mc_energy_bin'] = pd.cut(lab['log_mc_energy'], binlims, labels=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"X_train, X_test, lab_train, lab_test, = train_test_split(X, lab, stratify=lab['y'], test_size=.33, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from sklearn.metrics import roc_auc_score, make_scorer\n",
"clf_pipe = Pipeline([\n",
" ('remove_constant', VarianceThreshold()), \n",
" ('quantile_scaler', RobustScaler()),\n",
" #('pca', PCA(svd_solver='auto')),\n",
" ('rf', RandomForestClassifier(\n",
" n_estimators=319, \n",
" max_features='auto',\n",
" criterion='entropy',\n",
" class_weight='balanced', \n",
" n_jobs=-1,\n",
" verbose=1\n",
" )),\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#skf = StratifiedKFold(n_splits=3, shuffle=True)\n",
"#grid = GridSearchCV(\n",
"# clf_pipe, \n",
"# param_grid={\n",
"# 'rf__criterion': ['entropy', 'gini'],\n",
"# #'rf__max_features': ['auto', None, 'log2']\n",
"# }, \n",
"# refit=True, \n",
"# #scoring='roc_auc', \n",
"# scoring=roc_auc_score(), \n",
"# cv=skf, verbose=1,\n",
"# #fit_params={'rf__sample_weight': wgt_train,},\n",
"#)\n",
"grid = clf_pipe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"clf = grid.fit(X_train, lab_train['y'])\n",
"score = clf.predict_proba(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for t in ['cscd', 'track', 'atmu']:\n",
" lab_test[t] = score[:, tag2num[t]]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lab_test.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"lab_test['pred'] = lab_test[['track', 'cscd', 'atmu']].idxmax(axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"binlims\n",
"binwidth = (binlims[1] - binlims[0])\n",
"x = binlims[:-1]\n",
"lines = 'steps-post'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#lab_test.groupby(pd.cut(lab_test['mc_energy'], binlims)).query()\n",
"\n",
"col = []\n",
"for i in range(binlims.shape[0] - 1):\n",
" low = binlims[i]\n",
" up = binlims[i+1]\n",
" dat = lab_test.query('%f <= mc_energy < %f' % (low, up))\n",
" c = {}\n",
" for pred in ['cscd', 'track', 'atmu']:\n",
" n = dat.query('pred == \"%s\"' % pred).honda.sum()\n",
" for true in ['cscd', 'track', 'atmu']:\n",
" res = dat.query('target == \"%s\" and pred == \"%s\"' % (true, pred))\n",
" try:\n",
" co = res.honda.sum()/n\n",
" except ZeroDivisionError:\n",
" co = 0\n",
" c[\"%s_as_%s\" % (true, pred)] = co\n",
" col.append(c)\n",
"col = pd.DataFrame(col)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"col"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"col.atmu_as_track.plot(ls='steps-post')\n",
"col.track_as_track.plot(ls='steps-post')\n",
"col.cscd_as_track.plot(ls='steps-post')\n",
"plt.title('Prediction: Track')\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.legend(loc=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"col.atmu_as_cscd.plot(ls='steps-post')\n",
"col.track_as_cscd.plot(ls='steps-post')\n",
"col.cscd_as_cscd.plot(ls='steps-post')\n",
"plt.title('Prediction: Cascade')\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.legend(loc=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(10, 3))\n",
"bins = (\n",
" np.linspace(0, 2, 11),\n",
" np.linspace(0, 1, 11), \n",
")\n",
"ax1.hist2d('log_mc_energy', 'track', weights='honda', data=lab_test.query('target==\"track\"'), bins=bins)\n",
"ax2.hist2d('log_mc_energy', 'track', weights='honda', data=lab_test.query('target==\"cscd\"'), bins=bins)\n",
"ax3.hist2d('log_mc_energy', 'track', weights='honda', data=lab_test.query('target==\"atmu\"'))\n",
"ax2.set_title('pred = track')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(10, 3))\n",
"bins = (\n",
" np.linspace(0, 2, 11),\n",
" np.linspace(0, 1, 11), \n",
")\n",
"ax1.hist2d('log_mc_energy', 'cscd', weights='honda', data=lab_test.query('target==\"track\"'), bins=bins)\n",
"ax2.hist2d('log_mc_energy', 'cscd', weights='honda', data=lab_test.query('target==\"cscd\"'), bins=bins)\n",
"ax3.hist2d('log_mc_energy', 'cscd', weights='honda', data=lab_test.query('target==\"atmu\"'))\n",
"ax2.set_title('pred = cscd')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax1, ax2, ax3) = plt.subplots(ncols=3, figsize=(10, 3))\n",
"bins = (\n",
" np.linspace(0, 2, 11),\n",
" np.linspace(0, 1, 11), \n",
")\n",
"ax1.hist2d('log_mc_energy', 'atmu', weights='honda', data=lab_test.query('target==\"track\"'), bins=bins)\n",
"ax2.hist2d('log_mc_energy', 'atmu', weights='honda', data=lab_test.query('target==\"cscd\"'), bins=bins)\n",
"ax3.hist2d('log_mc_energy', 'atmu', weights='honda', data=lab_test.query('target==\"atmu\"'))\n",
"ax2.set_title('pred = atmu')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title('''Prediction: \"It's a Track\"''')\n",
"norm = np.sum(np.array((track_as_track, cscd_as_track)), axis=0)\n",
"plt.plot(x, track_as_track/norm, label=r'$\\nu_\\mu$', ls=lines)\n",
"plt.plot(x, cscd_as_track/norm, label=r'$\\nu_e$', ls=lines)\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.ylabel('Fraction of \"Track\" population')\n",
"plt.legend(loc=0)\n",
"plt.xlabel('Energy / GeV')\n",
"plt.savefig('plots/pred_track_composition.svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title('''Prediction: \"It's a Cascade\"''')\n",
"norm = np.sum(np.array((track_as_cscd, cscd_as_cscd)), axis=0)\n",
"plt.plot(x, track_as_cscd/norm, label=r'$\\nu_\\mu$', ls=lines)\n",
"plt.plot(x, cscd_as_cscd/norm, label=r'$\\nu_e$', ls=lines)\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.ylabel('Fraction of \"Cascade\" population')\n",
"plt.legend(loc=0)\n",
"plt.xlabel('Energy / GeV')\n",
"plt.savefig('plots/pred_cscd_composition.svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title(r'''How get $\\nu_\\mu$ labeled?''')\n",
"norm = np.sum(np.array((track_as_track, track_as_cscd)), axis=0)\n",
"plt.plot(x, track_as_track/norm, label='Tagged: Track', ls=lines)\n",
"plt.plot(x, track_as_cscd/norm, label='Tagged: Cascade', ls=lines)\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.ylabel(r'% of $\\nu_\\mu$ ')\n",
"plt.legend(loc=0)\n",
"plt.xlabel('Energy / GeV')\n",
"plt.savefig('plots/nu_mu_predictions.svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title(r'''How get $\\nu_e$ labeled?''')\n",
"norm = np.sum(np.array((cscd_as_track, cscd_as_cscd)), axis=0)\n",
"plt.plot(x, cscd_as_track/norm, label='Tagged: Track', ls=lines)\n",
"plt.plot(x, cscd_as_cscd/norm, label='Tagged: Cascade', ls=lines)\n",
"plt.ylim(-0.05, 1.05)\n",
"plt.ylabel(r'% of $\\nu_e$ ')\n",
"plt.legend(loc=0)\n",
"plt.xlabel('Energy / GeV')\n",
"plt.savefig('plots/nu_e_and_NC_predictions.svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.plot(x, aucs, ls=lines)\n",
"plt.ylim(0, 1)\n",
"plt.title('Overall (Cut-Independent) PID Performance') \n",
"plt.ylabel('Area under ROC Curve (AUC)')\n",
"plt.xlabel('Energy / GeV')\n",
"plt.savefig('plots/auc.svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print('Hi!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"plt.title(r'How do $\\nu$ get labeled?')\n",
"plt.hist(score[y_test == 0], bins=np.linspace(0, 1, 50+1), histtype='stepfilled', alpha=.5, label=r'$\\nu_e$')\n",
"plt.hist(score[y_test == 1], bins=np.linspace(0, 1, 50+1), histtype='stepfilled', alpha=.5, label=r'$\\nu_\\mu$')\n",
"plt.legend()\n",
"plt.xlabel('Confidence of \"Event is Track\"')\n",
"plt.yscale('log')\n",
"plt.savefig('plots/rf_score.svg')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(sorted(mc_test.columns))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"out = mc_test.copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame.from_dict({\n",
" 'zenith_cscd': X_test['dusj_d_theta'],\n",
" 'zenith_track': np.arccos(-1 * X_test['tf_r_dir_z']) * 180 / np.pi,\n",
" 'zenith_mc': mc_test['dusj_MC_theta'],\n",
" 'azimuth_cscd': X_test['dusj_d_phi'],\n",
" 'azimuth_track': np.arctan2(X_test['tf_r_dir_x'], X_test['tf_r_dir_y']) * 180 / np.pi,\n",
" 'azimuth_mc': mc_test['dusj_MC_phi'],\n",
" 'energy_cscd': X_test['dusj_d_E'],\n",
" 'energy_track': X_test['tf_r_NuE'],\n",
" 'energy_mc': mc_test['dusj_MCen'], \n",
" 'bjorken_y': X_test['tf_r_by'], \n",
" 'bjorken_y_mc': mc_test['dusj_MCby'],\n",
" 'proba_track': score, \n",
" 'w2': mc_test['dusj_OneWeight'],\n",
" 'n_evts': mc_test['dusj_NEvents'],\n",
" 'livetime_sec': mc_test['dusj_LivetimeSec'],\n",
" 'type_mc': mc_test['dusj_MCtype'],\n",
" 'true_label': target_test,\n",
" 'true_y': y_test,\n",
" 'flux_honda2015': honda_test,\n",
" })"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#h5 = h5py.File('df_test.h5', 'a')\n",
"#h5.create_dataset('pid_out', data=df.to_records())\n",
"#h5.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"e_max = 10"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cut = 'energy_mc < %d and true_label == \"cscd\"' % e_max\n",
"plt.hexbin('proba_track', 'bjorken_y_mc', data=df.query(cut), gridsize=10, extent=(0, 1, 0, 1))\n",
"plt.ylabel('bjorken_y_mc')\n",
"plt.xlabel('confidence \"Event is Track\"')\n",
"plt.title(cut)\n",
"plt.colorbar()\n",
"plt.savefig('plots/pid_vs_mc_bjorken-y_cscd_%d.png' % e_max)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cut = 'energy_mc < %d and true_label == \"track\"' % e_max\n",
"plt.hexbin('proba_track', 'bjorken_y_mc', data=df.query(cut), gridsize=10, extent=(0, 1, 0, 1))\n",
"plt.ylabel('bjorken_y_mc')\n",
"plt.xlabel('confidence \"Event is Track\"')\n",
"plt.title(cut)\n",
"plt.colorbar()\n",
"plt.savefig('plots/pid_vs_mc_bjorken-y_track_%d.png' % e_max)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
" #variances = clf.best_estimator_.steps[0][1].variances_\n",
"#rf = clf.best_estimator_.steps[-1][1]\n",
"#imp = pd.DataFrame.from_dict({\n",
"# 'features': X_test.columns[variances != 0],\n",
"# 'importance': rf.feature_importances_\n",
"#})\n",
"#imp.sort_values(by='importance')[::-1].head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax_cscd, ax_track) = plt.subplots(ncols=2, figsize=(10, 4))\n",
"from matplotlib.colors import LogNorm\n",
"lims = (0, 1, 0, 2)\n",
"p1 = ax_cscd.hexbin(score[y_test == 0], np.log10(mc_test['dusj_MCen'][y_test == 0]), gridsize=25, norm=LogNorm(), extent=lims)\n",
"p2 = ax_track.hexbin(score[y_test == 1], np.log10(mc_test['dusj_MCen'][y_test == 1]), gridsize=25, norm=LogNorm(), extent=lims)\n",
"fig.suptitle('Unweighted Input')\n",
"plt.colorbar(p1, ax=ax_cscd)\n",
"plt.colorbar(p2, ax=ax_track)\n",
"ax_cscd.set_title('true cscd')\n",
"ax_track.set_title('true track')\n",
"ax_cscd.set_ylabel('log10(Energy)')\n",
"ax_cscd.set_xlabel('Probability(Track)')\n",
"plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax_cscd, ax_track) = plt.subplots(ncols=2, figsize=(10, 4))\n",
"from matplotlib.colors import LogNorm\n",
"lims = (0, 1, 0, 2)\n",
"p1 = ax_cscd.hexbin(score[y_test == 0], np.log10(mc_test['dusj_MCen'][y_test == 0]), gridsize=25, norm=LogNorm(), extent=lims)\n",
"p2 = ax_track.hexbin(score[y_test == 1], np.log10(mc_test['dusj_MCen'][y_test == 1]), gridsize=25, norm=LogNorm(), extent=lims)\n",
"fig.suptitle('Unweighted Input')\n",
"plt.colorbar(p1, ax=ax_cscd)\n",
"plt.colorbar(p2, ax=ax_track)\n",
"ax_cscd.set_title('true cscd')\n",
"ax_track.set_title('true track')\n",
"ax_cscd.set_ylabel('log10(Energy)')\n",
"ax_cscd.set_xlabel('Probability(Track)')\n",
"plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3))\n",
"p1 = ax1.hexbin(pred_atmu, np.log10(X['tf_r_NuE'][Y_abs == 13]), gridsize=25, norm=LogNorm()\n",
" )\n",
" #, extent=lims\n",
"p2 = ax2.hexbin(pred_atmu, np.log10(X['dusj_d_E'][Y_abs == 13]), gridsize=25, norm=LogNorm()\n",
" #, extent=lims\n",
" )\n",
"plt.colorbar(p1, ax=ax1)\n",
"plt.colorbar(p2, ax=ax2)\n",
"ax1.set_ylabel('log10(NuE)')\n",
"ax2.set_ylabel('log10(d_E)')\n",
"ax1.set_xlabel('Probability(Track)')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax_cscd, ax_trak) = plt.subplots(ncols=2, figsize=(8, 3))\n",
"ax_cscd.hist(energy[y == 1], bins=lin_bins, histtype='stepfilled', alpha=.5, label='mc')\n",
"ax_cscd.hist(e_rec_cscd[y == 1], bins=lin_bins, histtype='stepfilled', alpha=.5, label='shower reco')\n",
"ax_trak.hist(energy[y == 2], bins=lin_bins, histtype='stepfilled', alpha=.5, label='mc')\n",
"ax_trak.hist(e_rec_trak[y == 2], bins=lin_bins, histtype='stepfilled', alpha=.5, label='track reco')\n",
"ax_cscd.set_title('cscd')\n",
"ax_trak.set_title('nu_mu cc')\n",
"for ax in ax_trak, ax_cscd:\n",
" #ax.set_xscale('log')\n",
" ax.set_xlabel('Energy / GeV')\n",
" ax.legend()\n",
" #ax.set_yscale('log')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3))\n",
"lims = (0, 100, 0, 100)\n",
"p1 = ax1.hexbin(energy[Y_abs == 12], e_rec_cscd[Y_abs == 12], gridsize=25, norm=LogNorm(), extent=lims)\n",
"p2 = ax2.hexbin(energy[Y_abs == 12], e_rec_cscd[Y_abs == 12], gridsize=25, extent=lims)\n",
"plt.colorbar(p1, ax=ax1)\n",
"plt.colorbar(p2, ax=ax2)\n",
"for ax in ax1, ax2:\n",
" ax.set_xlabel('e_mc')\n",
" ax.set_ylabel('e_reco')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(8, 3))\n",
"p1 = ax1.hexbin(energy[Y_abs == 14], e_rec_trak[Y_abs == 14], gridsize=25, norm=LogNorm())\n",
"p2 = ax2.hexbin(energy[Y_abs == 14], e_rec_trak[Y_abs == 14], gridsize=25,)\n",
"plt.colorbar(p1, ax=ax1)\n",
"plt.colorbar(p2, ax=ax2)\n",
"for ax in ax1, ax2:\n",
" ax.set_xlabel('e_mc')\n",
" ax.set_ylabel('e_reco')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment