Created
May 12, 2023 14:37
-
-
Save giorgiopizz/3e247c2600cb42b8e75b9cd42603da19 to your computer and use it in GitHub Desktop.
mkPlot prototype in matplotlib
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import uproot | |
import matplotlib.pyplot as plt | |
import matplotlib | |
import numpy as np | |
import mplhep as hep | |
d = hep.style.CMS | |
# might change d | |
# d['xtick.major.size'] = 20 | |
plt.style.use([d, hep.style.firamath]) | |
f = uproot.open('rootFiles/mkShapes__new_vbf_16.root') | |
def make_error_band(bins, err): | |
_x = np.array(list(zip(bins-1e-8, bins))).flatten()[1:-1] | |
_err = np.array(list(zip(err, err))).flatten() | |
return _x, _err | |
def ratioFun(num, den, _min=1e-8, _max=10.0, tolerance=1e-12): | |
ratio=np.ones(len(num)) | |
# check where the denominator is almost zero and num is not -> set ratio to _max | |
mask1 = (den < tolerance) & (num > tolerance) | |
# check where both the den and num are almost zero -> set ratio to _min (discard points) | |
mask2 = (den < tolerance) & (num < tolerance) | |
# check where the den is greater than zero -> set ratio to num/den | |
mask3 = (den > tolerance) | |
ratio[mask1] = _max | |
ratio[mask2] = _min | |
ratio[mask3] = num[mask3] / den[mask3] | |
# check if ratio is out of bounds for min and max | |
mask_min = ratio < _min | |
ratio[mask_min] = _min | |
mask_max = ratio > _max | |
ratio[mask_max] = _max | |
return ratio | |
def get_darker_color(color): | |
rgb = list(matplotlib.colors.to_rgba(color)[:-1]) | |
darker_factor = 4/5 | |
rgb[0] = (rgb[0] * darker_factor) | |
rgb[1] = (rgb[1] * darker_factor) | |
rgb[2] = (rgb[2] * darker_factor) | |
return tuple(rgb) | |
def get_shade_color(color): | |
rgb = list(matplotlib.colors.to_rgba(color)[:-1]) | |
darker_factor = 1/5 | |
rgb[0] = ((1.0 - rgb[0]) * darker_factor) + rgb[0] | |
rgb[1] = ((1.0 - rgb[1]) * darker_factor) + rgb[1] | |
rgb[2] = ((1.0 - rgb[2]) * darker_factor) + rgb[2] | |
return tuple(rgb) | |
groupPlot = {} | |
groupPlot['Fake'] = { | |
'nameHR' : 'nonprompt', | |
'isSignal' : 0, | |
'color': 'lightgray', # kGray + 1 | |
'samples' : ['Fake_m', 'Fake_e'] | |
} | |
groupPlot['top'] = { | |
'nameHR' : r'tW and t$ \bar{t} $', | |
'isSignal' : 0, | |
'color': 'gold', # kYellow | |
'samples' : ['top'] | |
} | |
groupPlot['Multiboson'] = { | |
'nameHR' : 'Multiboson', | |
'isSignal' : 0, | |
'color': 'magenta', # kViolet + 1 | |
'samples' : ['WWewk','WW', 'ggWW', 'Vg', 'VgS_H', 'VgS_L', 'ZZ2L2Nu', 'ZZ2L2Q', 'ZZ4L', 'WZ2L2Q', 'VVV'] | |
} | |
# _i = 0 | |
# ###### DY MC ###### | |
# dys = { | |
# "DY_hardJets": "hardJets", | |
# "DY_PU_1Jets": "PU1Jets", | |
# "DY_PU_2Jets": "PU2Jets", | |
# } | |
''' | |
springgreen | |
mediumspringgreen | |
mediumseagreen | |
''' | |
# groupPlot['DY_DY_PU_2Jets'] = { | |
# 'nameHR': 'DY PU 2 Jet', | |
# 'isSignal': 0, | |
# 'color': 'mediumseagreen', | |
# 'samples': ['DY_DY_PU_2Jets'] | |
# } | |
groupPlot['DY_DY_PU'] = { | |
'nameHR': 'DY PU Jet', | |
'isSignal': 0, | |
'color': 'mediumspringgreen', | |
'samples': ['DY_DY_PU_1Jets', 'DY_DY_PU_2Jets'] | |
} | |
groupPlot['DY_DY_hardJets'] = { | |
'nameHR': 'DY hard', | |
'isSignal': 0, | |
'color': 'limegreen', | |
'samples': ['DY_DY_hardJets'] | |
} | |
groupPlot['Zjj'] = { | |
'nameHR': 'Zjj', | |
'isSignal': 1, | |
#'color': 'blue', | |
'color': 'cornflowerblue', | |
'samples': ['Zjj'] | |
} | |
groupPlot['DATA'] = { | |
'nameHR' : 'Data', | |
'color': 1 , | |
'isSignal' : 0, | |
'isData' : 1 , | |
} | |
samples = {} | |
bins = -1 | |
c_data = 0 | |
err_bar_data = 0 | |
flow = False | |
for histo in f['sr_tot']['DNN_output_scaled_histo'].keys(): | |
if type(bins) == type(-1): | |
bins = f['sr_tot']['DNN_output_scaled_histo'][histo].to_numpy(flow)[1] | |
h = f['sr_tot']['DNN_output_scaled_histo'][histo].to_numpy(flow)[0] | |
e = np.power(f['sr_tot']['DNN_output_scaled_histo'][histo].errors(flow), 2) | |
found = False | |
#histoName = histo[len('histo_'):] | |
histoName = f['sr_tot']['DNN_output_scaled_histo'][histo].name[len('histo_'):] | |
for sampleName in groupPlot.keys(): | |
if groupPlot[sampleName].get('isData', 0) == 1: | |
continue | |
if histoName in groupPlot[sampleName]['samples']: | |
if sampleName in samples.keys(): | |
samples[sampleName]['h'] += h | |
samples[sampleName]['e2']+= e | |
else: | |
samples[sampleName] = { | |
'h': h, | |
'e2': e, | |
} | |
found = True | |
break | |
if histoName.lower() == 'data': | |
c_data = h | |
err_bar_data = np.sqrt(e) | |
elif not found: | |
print('Could not find', histoName, 'in groupPlot') | |
#print(bins.shape, c_data.shape) | |
blindRegion = bins[1:]>0.4 | |
blindRegion = bins[1:]>2.0 | |
c_data [blindRegion] = 0 | |
err_bar_data [blindRegion] = 0 | |
#print(list(groupPlot.keys())) | |
x0 = bins[0] | |
x1 = bins[-1] | |
print('\n\nx0:', x0, '\tx1:', x1, '\n\n') | |
y0 = 1.0 | |
y1 = 10**4 | |
ratio0=0.6 | |
ratio1=1.4 | |
tolerance = 1e-8 | |
nuis_err2_up = np.zeros(len(list(samples.values())[0]['h'])) | |
nuis_err2_do = np.zeros(len(list(samples.values())[0]['h'])) | |
sig_nuis_err2_up = np.zeros(len(list(samples.values())[0]['h'])) | |
sig_nuis_err2_do = np.zeros(len(list(samples.values())[0]['h'])) | |
#thstack = np.zeros(len(list(samples.values())[0]['h'])) | |
thstack = [] | |
sig_thstack = [] | |
c_mcs_nominal = [] | |
sig_c_mcs_nominal = [] | |
#nuis_err2_do = np.zeros(len(c_mcs_nominal[0][1])) | |
for sample in samples.keys(): | |
if groupPlot[sample].get('isSignal', 0) == 1: | |
sig_nuis_err2_up += samples[sample]['e2'] | |
sig_nuis_err2_do += samples[sample]['e2'] | |
sig_thstack.append(samples[sample]['h']) | |
sig_c_mcs_nominal.append((sample, samples[sample]['h'])) | |
else: | |
nuis_err2_up += samples[sample]['e2'] | |
nuis_err2_do += samples[sample]['e2'] | |
thstack.append(samples[sample]['h']) | |
c_mcs_nominal.append((sample, samples[sample]['h'])) | |
stat_err = np.sqrt(nuis_err2_up) | |
sig_stat_err = np.sqrt(sig_nuis_err2_up) | |
# FIXME fill nuisances | |
nuis_err_up = np.sqrt(nuis_err2_up) | |
nuis_err_do = np.sqrt(nuis_err2_do) | |
sig_nuis_err_up = np.sqrt(sig_nuis_err2_up) | |
sig_nuis_err_do = np.sqrt(sig_nuis_err2_do) | |
#print(c_mcs_nominal[0][1]) | |
#c_mcs_nominal = sorted(c_mcs_nominal, key=lambda k: np.sum(k[1])) | |
#print(list(groupPlot.keys())) | |
c_mcs_nominal = sorted(c_mcs_nominal, key=lambda k: list(groupPlot.keys()).index(k[0]), reverse=True) | |
sig_c_mcs_nominal = sorted(sig_c_mcs_nominal, key=lambda k: list(groupPlot.keys()).index(k[0]), reverse=True) | |
centers = (bins[:-1]+bins[1:])/2 | |
#err_bar_x = (centers[1]-centers[0])/2 | |
err_bar_x = 0 | |
fig, ax = plt.subplots(3, 1, sharex=True, gridspec_kw={'height_ratios': [3,1,1]}, figsize=(10,10), dpi=100)#dpi=100 | |
fig.tight_layout(pad=-0.5) | |
hep.cms.label('Work in progress', data=True, lumi=39, year=2016, ax=ax[0]) | |
thstack_base = np.zeros(len(thstack[0])) | |
for i in range(len(c_mcs_nominal)-1, -1, -1): | |
name = groupPlot[c_mcs_nominal[i][0]]['nameHR'] | |
color = groupPlot[c_mcs_nominal[i][0]]['color'] | |
thstack_base += c_mcs_nominal[i][1] | |
integral = str(round(np.sum(c_mcs_nominal[i][1]),2)) | |
ax[0].hist(centers, weights=thstack_base, bins=bins, label=name + f' [{integral}]', color=color, | |
edgecolor=get_darker_color(color), histtype='step', fill=True, linewidth=2, zorder=(-100+i)) | |
for i in range(len(sig_c_mcs_nominal)-1, -1, -1): | |
name = groupPlot[sig_c_mcs_nominal[i][0]]['nameHR'] | |
color = groupPlot[sig_c_mcs_nominal[i][0]]['color'] | |
thstack_base += sig_c_mcs_nominal[i][1] | |
ax[0].hist(centers, weights=thstack_base, bins=bins, label=name + f' [{integral}]', color=color, | |
edgecolor=get_darker_color(color), histtype='step', fill=True, linewidth=2, zorder=(-200+i)) | |
ax[0].hist(centers, weights=sig_c_mcs_nominal[i][1], bins=bins, color=color, | |
edgecolor=get_darker_color(color), histtype='step', fill=False, linewidth=2, zorder=1) | |
integral = str(round(np.sum(c_data),2)) | |
ax[0].errorbar(centers, c_data, err_bar_data, err_bar_x, fmt='ko', markersize=6, ecolor='black', label='DATA'+ f' [{integral}]', zorder=10) | |
# signalName = '' | |
# for h in groupPlot.keys(): | |
# if groupPlot[h].get('isSignal', 0) == 1: | |
# signalName = h | |
# break | |
# print(signalName) | |
# print(signalName == '') | |
# h_signal = list(filter(lambda k: k[0] == signalName, c_mcs_nominal))[0][1] | |
includeSigInTopBand = False | |
label = 'Stat + Syst' | |
if includeSigInTopBand: | |
mc = sum(thstack) + sum(sig_thstack) | |
top_err_band_up = mc.copy() + np.sqrt( np.power(nuis_err_up, 2) + np.power(sig_nuis_err_up, 2) ) | |
top_err_band_do = mc.copy() - np.sqrt( np.power(nuis_err_do, 2) + np.power(sig_nuis_err_do, 2) ) | |
else: | |
mc = sum(thstack) | |
top_err_band_up = mc.copy() + nuis_err_up | |
top_err_band_do = mc.copy() - nuis_err_do | |
label += ' on Bkg.' | |
print(top_err_band_do, mc-top_err_band_do) | |
x, top_err_band_up = make_error_band(bins, top_err_band_up) | |
_, top_err_band_do = make_error_band(bins, top_err_band_do) | |
ax[0].fill_between(x , top_err_band_do, top_err_band_up, hatch='///', color='darkgrey', zorder=9, alpha=1, facecolor='none', linewidth=0.0, label=label) | |
mc = sum(thstack) | |
# ratio=np.ones(len(mc)) | |
# mask = mc>tolerance | |
# ratio[mask] = c_data[mask]/mc[mask] | |
# mask = (mc<tolerance) & (c_data>tolerance) | |
# ratio[mask] = 10 | |
ratio = ratioFun(c_data, mc) | |
# # If outside the plot put on limit | |
# mask = ratio > ratio1 | |
# ratio[mask] = ratio1 | |
# mask = ratio < ratio0 | |
# ratio[mask] = ratio0 | |
# Data points in ratio error | |
# err_bar = np.ones(len(mc)) | |
# mask = mc > tolerance | |
# err_bar[mask] = err_bar_data[mask] / mc[mask] | |
err_bar = ratioFun(err_bar_data, mc) | |
# err_band_up = np.ones(len(mc)) | |
# mask = mc>tolerance | |
# err_band_up[mask] = nuis_err_up[mask]/mc[mask] | |
err_band_up = ratioFun(nuis_err_up, mc) | |
# err_band_do = np.ones(len(mc)) | |
# mask = mc>tolerance | |
# err_band_do[mask] = nuis_err_do[mask]/mc[mask] | |
err_band_do = ratioFun(nuis_err_do, mc) | |
x, err_band_up = make_error_band(bins, err_band_up) | |
_, err_band_do = make_error_band(bins, err_band_do) | |
ax[1].fill_between(x , 1 - err_band_do, 1 + err_band_up, alpha=0.2, hatch='///', color='grey', label='Stat + Syst') | |
ax[1].errorbar(centers, ratio, err_bar, err_bar_x, fmt='ko', markersize=6, ecolor='black') | |
ax[1].plot(bins, np.ones(len(bins)), '-', color='black') | |
# stat err band | |
# err_band_up = np.ones(len(mc)) | |
# mask = mc>tolerance | |
# err_band_up[mask] = stat_err[mask]/mc[mask] | |
err_band_up = ratioFun(stat_err, mc) | |
err_band_do = err_band_up.copy() | |
x, err_band_up = make_error_band(bins, err_band_up) | |
_, err_band_do = make_error_band(bins, err_band_do) | |
ax[1].fill_between(x , 1 - err_band_do, 1 + err_band_up, hatch='\\\\', color='red', facecolor='none', label='Stat only') | |
# Data / Pred. | |
mc = sum(thstack) + sum(sig_thstack) | |
# ratio=np.ones(len(mc)) | |
# mask = mc>tolerance | |
# ratio[mask] = c_data[mask]/mc[mask] | |
# mask = (mc<tolerance) & (c_data>tolerance) | |
# ratio[mask] = 10 | |
ratio = ratioFun(c_data, mc) | |
# mask = ratio > ratio1 | |
# ratio[mask] = ratio1 | |
# mask = ratio < ratio0 | |
# ratio[mask] = ratio0 | |
# Data points in ratio error | |
err_bar = ratioFun(err_bar_data, mc) | |
err_band_up = ratioFun(np.sqrt(np.power(nuis_err_up, 2) + np.power(sig_nuis_err_up, 2)), mc) | |
err_band_do = ratioFun(np.sqrt(np.power(nuis_err_do, 2) + np.power(sig_nuis_err_do, 2)), mc) | |
x, err_band_up = make_error_band(bins, err_band_up) | |
_, err_band_do = make_error_band(bins, err_band_do) | |
ax[2].fill_between(x , 1 - err_band_do, 1 + err_band_up, alpha=0.2, hatch='///', color='grey') | |
ax[2].errorbar(centers, ratio, err_bar, err_bar_x, fmt='ko', markersize=6, ecolor='black') | |
ax[2].plot(bins, np.ones(len(bins)), '-', color='black') | |
nuis_sym = (np.sqrt(np.power(nuis_err_up, 2) + np.power(sig_nuis_err_up, 2)) + np.sqrt(np.power(nuis_err_do, 2) + np.power(sig_nuis_err_do, 2)))/2 | |
chi2 = np.sum(np.power(ratio-1, 2) / ( np.power(nuis_sym/mc, 2) + np.power(err_bar, 2))) | |
#chi2_v = (np.power(ratio-1, 2) / ( np.power(nuis_sym/mc, 2) + np.power(err_bar, 2))) | |
#print(chi2_v) | |
ax[2].plot([], [],' ', label=r'$ \chi ^2 _0 $=' + str(round(chi2/len(ratio),2))) | |
print('\n\nChi2:', chi2/len(ratio)) | |
# stat only | |
# err_band_up = np.ones(len(mc)) | |
# mask = mc>tolerance | |
# err_band_up[mask] = / mc[mask] | |
err_band_up = ratioFun(np.sqrt(np.power(stat_err, 2) + np.power(sig_stat_err, 2)), mc) | |
err_band_do = err_band_up.copy() | |
x, err_band_up = make_error_band(bins, err_band_up) | |
_, err_band_do = make_error_band(bins, err_band_do) | |
# ax[2].fill_between(x , 1 - err_band_do, 1 + err_band_up, hatch='\\\\', color='red', facecolor='none', label='Stat only') | |
ax[2].fill_between(x , 1 - err_band_do, 1 + err_band_up, hatch='\\\\', color='red', facecolor='none') | |
#ax[2].legend() | |
ax[2].legend(ncol=2, fontsize='x-small') | |
y0 = np.min(thstack[0]) | |
y1 = max(np.max(mc), np.max(c_data)) | |
eps = (y1-y0)/20 | |
y0 -= eps | |
y0 = max(y0, 1) | |
y1 += eps*5 | |
y0 = max(np.min(thstack), 1) | |
#y0 = 1e+1 | |
print('\n\nMin on the y:', y0) | |
y1 = max(np.max(mc), np.max(c_data))*1e+2 | |
# eps = (y1-y0)/20 | |
# y0 -= eps | |
# y0 = max(y0, 1) | |
# y1 += eps*5 | |
#y0 = 1 | |
#y1 = 1e+9 | |
ax[0].set_yscale('log') | |
ax[0].set_ylim(y0, y1) | |
ax[1].set_ylim(ratio0,ratio1) | |
ax[1].set_xlim(x0,x1) | |
ax[2].set_ylim(ratio0,ratio1) | |
ax[2].set_xlim(x0,x1) | |
# plt.draw() | |
# # check if upper limit in ratio on y-axis has a label (this will interfere with the upper pad) | |
# cond1 = ax[1].get_ylim()[1] in list(map(lambda k: k.get_position()[1], ax[1].get_yticklabels())) | |
# ax0_ticklabels_pos = list(map(lambda k: k.get_position()[1], ax[0].get_yticklabels())) | |
# # check if the lower limit in the upper pad on y-axis has a label | |
# cond2 = ax[0].get_ylim()[0] in ax0_ticklabels_pos | |
# if cond1 and cond2: | |
# index = ax0_ticklabels_pos.index(ax[0].get_ylim()[0]) | |
# plt.setp(ax[0].get_yticklabels()[index], visible=False) | |
ax[1].legend(ncol=2, fontsize='x-small') | |
ax[0].set_ylabel('Events', fontsize=30) | |
#ax[1].set_ylabel('Data / Pred.', loc='center', fontsize=20) | |
ax[1].set_ylabel('Data / Bkg.', loc='center', fontsize=20) | |
ax[2].set_ylabel('Data / Pred.', loc='center', fontsize=20) | |
#ax[1].set_xlabel(r'$p^{T}_{j}$ [GeV]', fontsize=30) | |
ax[1].tick_params(axis='y', labelsize='x-small', labelleft=True) | |
ax[2].tick_params(axis='y', labelsize='x-small', labelleft=True) | |
ax[2].set_xlabel(r'DNN output', fontsize=30) | |
ax[0].legend(ncol=2, fontsize='x-small', loc='upper center') | |
fig.savefig("/eos/user/g/gpizzati/www/rdf/provaMPLHE_3pad.png", facecolor='white', pad_inches=0.1, bbox_inches='tight') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment