Skip to content

Instantly share code, notes, and snippets.

@giorgiopizz
Created May 12, 2023 14:37
Show Gist options
  • Save giorgiopizz/3e247c2600cb42b8e75b9cd42603da19 to your computer and use it in GitHub Desktop.
Save giorgiopizz/3e247c2600cb42b8e75b9cd42603da19 to your computer and use it in GitHub Desktop.
mkPlot prototype in matplotlib
import uproot
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import mplhep as hep
d = hep.style.CMS
# might change d
# d['xtick.major.size'] = 20
plt.style.use([d, hep.style.firamath])
f = uproot.open('rootFiles/mkShapes__new_vbf_16.root')
def make_error_band(bins, err):
_x = np.array(list(zip(bins-1e-8, bins))).flatten()[1:-1]
_err = np.array(list(zip(err, err))).flatten()
return _x, _err
def ratioFun(num, den, _min=1e-8, _max=10.0, tolerance=1e-12):
ratio=np.ones(len(num))
# check where the denominator is almost zero and num is not -> set ratio to _max
mask1 = (den < tolerance) & (num > tolerance)
# check where both the den and num are almost zero -> set ratio to _min (discard points)
mask2 = (den < tolerance) & (num < tolerance)
# check where the den is greater than zero -> set ratio to num/den
mask3 = (den > tolerance)
ratio[mask1] = _max
ratio[mask2] = _min
ratio[mask3] = num[mask3] / den[mask3]
# check if ratio is out of bounds for min and max
mask_min = ratio < _min
ratio[mask_min] = _min
mask_max = ratio > _max
ratio[mask_max] = _max
return ratio
def get_darker_color(color):
rgb = list(matplotlib.colors.to_rgba(color)[:-1])
darker_factor = 4/5
rgb[0] = (rgb[0] * darker_factor)
rgb[1] = (rgb[1] * darker_factor)
rgb[2] = (rgb[2] * darker_factor)
return tuple(rgb)
def get_shade_color(color):
rgb = list(matplotlib.colors.to_rgba(color)[:-1])
darker_factor = 1/5
rgb[0] = ((1.0 - rgb[0]) * darker_factor) + rgb[0]
rgb[1] = ((1.0 - rgb[1]) * darker_factor) + rgb[1]
rgb[2] = ((1.0 - rgb[2]) * darker_factor) + rgb[2]
return tuple(rgb)
groupPlot = {}
groupPlot['Fake'] = {
'nameHR' : 'nonprompt',
'isSignal' : 0,
'color': 'lightgray', # kGray + 1
'samples' : ['Fake_m', 'Fake_e']
}
groupPlot['top'] = {
'nameHR' : r'tW and t$ \bar{t} $',
'isSignal' : 0,
'color': 'gold', # kYellow
'samples' : ['top']
}
groupPlot['Multiboson'] = {
'nameHR' : 'Multiboson',
'isSignal' : 0,
'color': 'magenta', # kViolet + 1
'samples' : ['WWewk','WW', 'ggWW', 'Vg', 'VgS_H', 'VgS_L', 'ZZ2L2Nu', 'ZZ2L2Q', 'ZZ4L', 'WZ2L2Q', 'VVV']
}
# _i = 0
# ###### DY MC ######
# dys = {
# "DY_hardJets": "hardJets",
# "DY_PU_1Jets": "PU1Jets",
# "DY_PU_2Jets": "PU2Jets",
# }
'''
springgreen
mediumspringgreen
mediumseagreen
'''
# groupPlot['DY_DY_PU_2Jets'] = {
# 'nameHR': 'DY PU 2 Jet',
# 'isSignal': 0,
# 'color': 'mediumseagreen',
# 'samples': ['DY_DY_PU_2Jets']
# }
groupPlot['DY_DY_PU'] = {
'nameHR': 'DY PU Jet',
'isSignal': 0,
'color': 'mediumspringgreen',
'samples': ['DY_DY_PU_1Jets', 'DY_DY_PU_2Jets']
}
groupPlot['DY_DY_hardJets'] = {
'nameHR': 'DY hard',
'isSignal': 0,
'color': 'limegreen',
'samples': ['DY_DY_hardJets']
}
groupPlot['Zjj'] = {
'nameHR': 'Zjj',
'isSignal': 1,
#'color': 'blue',
'color': 'cornflowerblue',
'samples': ['Zjj']
}
groupPlot['DATA'] = {
'nameHR' : 'Data',
'color': 1 ,
'isSignal' : 0,
'isData' : 1 ,
}
samples = {}
bins = -1
c_data = 0
err_bar_data = 0
flow = False
for histo in f['sr_tot']['DNN_output_scaled_histo'].keys():
if type(bins) == type(-1):
bins = f['sr_tot']['DNN_output_scaled_histo'][histo].to_numpy(flow)[1]
h = f['sr_tot']['DNN_output_scaled_histo'][histo].to_numpy(flow)[0]
e = np.power(f['sr_tot']['DNN_output_scaled_histo'][histo].errors(flow), 2)
found = False
#histoName = histo[len('histo_'):]
histoName = f['sr_tot']['DNN_output_scaled_histo'][histo].name[len('histo_'):]
for sampleName in groupPlot.keys():
if groupPlot[sampleName].get('isData', 0) == 1:
continue
if histoName in groupPlot[sampleName]['samples']:
if sampleName in samples.keys():
samples[sampleName]['h'] += h
samples[sampleName]['e2']+= e
else:
samples[sampleName] = {
'h': h,
'e2': e,
}
found = True
break
if histoName.lower() == 'data':
c_data = h
err_bar_data = np.sqrt(e)
elif not found:
print('Could not find', histoName, 'in groupPlot')
#print(bins.shape, c_data.shape)
blindRegion = bins[1:]>0.4
blindRegion = bins[1:]>2.0
c_data [blindRegion] = 0
err_bar_data [blindRegion] = 0
#print(list(groupPlot.keys()))
x0 = bins[0]
x1 = bins[-1]
print('\n\nx0:', x0, '\tx1:', x1, '\n\n')
y0 = 1.0
y1 = 10**4
ratio0=0.6
ratio1=1.4
tolerance = 1e-8
nuis_err2_up = np.zeros(len(list(samples.values())[0]['h']))
nuis_err2_do = np.zeros(len(list(samples.values())[0]['h']))
sig_nuis_err2_up = np.zeros(len(list(samples.values())[0]['h']))
sig_nuis_err2_do = np.zeros(len(list(samples.values())[0]['h']))
#thstack = np.zeros(len(list(samples.values())[0]['h']))
thstack = []
sig_thstack = []
c_mcs_nominal = []
sig_c_mcs_nominal = []
#nuis_err2_do = np.zeros(len(c_mcs_nominal[0][1]))
for sample in samples.keys():
if groupPlot[sample].get('isSignal', 0) == 1:
sig_nuis_err2_up += samples[sample]['e2']
sig_nuis_err2_do += samples[sample]['e2']
sig_thstack.append(samples[sample]['h'])
sig_c_mcs_nominal.append((sample, samples[sample]['h']))
else:
nuis_err2_up += samples[sample]['e2']
nuis_err2_do += samples[sample]['e2']
thstack.append(samples[sample]['h'])
c_mcs_nominal.append((sample, samples[sample]['h']))
stat_err = np.sqrt(nuis_err2_up)
sig_stat_err = np.sqrt(sig_nuis_err2_up)
# FIXME fill nuisances
nuis_err_up = np.sqrt(nuis_err2_up)
nuis_err_do = np.sqrt(nuis_err2_do)
sig_nuis_err_up = np.sqrt(sig_nuis_err2_up)
sig_nuis_err_do = np.sqrt(sig_nuis_err2_do)
#print(c_mcs_nominal[0][1])
#c_mcs_nominal = sorted(c_mcs_nominal, key=lambda k: np.sum(k[1]))
#print(list(groupPlot.keys()))
c_mcs_nominal = sorted(c_mcs_nominal, key=lambda k: list(groupPlot.keys()).index(k[0]), reverse=True)
sig_c_mcs_nominal = sorted(sig_c_mcs_nominal, key=lambda k: list(groupPlot.keys()).index(k[0]), reverse=True)
centers = (bins[:-1]+bins[1:])/2
#err_bar_x = (centers[1]-centers[0])/2
err_bar_x = 0
fig, ax = plt.subplots(3, 1, sharex=True, gridspec_kw={'height_ratios': [3,1,1]}, figsize=(10,10), dpi=100)#dpi=100
fig.tight_layout(pad=-0.5)
hep.cms.label('Work in progress', data=True, lumi=39, year=2016, ax=ax[0])
thstack_base = np.zeros(len(thstack[0]))
for i in range(len(c_mcs_nominal)-1, -1, -1):
name = groupPlot[c_mcs_nominal[i][0]]['nameHR']
color = groupPlot[c_mcs_nominal[i][0]]['color']
thstack_base += c_mcs_nominal[i][1]
integral = str(round(np.sum(c_mcs_nominal[i][1]),2))
ax[0].hist(centers, weights=thstack_base, bins=bins, label=name + f' [{integral}]', color=color,
edgecolor=get_darker_color(color), histtype='step', fill=True, linewidth=2, zorder=(-100+i))
for i in range(len(sig_c_mcs_nominal)-1, -1, -1):
name = groupPlot[sig_c_mcs_nominal[i][0]]['nameHR']
color = groupPlot[sig_c_mcs_nominal[i][0]]['color']
thstack_base += sig_c_mcs_nominal[i][1]
ax[0].hist(centers, weights=thstack_base, bins=bins, label=name + f' [{integral}]', color=color,
edgecolor=get_darker_color(color), histtype='step', fill=True, linewidth=2, zorder=(-200+i))
ax[0].hist(centers, weights=sig_c_mcs_nominal[i][1], bins=bins, color=color,
edgecolor=get_darker_color(color), histtype='step', fill=False, linewidth=2, zorder=1)
integral = str(round(np.sum(c_data),2))
ax[0].errorbar(centers, c_data, err_bar_data, err_bar_x, fmt='ko', markersize=6, ecolor='black', label='DATA'+ f' [{integral}]', zorder=10)
# signalName = ''
# for h in groupPlot.keys():
# if groupPlot[h].get('isSignal', 0) == 1:
# signalName = h
# break
# print(signalName)
# print(signalName == '')
# h_signal = list(filter(lambda k: k[0] == signalName, c_mcs_nominal))[0][1]
includeSigInTopBand = False
label = 'Stat + Syst'
if includeSigInTopBand:
mc = sum(thstack) + sum(sig_thstack)
top_err_band_up = mc.copy() + np.sqrt( np.power(nuis_err_up, 2) + np.power(sig_nuis_err_up, 2) )
top_err_band_do = mc.copy() - np.sqrt( np.power(nuis_err_do, 2) + np.power(sig_nuis_err_do, 2) )
else:
mc = sum(thstack)
top_err_band_up = mc.copy() + nuis_err_up
top_err_band_do = mc.copy() - nuis_err_do
label += ' on Bkg.'
print(top_err_band_do, mc-top_err_band_do)
x, top_err_band_up = make_error_band(bins, top_err_band_up)
_, top_err_band_do = make_error_band(bins, top_err_band_do)
ax[0].fill_between(x , top_err_band_do, top_err_band_up, hatch='///', color='darkgrey', zorder=9, alpha=1, facecolor='none', linewidth=0.0, label=label)
mc = sum(thstack)
# ratio=np.ones(len(mc))
# mask = mc>tolerance
# ratio[mask] = c_data[mask]/mc[mask]
# mask = (mc<tolerance) & (c_data>tolerance)
# ratio[mask] = 10
ratio = ratioFun(c_data, mc)
# # If outside the plot put on limit
# mask = ratio > ratio1
# ratio[mask] = ratio1
# mask = ratio < ratio0
# ratio[mask] = ratio0
# Data points in ratio error
# err_bar = np.ones(len(mc))
# mask = mc > tolerance
# err_bar[mask] = err_bar_data[mask] / mc[mask]
err_bar = ratioFun(err_bar_data, mc)
# err_band_up = np.ones(len(mc))
# mask = mc>tolerance
# err_band_up[mask] = nuis_err_up[mask]/mc[mask]
err_band_up = ratioFun(nuis_err_up, mc)
# err_band_do = np.ones(len(mc))
# mask = mc>tolerance
# err_band_do[mask] = nuis_err_do[mask]/mc[mask]
err_band_do = ratioFun(nuis_err_do, mc)
x, err_band_up = make_error_band(bins, err_band_up)
_, err_band_do = make_error_band(bins, err_band_do)
ax[1].fill_between(x , 1 - err_band_do, 1 + err_band_up, alpha=0.2, hatch='///', color='grey', label='Stat + Syst')
ax[1].errorbar(centers, ratio, err_bar, err_bar_x, fmt='ko', markersize=6, ecolor='black')
ax[1].plot(bins, np.ones(len(bins)), '-', color='black')
# stat err band
# err_band_up = np.ones(len(mc))
# mask = mc>tolerance
# err_band_up[mask] = stat_err[mask]/mc[mask]
err_band_up = ratioFun(stat_err, mc)
err_band_do = err_band_up.copy()
x, err_band_up = make_error_band(bins, err_band_up)
_, err_band_do = make_error_band(bins, err_band_do)
ax[1].fill_between(x , 1 - err_band_do, 1 + err_band_up, hatch='\\\\', color='red', facecolor='none', label='Stat only')
# Data / Pred.
mc = sum(thstack) + sum(sig_thstack)
# ratio=np.ones(len(mc))
# mask = mc>tolerance
# ratio[mask] = c_data[mask]/mc[mask]
# mask = (mc<tolerance) & (c_data>tolerance)
# ratio[mask] = 10
ratio = ratioFun(c_data, mc)
# mask = ratio > ratio1
# ratio[mask] = ratio1
# mask = ratio < ratio0
# ratio[mask] = ratio0
# Data points in ratio error
err_bar = ratioFun(err_bar_data, mc)
err_band_up = ratioFun(np.sqrt(np.power(nuis_err_up, 2) + np.power(sig_nuis_err_up, 2)), mc)
err_band_do = ratioFun(np.sqrt(np.power(nuis_err_do, 2) + np.power(sig_nuis_err_do, 2)), mc)
x, err_band_up = make_error_band(bins, err_band_up)
_, err_band_do = make_error_band(bins, err_band_do)
ax[2].fill_between(x , 1 - err_band_do, 1 + err_band_up, alpha=0.2, hatch='///', color='grey')
ax[2].errorbar(centers, ratio, err_bar, err_bar_x, fmt='ko', markersize=6, ecolor='black')
ax[2].plot(bins, np.ones(len(bins)), '-', color='black')
nuis_sym = (np.sqrt(np.power(nuis_err_up, 2) + np.power(sig_nuis_err_up, 2)) + np.sqrt(np.power(nuis_err_do, 2) + np.power(sig_nuis_err_do, 2)))/2
chi2 = np.sum(np.power(ratio-1, 2) / ( np.power(nuis_sym/mc, 2) + np.power(err_bar, 2)))
#chi2_v = (np.power(ratio-1, 2) / ( np.power(nuis_sym/mc, 2) + np.power(err_bar, 2)))
#print(chi2_v)
ax[2].plot([], [],' ', label=r'$ \chi ^2 _0 $=' + str(round(chi2/len(ratio),2)))
print('\n\nChi2:', chi2/len(ratio))
# stat only
# err_band_up = np.ones(len(mc))
# mask = mc>tolerance
# err_band_up[mask] = / mc[mask]
err_band_up = ratioFun(np.sqrt(np.power(stat_err, 2) + np.power(sig_stat_err, 2)), mc)
err_band_do = err_band_up.copy()
x, err_band_up = make_error_band(bins, err_band_up)
_, err_band_do = make_error_band(bins, err_band_do)
# ax[2].fill_between(x , 1 - err_band_do, 1 + err_band_up, hatch='\\\\', color='red', facecolor='none', label='Stat only')
ax[2].fill_between(x , 1 - err_band_do, 1 + err_band_up, hatch='\\\\', color='red', facecolor='none')
#ax[2].legend()
ax[2].legend(ncol=2, fontsize='x-small')
y0 = np.min(thstack[0])
y1 = max(np.max(mc), np.max(c_data))
eps = (y1-y0)/20
y0 -= eps
y0 = max(y0, 1)
y1 += eps*5
y0 = max(np.min(thstack), 1)
#y0 = 1e+1
print('\n\nMin on the y:', y0)
y1 = max(np.max(mc), np.max(c_data))*1e+2
# eps = (y1-y0)/20
# y0 -= eps
# y0 = max(y0, 1)
# y1 += eps*5
#y0 = 1
#y1 = 1e+9
ax[0].set_yscale('log')
ax[0].set_ylim(y0, y1)
ax[1].set_ylim(ratio0,ratio1)
ax[1].set_xlim(x0,x1)
ax[2].set_ylim(ratio0,ratio1)
ax[2].set_xlim(x0,x1)
# plt.draw()
# # check if upper limit in ratio on y-axis has a label (this will interfere with the upper pad)
# cond1 = ax[1].get_ylim()[1] in list(map(lambda k: k.get_position()[1], ax[1].get_yticklabels()))
# ax0_ticklabels_pos = list(map(lambda k: k.get_position()[1], ax[0].get_yticklabels()))
# # check if the lower limit in the upper pad on y-axis has a label
# cond2 = ax[0].get_ylim()[0] in ax0_ticklabels_pos
# if cond1 and cond2:
# index = ax0_ticklabels_pos.index(ax[0].get_ylim()[0])
# plt.setp(ax[0].get_yticklabels()[index], visible=False)
ax[1].legend(ncol=2, fontsize='x-small')
ax[0].set_ylabel('Events', fontsize=30)
#ax[1].set_ylabel('Data / Pred.', loc='center', fontsize=20)
ax[1].set_ylabel('Data / Bkg.', loc='center', fontsize=20)
ax[2].set_ylabel('Data / Pred.', loc='center', fontsize=20)
#ax[1].set_xlabel(r'$p^{T}_{j}$ [GeV]', fontsize=30)
ax[1].tick_params(axis='y', labelsize='x-small', labelleft=True)
ax[2].tick_params(axis='y', labelsize='x-small', labelleft=True)
ax[2].set_xlabel(r'DNN output', fontsize=30)
ax[0].legend(ncol=2, fontsize='x-small', loc='upper center')
fig.savefig("/eos/user/g/gpizzati/www/rdf/provaMPLHE_3pad.png", facecolor='white', pad_inches=0.1, bbox_inches='tight')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment