Skip to content

Instantly share code, notes, and snippets.

@giorgiopizz
Created June 18, 2024 09:43
Show Gist options
  • Save giorgiopizz/cd442c00a57fee88ff286e100741b986 to your computer and use it in GitHub Desktop.
Save giorgiopizz/cd442c00a57fee88ff286e100741b986 to your computer and use it in GitHub Desktop.
JERC with correctionlib
import json
import os
import sys
import awkward as ak
# import correctionlib._core as core
import correctionlib
import hist
import matplotlib as mpl
import numpy as np
from coffea.lookup_tools.correctionlib_wrapper import correctionlib_wrapper
from framework import read_chunks, read_events
mpl.use("Agg")
import matplotlib.pyplot as plt
fw_path = os.path.abspath("..")
h = hist.Hist(
hist.axis.Regular(30, 15, 100, name="pt"),
hist.axis.StrCategory([], name="category", growth=True),
hist.storage.Weight(),
)
chunk = {
"data": {
"dataset": "Zjj",
"filenames": [
"root://eos.cms.rcac.purdue.edu//store/mc/RunIISummer20UL18NanoAODv9/EWK_LLJJ_MLL-50_MJJ-120_TuneCP5_13TeV-madgraph-pythia8_dipole/NANOAODSIM/106X_upgrade2018_realistic_v16_L1v1-v1/40000/12AA1522-B165-FD4F-BE15-96606743AD6C.root",
],
"start": 0,
"stop": 600000,
"read_form": "mc",
},
"error": "",
"result": {},
"priority": 0,
"weight": 8,
}
events = read_events(chunk)
events["weight"] = ak.broadcast_arrays(events.genWeight, events.Jet.pt)[0]
h.fill(
pt=ak.flatten(events.Jet.pt), category="original", weight=ak.flatten(events.weight)
)
# original_pt = ak.copy(events.Jet.pt)
print(events)
originalEvents = ak.copy(events)
runnum = events.run << 20
# print(events.run, runnum)
luminum = events.luminosityBlock << 10
# print(events.luminosityBlock, luminum)
evtnum = events.event
# print(events.event, evtnum)
event_random_seed = 1 + runnum + evtnum + luminum
# print(event_random_seed)
# jet0eta = ak.mask(events.Jet.eta / 0.01, ak.num(events.Jet) > 0)
jet0eta = ak.pad_none(events.Jet.eta / 0.01, 1, clip=True)
jet0eta = ak.fill_none(jet0eta, 0.0)[:, 0]
jet0eta = ak.values_astype(jet0eta, int)
event_random_seed = 1 + runnum + evtnum + luminum + jet0eta
print(event_random_seed[:3])
events[("Jet", "trueGenJetIdx")] = ak.mask(
events.Jet.genJetIdx,
(events.Jet.genJetIdx >= 0) & (events.Jet.genJetIdx < ak.num(events.GenJet)),
)
events["pt_gen"] = ak.values_astype(
ak.fill_none(events.GenJet[events.Jet.trueGenJetIdx].pt, -1), np.float32
)
example_value_dict = {
"JetPt": events.Jet.pt,
"JetEta": events.Jet.eta,
"JetPhi": events.Jet.phi,
"JetA": events.Jet.area,
"Rho": ak.broadcast_arrays(events.fixedGridRhoFastjetAll, events.Jet.pt)[0],
"systematic": "nom",
"GenPt": events.pt_gen,
"EventID": event_random_seed,
}
def get_corr_inputs(input_dict, corr_obj):
"""
Helper function for getting values of input variables
given a dictionary and a correction object.
"""
input_values = [input_dict[inp.name] for inp in corr_obj.inputs]
return input_values
# example_value_dict = {
# # jet transverse momentum
# "JetPt": 100.0,
# # jet pseudorapidity
# "JetEta": 0.0,
# # jet azimuthal angle
# "JetPhi": 0.2,
# # jet area
# "JetA": 0.5,
# # median energy density (pileup)
# "Rho": 15.0,
# # systematic variation (only for JER SF)
# "systematic": "nom",
# # pT of matched gen-level jet (only for JER smearing)
# "GenPt": 80.0, # or -1 if no match
# # unique event ID used for deterministic
# # pseudorandom number generation (only for JER smearing)
# "EventID": 1237645,
# }
# JEC base tag
# jec = "Summer19UL16_V7_MC"
jec = "Summer19UL18_V5_MC"
# jet algorithms
algo = "AK4PFchs"
algo_ak8 = "AK8PFPuppi"
# jet energy correction level
lvl = "L2Relative"
# jet energy correction level
lvl_compound = "L1L2L3Res"
# jet energy uncertainty
unc = "Total"
# print input information
print("\n\nJEC parameters")
print("##############\n")
print("jec = {}".format(jec))
print("algo = {}".format(algo))
print("algo_ak8 = {}".format(algo_ak8))
for v in ("JetPt", "JetEta", "JetA", "JetPhi", "JetA", "Rho"):
print("{} = {}".format(v, example_value_dict[v]))
#
# load JSON files using correctionlib
#
# AK4
fname = os.path.abspath("../data/2018/clib/jet_jerc.json.gz")
print("\nLoading JSON file: {}".format(fname))
cset = correctionlib.CorrectionSet.from_file(fname)
# tool for JER smearing
fname_jersmear = os.path.abspath("../data/2018/clib/jer_smear.json.gz")
print("\nLoading JSON file: {}".format(fname_jersmear))
cset_jersmear = correctionlib.CorrectionSet.from_file(fname_jersmear)
import time
start = time.perf_counter()
key = "{}_{}_{}".format(jec, lvl_compound, algo)
sf_cset = cset.compound[key]
jet_map = {
"jet_pt": events.Jet.pt,
"jet_pt_raw": events.Jet.pt * (1.0 - events.Jet.rawFactor),
"jet_eta": events.Jet.eta,
"jet_phi": events.Jet.phi,
"jet_area": events.Jet.area,
"rho": ak.broadcast_arrays(events.fixedGridRhoFastjetAll, events.Jet.pt)[0],
"systematic": "nom",
"gen_pt": events.pt_gen,
"EventID": event_random_seed,
}
print([k.name for k in sf_cset.inputs])
sf = correctionlib_wrapper(sf_cset)(
jet_map["jet_area"],
jet_map["jet_eta"],
jet_map["jet_pt_raw"],
jet_map["rho"],
)
# print((events.Jet.pt - jet_map["jet_pt_raw"] * sf) / events.Jet.pt)
jet_map["jet_pt"] = jet_map["jet_pt_raw"] * sf
# https://twiki.cern.ch/twiki/bin/viewauth/CMS/JECUncertaintySources#Run_2_reduced_set_of_uncertainty
uncs = [
"Absolute",
"Absolute_2018",
"BBEC1",
"BBEC1_2018",
"EC2",
"EC2_2018",
"FlavorQCD",
"HF",
"HF_2018",
"RelativeBal",
"RelativeSample_2018",
]
d = {}
for unc in uncs[:]:
key = f"{jec}_Regrouped_{unc}_{algo}"
sf_unc_cset = cset[key]
sf_unc = correctionlib_wrapper(sf_unc_cset)(
jet_map["jet_eta"],
jet_map["jet_pt_raw"],
)
jet_map[f"jet_pt_{unc}_up"] = (sf + sf_unc) * jet_map["jet_pt_raw"]
jet_map[f"jet_pt_{unc}_down"] = (sf - sf_unc) * jet_map["jet_pt_raw"]
h.fill(
pt=ak.flatten(jet_map["jet_pt"]),
category="corr",
weight=ak.flatten(events.weight),
)
jer = "Summer19UL18_JRV2_MC"
print("jer")
for variation_name, jet_pt_name in zip(["JER_up", "JER_down"], ["jet_pt", "jet_pt"]):
key = "{}_{}_{}".format(jer, "ScaleFactor", algo)
sf_cset_jer = cset[key]
sf_jer = correctionlib_wrapper(sf_cset_jer)(
jet_map["jet_eta"],
variation_name.split("_")[-1],
)
key = "{}_{}_{}".format(jer, "PtResolution", algo)
ptres_jer_cset = cset[key]
ptres_jer = correctionlib_wrapper(ptres_jer_cset)(
jet_map["jet_eta"],
jet_map[jet_pt_name],
jet_map["rho"],
)
key_jersmear = "JERSmear"
sf_jersmear_cset = cset_jersmear[key_jersmear]
# add previously obtained JER/JERSF values to inputs
sf_input_names = [inp.name for inp in sf_jersmear_cset.inputs]
sf_jersmear = correctionlib_wrapper(sf_jersmear_cset)(
jet_map[jet_pt_name],
jet_map["jet_eta"],
jet_map["gen_pt"],
jet_map["rho"],
jet_map["EventID"],
ptres_jer,
sf_jer,
)
no_jer_mask = (
(jet_map[jet_pt_name] < 50)
& (abs(jet_map["jet_eta"]) >= 2.8)
& (abs(jet_map["jet_eta"]) <= 3.0)
)
jet_map[jet_pt_name + "_" + variation_name] = (
ak.where(no_jer_mask, 1.0, sf_jersmear) * jet_map[jet_pt_name]
)
for variation_name, jet_pt_name in zip(
["nom", "Absolute_up"], ["jet_pt", "jet_pt_Absolute_up"]
):
key = "{}_{}_{}".format(jer, "ScaleFactor", algo)
sf_cset_jer = cset[key]
sf_jer = correctionlib_wrapper(sf_cset_jer)(
jet_map["jet_eta"],
"nom",
)
key = "{}_{}_{}".format(jer, "PtResolution", algo)
ptres_jer_cset = cset[key]
ptres_jer = correctionlib_wrapper(ptres_jer_cset)(
jet_map["jet_eta"],
jet_map[jet_pt_name],
jet_map["rho"],
)
key_jersmear = "JERSmear"
sf_jersmear_cset = cset_jersmear[key_jersmear]
# add previously obtained JER/JERSF values to inputs
sf_input_names = [inp.name for inp in sf_jersmear_cset.inputs]
sf_jersmear = correctionlib_wrapper(sf_jersmear_cset)(
jet_map[jet_pt_name],
jet_map["jet_eta"],
jet_map["gen_pt"],
jet_map["rho"],
jet_map["EventID"],
ptres_jer,
sf_jer,
)
no_jer_mask = (
(jet_map[jet_pt_name] < 50)
& (abs(jet_map["jet_eta"]) >= 2.8)
& (abs(jet_map["jet_eta"]) <= 3.0)
)
jet_map[jet_pt_name] = (
ak.where(no_jer_mask, 1.0, sf_jersmear) * jet_map[jet_pt_name]
)
h.fill(
pt=ak.flatten(jet_map["jet_pt_Absolute_up"]),
category="Absolute_up",
weight=ak.flatten(events.weight),
)
h.fill(
pt=ak.flatten(jet_map["jet_pt"]),
category="jer",
weight=ak.flatten(events.weight),
)
h.fill(
pt=ak.flatten(jet_map["jet_pt_JER_up"]),
category="JER_up",
weight=ak.flatten(events.weight),
)
print("finished", time.perf_counter() - start)
start = time.perf_counter()
print("begin coffea")
import variation as variation_module
from modules.jme import correct_jets, getJetCorrections
with open(f"{fw_path}/data/cfg.json") as file:
cfg = json.load(file)
jec_stack = getJetCorrections(cfg)
variations = variation_module.Variation()
variations.register_variation([], "nom")
events, variations = correct_jets(
originalEvents,
variations,
jec_stack,
run_variations=True,
)
h.fill(
pt=ak.flatten(events.Jet.pt),
category="jer2",
weight=ak.flatten(events.weight),
)
h.fill(
pt=ak.flatten(events.Jet.pt_JES_Absolute_up),
category="Absolute_up_2",
weight=ak.flatten(events.weight),
)
h.fill(
pt=ak.flatten(events.Jet.pt_JER_up),
category="JER_up_2",
weight=ak.flatten(events.weight),
)
print("done coffea")
print("finished", time.perf_counter() - start)
start = time.perf_counter()
def fold(h):
_h = h.copy()
a = _h.view(True)
a.value[1, :] = a.value[1, :] + a.value[0, :]
a.value[0, :] = 0
a.value[-2, :] = a.value[-2, :] + a.value[-1, :]
a.value[-1, :] = 0
a.variance[1, :] = a.variance[1, :] + a.variance[0, :]
a.variance[0, :] = 0
a.variance[-2, :] = a.variance[-2, :] + a.variance[-1, :]
a.variance[-1, :] = 0
return _h
h = fold(h)
fig, ax = plt.subplots(7, 1, sharex=True, figsize=(10, 10))
ax[0].stairs(
h[:, hist.loc("jer")].values() / h[:, hist.loc("corr")].values(),
h.axes[0].edges,
baseline=1.0,
label="New JER",
)
ax[0].stairs(
h[:, hist.loc("jer2")].values() / h[:, hist.loc("corr")].values(),
h.axes[0].edges,
baseline=1.0,
label="Old JER",
)
ax[1].stairs(
h[:, hist.loc("Absolute_up")].values() / h[:, hist.loc("corr")].values(),
h.axes[0].edges,
baseline=1.0,
label="JES new / JEC",
)
ax[1].stairs(
h[:, hist.loc("Absolute_up_2")].values() / h[:, hist.loc("corr")].values(),
h.axes[0].edges,
baseline=1.0,
label="JES old / JEC",
)
ax[1].plot(
h.axes[0].edges,
np.ones_like(h.axes[0].edges),
color="black",
)
ax[1].legend()
ax[2].stairs(
h[:, hist.loc("jer2")].values() / h[:, hist.loc("jer")].values(),
h.axes[0].edges,
baseline=1.0,
label="New JER / Old JER",
)
ax[2].plot(
h.axes[0].edges,
np.ones_like(h.axes[0].edges),
color="black",
)
ax[3].stairs(
h[:, hist.loc("corr")].values() / h[:, hist.loc("original")].values(),
h.axes[0].edges,
baseline=1.0,
label="JEC",
)
ax[4].stairs(
h[:, hist.loc("Absolute_up")].values() / h[:, hist.loc("Absolute_up_2")].values(),
h.axes[0].edges,
baseline=1.0,
label="JES/JES",
)
ax[4].plot(
h.axes[0].edges,
np.ones_like(h.axes[0].edges),
color="black",
)
ax[5].stairs(
h[:, hist.loc("Absolute_up")].values() / h[:, hist.loc("jer")].values(),
h.axes[0].edges,
baseline=1.0,
label="JEC new / JER",
)
ax[5].stairs(
h[:, hist.loc("Absolute_up_2")].values() / h[:, hist.loc("jer")].values(),
h.axes[0].edges,
baseline=1.0,
label="JES old / JER",
)
ax[5].plot(
h.axes[0].edges,
np.ones_like(h.axes[0].edges),
color="black",
)
ax[6].stairs(
h[:, hist.loc("JER_up")].values() / h[:, hist.loc("jer")].values(),
h.axes[0].edges,
baseline=1.0,
label="JER up new / JER",
)
ax[6].stairs(
h[:, hist.loc("JER_up_2")].values() / h[:, hist.loc("jer")].values(),
h.axes[0].edges,
baseline=1.0,
label="JER up old / JER",
)
ax[6].plot(
h.axes[0].edges,
np.ones_like(h.axes[0].edges),
color="black",
)
ax[6].legend()
plt.legend()
plt.savefig("test.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment