Skip to content

Instantly share code, notes, and snippets.

@andrzejnovak
Created February 11, 2024 15:58
Show Gist options
  • Save andrzejnovak/282afc381813a262231405c1e95a2c29 to your computer and use it in GitHub Desktop.
Save andrzejnovak/282afc381813a262231405c1e95a2c29 to your computer and use it in GitHub Desktop.
from __future__ import print_function, division
import sys
import os
import rhalphalib as rl
import numpy as np
import scipy.stats
import pickle
import ROOT
rl.util.install_roofit_helpers()
rl.ParametericSample.PreferRooParametricHist = False
def expo_sample(norm, scale, obs):
cdf = scipy.stats.expon.cdf(scale=scale, x=obs.binning) * norm
return (np.diff(cdf), obs.binning, obs.name)
def gaus_sample(norm, loc, scale, obs):
cdf = scipy.stats.norm.cdf(loc=loc, scale=scale, x=obs.binning) * norm
return (np.diff(cdf), obs.binning, obs.name)
def test_rhalphabet(tmpdir):
throwPoisson = False
jec = rl.NuisanceParameter("CMS_jec", "lnN")
massScale = rl.NuisanceParameter("CMS_msdScale", "shape")
lumi = rl.NuisanceParameter("CMS_lumi", "lnN")
tqqeffSF = rl.IndependentParameter("tqqeffSF", 1.0, 0, 10)
tqqnormSF = rl.IndependentParameter("tqqnormSF", 1.0, 0, 10)
ptbins = np.array([525, 575, 625, 700, 800, 1500])
npt = len(ptbins) - 1
msdbins = np.linspace(40, 400, 40)
msd = rl.Observable("msd", msdbins)
# here we derive these all at once with 2D array
ptpts, msdpts = np.meshgrid(ptbins[:-1] + 0.3 * np.diff(ptbins), msdbins[:-1] + 0.5 * np.diff(msdbins), indexing="ij")
rhopts = 2 * np.log(msdpts / ptpts)
ptscaled = (ptpts - 525.0) / (1500.0 - 525.0)
rhoscaled = (rhopts - (-5.5)) / ((-2.) - (-5.5))
validbins = (rhoscaled >= 0) & (rhoscaled <= 1)
rhoscaled[~validbins] = 1 # we will mask these out later
# Build qcd MC pass+fail model and fit to polynomial
qcdmodel = rl.Model("qcdmodel")
print("XX")
qcdpass, qcdfail = 0.0, 0.0
import pandas as pd
df = pd.read_csv("histograms.csv")
for ptbin in range(npt):
failCh = rl.Channel("ptbin%d%s" % (ptbin, "fail"))
passCh = rl.Channel("ptbin%d%s" % (ptbin, "pass"))
qcdmodel.addChannel(failCh)
qcdmodel.addChannel(passCh)
# mock template
ptnorm = 1
# failTempl = expo_sample(norm=ptnorm * 1e5, scale=40, obs=msd)
# passTempl = expo_sample(norm=ptnorm * 1e3, scale=40, obs=msd)
pyields = df[f"QCD_msd_fail_{ptbin}"].to_numpy()
fyields = df[f"QCD_msd_pass_{ptbin}"].to_numpy()
func, punc = np.sqrt(pyields), np.sqrt(fyields)
failTempl = (pyields, msd.binning, "msd", func) #expo_sample(norm=ptnorm * 1e5, scale=40, obs=msd)
passTempl = (fyields, msd.binning, "msd", punc) #expo_sample(norm=ptnorm * 1e3, scale=40, obs=msd)
# passTempl[0][0:3] = 0
failCh.setObservation(failTempl, read_sumw2=True)
passCh.setObservation(passTempl, read_sumw2=True)
qcdfail += failCh.getObservation()[0].sum()
qcdpass += passCh.getObservation()[0].sum()
qcdeff = qcdpass / qcdfail
tf_MCtempl = rl.BernsteinPoly("tf_MCtempl", (1, 1), ["pt", "rho"], limits=(0, 10))
tf_MCtempl_params = qcdeff * tf_MCtempl(ptscaled, rhoscaled)
for ptbin in range(npt):
failCh = qcdmodel["ptbin%dfail" % ptbin]
passCh = qcdmodel["ptbin%dpass" % ptbin]
failObs = failCh.getObservation()[0]
qcdparams = np.array([rl.IndependentParameter("qcdparam_ptbin%d_msdbin%d" % (ptbin, i), 0) for i in range(msd.nbins)])
sigmascale = 10.0
scaledparams = failObs * (1 + sigmascale / np.maximum(1.0, np.sqrt(failObs))) ** qcdparams
fail_qcd = rl.ParametericSample("ptbin%dfail_qcd" % ptbin, rl.Sample.BACKGROUND, msd, scaledparams)
failCh.addSample(fail_qcd)
pass_qcd = rl.TransferFactorSample("ptbin%dpass_qcd" % ptbin, rl.Sample.BACKGROUND, tf_MCtempl_params[ptbin, :], fail_qcd)
passCh.addSample(pass_qcd)
failCh.mask = validbins[ptbin]
passCh.mask = validbins[ptbin]
qcdfit_ws = ROOT.RooWorkspace("qcdfit_ws")
simpdf, obs = qcdmodel.renderRoofit(qcdfit_ws)
qcdfit = simpdf.fitTo(
obs,
ROOT.RooFit.Extended(True),
ROOT.RooFit.SumW2Error(True),
ROOT.RooFit.Strategy(2),
ROOT.RooFit.Save(),
ROOT.RooFit.Minimizer("Minuit2", "migrad"),
ROOT.RooFit.PrintLevel(-1),
)
qcdfit_ws.add(qcdfit)
if "pytest" not in sys.modules:
qcdfit_ws.writeToFile(os.path.join(str(tmpdir), "testModel_qcdfit.root"))
if qcdfit.status() != 0:
raise RuntimeError("Could not fit qcd")
param_names = [p.name for p in tf_MCtempl.parameters.reshape(-1)]
decoVector = rl.DecorrelatedNuisanceVector.fromRooFitResult(tf_MCtempl.name + "_deco", qcdfit, param_names)
tf_MCtempl.parameters = decoVector.correlated_params.reshape(tf_MCtempl.parameters.shape)
tf_MCtempl_params_final = tf_MCtempl(ptscaled, rhoscaled)
tf_dataResidual = rl.BernsteinPoly("tf_dataResidual", (2, 2), ["pt", "rho"], limits=(0, 10))
tf_dataResidual_params = tf_dataResidual(ptscaled, rhoscaled)
tf_params = qcdeff * tf_MCtempl_params_final * tf_dataResidual_params
# build actual fit model now
model = rl.Model("testModel")
for ptbin in range(npt):
for region in ["pass", "fail"]:
ch = rl.Channel("ptbin%d%s" % (ptbin, region))
model.addChannel(ch)
isPass = region == "pass"
ptnorm = 1.0
templates = {
"wqq": gaus_sample(norm=ptnorm * (100 if isPass else 300), loc=80, scale=8, obs=msd),
"zqq": gaus_sample(norm=ptnorm * (200 if isPass else 100), loc=91, scale=8, obs=msd),
"tqq": gaus_sample(norm=ptnorm * (40 if isPass else 80), loc=150, scale=20, obs=msd),
"hqq": gaus_sample(norm=ptnorm * (20 if isPass else 5), loc=125, scale=8, obs=msd),
"hqq100": gaus_sample(norm=ptnorm * (20 if isPass else 5), loc=100, scale=2, obs=msd),
"hqq125": gaus_sample(norm=ptnorm * (20 if isPass else 5), loc=125, scale=2, obs=msd),
"hqq150": gaus_sample(norm=ptnorm * (20 if isPass else 5), loc=150, scale=2, obs=msd),
}
for sName in ["zqq", "wqq", "tqq", "hqq", "hqq125", "hqq100", "hqq150"]:
# some mock expectations
templ = templates[sName]
stype = rl.Sample.SIGNAL if "hqq" in sName else rl.Sample.BACKGROUND
sample = rl.TemplateSample(ch.name + "_" + sName, stype, templ, mass=True)
# mock systematics
jecup_ratio = np.random.normal(loc=1, scale=0.05, size=msd.nbins)
msdUp = np.linspace(0.9, 1.1, msd.nbins)
msdDn = np.linspace(1.2, 0.8, msd.nbins)
# for jec we set lnN prior, shape will automatically be converted to norm systematic
sample.setParamEffect(jec, jecup_ratio)
sample.setParamEffect(massScale, msdUp, msdDn)
sample.setParamEffect(lumi, 1.027)
ch.addSample(sample)
# make up a data_obs, with possibly different yield values
templates = {
"wqq": gaus_sample(norm=ptnorm * (100 if isPass else 300), loc=80, scale=8, obs=msd),
"zqq": gaus_sample(norm=ptnorm * (200 if isPass else 100), loc=91, scale=8, obs=msd),
"tqq": gaus_sample(norm=ptnorm * (40 if isPass else 80), loc=150, scale=20, obs=msd),
"hqq": gaus_sample(norm=ptnorm * (20 if isPass else 5), loc=125, scale=8, obs=msd),
"qcd": expo_sample(norm=ptnorm * (1e3 if isPass else 1e5), scale=40, obs=msd),
}
yields = sum(tpl[0] for tpl in templates.values())
if throwPoisson:
yields = np.random.poisson(yields)
data_obs = (yields, msd.binning, msd.name)
ch.setObservation(data_obs)
# drop bins outside rho validity
mask = validbins[ptbin]
# blind bins 11, 12, 13
# mask[11:14] = False
ch.mask = mask
for ptbin in range(npt):
failCh = model["ptbin%dfail" % ptbin]
passCh = model["ptbin%dpass" % ptbin]
qcdparams = np.array([rl.IndependentParameter("qcdparam_ptbin%d_msdbin%d" % (ptbin, i), 0) for i in range(msd.nbins)])
initial_qcd = failCh.getObservation().astype(float) # was integer, and numpy complained about subtracting float from it
for sample in failCh:
initial_qcd -= sample.getExpectation(nominal=True)
if np.any(initial_qcd < 0.0):
raise ValueError("initial_qcd negative for some bins..", initial_qcd)
sigmascale = 10 # to scale the deviation from initial
scaledparams = initial_qcd * (1 + sigmascale / np.maximum(1.0, np.sqrt(initial_qcd))) ** qcdparams
fail_qcd = rl.ParametericSample("ptbin%dfail_qcd" % ptbin, rl.Sample.BACKGROUND, msd, scaledparams)
failCh.addSample(fail_qcd)
pass_qcd = rl.TransferFactorSample("ptbin%dpass_qcd" % ptbin, rl.Sample.BACKGROUND, tf_params[ptbin, :], fail_qcd)
passCh.addSample(pass_qcd)
tqqpass = passCh["tqq"]
tqqfail = failCh["tqq"]
tqqPF = tqqpass.getExpectation(nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)
# Fill in muon CR
for region in ["pass", "fail"]:
ch = rl.Channel("muonCR%s" % (region,))
model.addChannel(ch)
isPass = region == "pass"
templates = {
"tqq": gaus_sample(norm=10 * (30 if isPass else 60), loc=150, scale=20, obs=msd),
"qcd": expo_sample(norm=10 * (5e2 if isPass else 1e3), scale=40, obs=msd),
}
for sName, templ in templates.items():
stype = rl.Sample.BACKGROUND
sample = rl.TemplateSample(ch.name + "_" + sName, stype, templ)
# mock systematics
jecup_ratio = np.random.normal(loc=1, scale=0.05, size=msd.nbins)
sample.setParamEffect(jec, jecup_ratio)
ch.addSample(sample)
# make up a data_obs
templates = {
"tqq": gaus_sample(norm=10 * (30 if isPass else 60), loc=150, scale=20, obs=msd),
"qcd": expo_sample(norm=10 * (5e2 if isPass else 1e3), scale=40, obs=msd),
}
yields = sum(tpl[0] for tpl in templates.values())
if throwPoisson:
yields = np.random.poisson(yields)
data_obs = (yields, msd.binning, msd.name)
ch.setObservation(data_obs)
tqqpass = model["muonCRpass_tqq"]
tqqfail = model["muonCRfail_tqq"]
tqqPF = tqqpass.getExpectation(nominal=True).sum() / tqqfail.getExpectation(nominal=True).sum()
tqqpass.setParamEffect(tqqeffSF, 1 * tqqeffSF)
tqqfail.setParamEffect(tqqeffSF, (1 - tqqeffSF) * tqqPF + 1)
tqqpass.setParamEffect(tqqnormSF, 1 * tqqnormSF)
tqqfail.setParamEffect(tqqnormSF, 1 * tqqnormSF)
with open(os.path.join(str(tmpdir), "testModel.pkl"), "wb") as fout:
pickle.dump(model, fout)
model.renderCombine(os.path.join(str(tmpdir), "testModel"))
def test_monojet(tmpdir):
model = rl.Model("testMonojet")
# lumi = rl.NuisanceParameter('CMS_lumi', 'lnN')
jec = rl.NuisanceParameter("CMS_jec", "shape")
ele_id_eff = rl.NuisanceParameter("CMS_ele_id_eff", "shape")
pho_id_eff = rl.NuisanceParameter("CMS_pho_id_eff", "shape")
gamma_to_z_ewk = rl.NuisanceParameter("Theory_gamma_z_ewk", "shape")
recoilbins = np.linspace(300, 1200, 13)
recoil = rl.Observable("recoil", recoilbins)
signalCh = rl.Channel("signalCh")
model.addChannel(signalCh)
zvvTemplate = expo_sample(1000, 400, recoil)
zvvJetsMC = rl.TemplateSample("zvvJetsMC", rl.Sample.BACKGROUND, zvvTemplate)
zvvJetsMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.01, size=recoil.nbins))
# these parameters are large, should probably log-transform them
zvvBinYields = np.array([rl.IndependentParameter("tmp", b, 0, zvvTemplate[0].max() * 2) for b in zvvTemplate[0]]) # name will be changed by ParametericSample
zvvJets = rl.ParametericSample("signalCh_zvvJets", rl.Sample.BACKGROUND, recoil, zvvBinYields)
signalCh.addSample(zvvJets)
dmTemplate = expo_sample(100, 800, recoil)
dmSample = rl.TemplateSample("signalCh_someDarkMatter", rl.Sample.SIGNAL, dmTemplate)
signalCh.addSample(dmSample)
signalCh.setObservation(expo_sample(1000, 400, recoil))
zllCh = rl.Channel("zllCh")
model.addChannel(zllCh)
zllTemplate = expo_sample(1000 * 6.6 / 20, 400, recoil)
zllJetsMC = rl.TemplateSample("zllJetsMC", rl.Sample.BACKGROUND, zllTemplate)
zllJetsMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.05, size=recoil.nbins))
zllJetsMC.setParamEffect(ele_id_eff, np.random.normal(loc=1, scale=0.02, size=recoil.nbins), np.random.normal(loc=1, scale=0.02, size=recoil.nbins))
zllTransferFactor = zllJetsMC.getExpectation() / zvvJetsMC.getExpectation()
zllJets = rl.TransferFactorSample("zllCh_zllJets", rl.Sample.BACKGROUND, zllTransferFactor, zvvJets)
zllCh.addSample(zllJets)
otherbkgTemplate = expo_sample(200, 250, recoil)
otherbkg = rl.TemplateSample("zllCh_otherbkg", rl.Sample.BACKGROUND, otherbkgTemplate)
otherbkg.setParamEffect(jec, np.random.normal(loc=1, scale=0.01, size=recoil.nbins))
zllCh.addSample(otherbkg)
zllCh.setObservation(expo_sample(1200, 380, recoil))
gammaCh = rl.Channel("gammaCh")
model.addChannel(gammaCh)
gammaTemplate = expo_sample(2000, 450, recoil)
gammaJetsMC = rl.TemplateSample("gammaJetsMC", rl.Sample.BACKGROUND, gammaTemplate)
gammaJetsMC.setParamEffect(jec, np.random.normal(loc=1, scale=0.05, size=recoil.nbins))
gammaJetsMC.setParamEffect(pho_id_eff, np.random.normal(loc=1, scale=0.02, size=recoil.nbins))
gammaTransferFactor = gammaJetsMC.getExpectation() / zvvJetsMC.getExpectation()
gammaJets = rl.TransferFactorSample("gammaCh_gammaJets", rl.Sample.BACKGROUND, gammaTransferFactor, zvvJets)
gammaJets.setParamEffect(gamma_to_z_ewk, np.linspace(1.01, 1.05, recoil.nbins))
gammaCh.addSample(gammaJets)
gammaCh.setObservation(expo_sample(2000, 450, recoil))
with open(os.path.join(str(tmpdir), "monojetModel.pkl"), "wb") as fout:
pickle.dump(model, fout)
model.renderCombine(os.path.join(str(tmpdir), "monojetModel"))
if __name__ == "__main__":
if not os.path.exists("tmp"):
os.mkdir("tmp")
test_rhalphabet("tmp")
test_monojet("tmp")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment