Created
February 12, 2024 14:59
-
-
Save giorgiopizz/48629ee8561c3e254c26e59d1957486a to your computer and use it in GitHub Desktop.
mkDatacards.py updated
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import ROOT | |
import optparse | |
import collections | |
import os.path | |
import shutil | |
ROOT.gROOT.SetBatch(True) | |
argv = sys.argv | |
sys.argv = argv[:1] | |
# ----------------------------------------------------- DatacardFactory -------------------------------------- | |
class DatacardFactory: | |
# _logger = logging.getLogger('DatacardFactory') | |
# _____________________________________________________________________________ | |
def __init__(self): | |
self._fileIn = None | |
self._skipMissingNuisance = False | |
self.onlyVariable = None | |
self.onlyCut = None | |
# _____________________________________________________________________________ | |
# a datacard for each "cut" and each "variable" will be produced, in separate sub-folders, names after "cut/variable" | |
# _____________________________________________________________________________ | |
def makeDatacards( | |
self, | |
inputFile, | |
outputDirDatacard, | |
variables, | |
cuts, | |
samples, | |
structureFile, | |
nuisances, | |
): | |
print("=======================") | |
print("==== makeDatacards ====") | |
print("=======================") | |
if os.path.isdir(inputFile): | |
# ONLY COMPATIBLE WITH OUTPUTS MERGED TO SAMPLE LEVEL!! | |
self._fileIn = {} | |
for sampleName in samples: | |
self._fileIn[sampleName] = ROOT.TFile.Open( | |
inputFile + "/plots_%s_ALL_%s.root" % (self._tag, sampleName) | |
) | |
if not self._fileIn[sampleName]: | |
raise RuntimeError( | |
"Input file for sample " + sampleName + " missing" | |
) | |
else: | |
self._fileIn = ROOT.TFile(inputFile, "READ") | |
ROOT.gROOT.GetListOfFiles().Remove(self._fileIn) # Speed up | |
# categorize the sample names | |
signal_ids = ( | |
collections.OrderedDict() | |
) # id map of alternative_signals + cumulative_signals | |
alternative_signals = [""] | |
cumulative_signals = [] | |
data = [] | |
background_ids = collections.OrderedDict() | |
# divide the list of samples among signal, background and data | |
isig = 0 | |
ibkg = 1 | |
for sampleName in samples: | |
if sampleName not in list(structureFile.keys()): continue | |
if structureFile[sampleName]["isSignal"] != 0: | |
signal_ids[sampleName] = isig | |
isig -= 1 | |
if structureFile[sampleName]["isSignal"] == 2: | |
alternative_signals.append(sampleName) | |
if structureFile[sampleName]["isSignal"] == 1: | |
cumulative_signals.append(sampleName) | |
if structureFile[sampleName]["isData"] == 1: | |
data.append(sampleName) | |
if ( | |
structureFile[sampleName]["isSignal"] == 0 | |
and structureFile[sampleName]["isData"] == 0 | |
): | |
background_ids[sampleName] = ibkg | |
ibkg += 1 | |
print("Number of Signals: " + str(len(signal_ids))) | |
print("Number of Alternative Signals: " + str(len(alternative_signals) - 1)) | |
print("Number of Cumulative Signals: " + str(len(cumulative_signals))) | |
print("Number of Backgrounds: " + str(len(background_ids))) | |
if not os.path.isdir(outputDirDatacard + "/"): | |
os.makedirs(outputDirDatacard + "/", exist_ok=True) | |
# loop over cuts. One directory per cut will be created | |
for cutName in cuts: | |
if self.onlyCut and self.onlyCut != cutName: continue | |
print("cut = ", cutName) | |
# try: | |
# shutil.rmtree(outputDirDatacard + "/" + cutName) | |
# except OSError: | |
# pass | |
os.makedirs(outputDirDatacard + "/" + cutName, exist_ok=True) | |
# | |
# prepare the signals and background list of samples | |
# after removing the ones not to be used in this specific phase space | |
# | |
cut_signals = [] | |
cut_backgrounds = [] | |
for snameList, idmap in [ | |
(cut_signals, signal_ids), | |
(cut_backgrounds, background_ids), | |
]: | |
for sampleName in idmap: | |
if ( | |
"removeFromCuts" in structureFile[sampleName] | |
and cutName in structureFile[sampleName]["removeFromCuts"] | |
): | |
# remove from the list | |
print(" remove ", sampleName, " from ", cutName) | |
else: | |
snameList.append(sampleName) | |
print(" sampleNames = ", cut_signals + cut_backgrounds) | |
# loop over variables | |
for variableName, variable in variables.items(): | |
if "cuts" in variable and cutName not in variable["cuts"]: | |
continue | |
if self.onlyVariable and self.onlyVariable != variableName: continue | |
print(" variableName = ", variableName) | |
tagNameToAppearInDatacard = cutName | |
# e.g. hww2l2v_13TeV_of0j | |
# to be defined in cuts.py | |
os.makedirs(outputDirDatacard + "/" + cutName + "/" + variableName, exist_ok=True) | |
os.makedirs( | |
outputDirDatacard + "/" + cutName + "/" + variableName + "/shapes/", exist_ok=True | |
) # and the folder for the root files | |
self._outFile = ROOT.TFile.Open( | |
outputDirDatacard | |
+ "/" | |
+ cutName | |
+ "/" | |
+ variableName | |
+ "/shapes/histos_" | |
+ tagNameToAppearInDatacard | |
+ ".root", | |
"recreate", | |
) | |
ROOT.gROOT.GetListOfFiles().Remove(self._outFile) | |
# prepare yields | |
yieldsSig = {} | |
yieldsBkg = {} | |
yieldsData = {} | |
# Bin to be killed | |
# killBinSig = {} | |
# killBinBkg = {} | |
for snameList, yields in [ | |
(cut_signals, yieldsSig), | |
(cut_backgrounds, yieldsBkg), | |
(data, yieldsData), | |
]: | |
for sampleName in snameList: | |
histo = self._getHisto(cutName, variableName, sampleName) | |
histo.SetDirectory(self._outFile) | |
# get the integral == rate from histogram | |
if structureFile[sampleName]["isData"] == 1: | |
yields["data"] = histo.Integral() | |
histo.SetName("histo_Data") | |
else: | |
# if 'removeNegNomVal' in structureFile[sampleName] and structureFile[sampleName]['removeNegNomVal'] : | |
# for iBin in range(0,histo.GetNbinsX()+2) : | |
# BinContent = histo.GetBinContent(iBin) | |
# if BinContent < 0.1 and not BinContent == 0: | |
# print('Killing Bin :' , sampleName , iBin , BinContent) | |
# if not sampleName in killBinSig : killBinSig[sampleName] = [] | |
# killBinSig[sampleName].append(iBin) | |
# histo.SetBinContent(iBin,0.) | |
if "scaleSampleForDatacard" in structure[sampleName]: | |
scaleFactor = 1.0 | |
if ( | |
type( | |
structure[sampleName]["scaleSampleForDatacard"] | |
) | |
is dict | |
): | |
try: | |
scaleFactor = structure[sampleName][ | |
"scaleSampleForDatacard" | |
][cutName] | |
except: | |
pass | |
if ( | |
type( | |
structure[sampleName]["scaleSampleForDatacard"] | |
) | |
is int | |
or type( | |
structure[sampleName]["scaleSampleForDatacard"] | |
) | |
is float | |
): | |
scaleFactor = structure[sampleName][ | |
"scaleSampleForDatacard" | |
] | |
histo.Scale(scaleFactor) | |
yields[sampleName] = histo.Integral() | |
# | |
# Remove statistical uncertainty from the histogram | |
# This may be used to suppress the effect on "autoMCstat" of a specific | |
# sample, by means of putting its uncertainty to 0 | |
# | |
if ( | |
"removeStatUnc" in structureFile[sampleName] | |
and structureFile[sampleName]["removeStatUnc"] | |
): | |
self._removeStatUncertainty(histo) | |
self._outFile.cd() | |
histo.Write() | |
# Loop over alternative signal samples. One card per signal (there is always at least one entry ("")) | |
for signalName in alternative_signals: | |
if signalName == "": | |
signals = [ | |
name for name in cumulative_signals if name in cut_signals | |
] | |
else: | |
if signalName not in cut_signals: | |
continue | |
signals = [signalName] + [ | |
name for name in cumulative_signals if name in cut_signals | |
] | |
processes = signals + cut_backgrounds | |
print(" Alternative signal: " + str(signalName)) | |
alternativeSignalName = str(signalName) | |
alternativeSignalTitle = "" | |
if alternativeSignalName != "": | |
alternativeSignalTitle = "_" + str(signalName) | |
# start creating the datacard | |
cardPath = ( | |
outputDirDatacard | |
+ "/" | |
+ cutName | |
+ "/" | |
+ variableName | |
+ "/datacard" | |
+ alternativeSignalTitle | |
+ ".txt" | |
) | |
print(" Writing to " + cardPath) | |
card = open(cardPath, "w") | |
print(" headers..") | |
card.write("## Shape input card\n") | |
card.write("imax 1 number of channels\n") | |
card.write("jmax * number of background\n") | |
card.write("kmax * number of nuisance parameters\n") | |
card.write("-" * 100 + "\n") | |
card.write("bin %s" % tagNameToAppearInDatacard + "\n") | |
if len(data) == 0: | |
self._logger.warning("no data, no fun! ") | |
# raise RuntimeError('No Data found!') | |
yieldsData["data"] = 0 | |
card.write("observation %.0f\n" % yieldsData["data"]) | |
card.write( | |
"shapes * * " | |
+ "shapes/histos_" | |
+ tagNameToAppearInDatacard | |
+ ".root" | |
+ " histo_$PROCESS histo_$PROCESS_$SYSTEMATIC" | |
+ "\n" | |
) | |
card.write( | |
"shapes data_obs * " | |
+ "shapes/histos_" | |
+ tagNameToAppearInDatacard | |
+ ".root" | |
+ " histo_Data" | |
+ "\n" | |
) | |
# shapes * * shapes/hww-19.36fb.mH125.of_vh2j_shape_mll.root histo_$PROCESS histo_$PROCESS_$SYSTEMATIC | |
# shapes data_obs * shapes/hww-19.36fb.mH125.of_vh2j_shape_mll.root histo_Data | |
columndef = 30 | |
# adapt column length to long bin names | |
if len(tagNameToAppearInDatacard) >= (columndef - 5): | |
columndef = len(tagNameToAppearInDatacard) + 7 | |
for name in signals: | |
if len(name) >= (columndef - 5): | |
columndef = len(name) + 7 | |
print(" processes and rates..") | |
card.write("bin".ljust(80)) | |
card.write( | |
"".join( | |
[tagNameToAppearInDatacard.ljust(columndef)] | |
* (len(signals) + len(cut_backgrounds)) | |
) | |
+ "\n" | |
) | |
card.write("process".ljust(80)) | |
card.write("".join(name.ljust(columndef) for name in signals)) | |
card.write( | |
"".join(name.ljust(columndef) for name in cut_backgrounds) | |
) | |
card.write("\n") | |
card.write("process".ljust(80)) | |
card.write( | |
"".join( | |
("%d" % signal_ids[name]).ljust(columndef) | |
for name in signals | |
) | |
) | |
card.write( | |
"".join( | |
("%d" % background_ids[name]).ljust(columndef) | |
for name in cut_backgrounds | |
) | |
) | |
card.write("\n") | |
card.write("rate".ljust(80)) | |
card.write( | |
"".join( | |
("%-.4f" % yieldsSig[name]).ljust(columndef) | |
for name in signals | |
) | |
) | |
card.write( | |
"".join( | |
("%-.4f" % yieldsBkg[name]).ljust(columndef) | |
for name in cut_backgrounds | |
) | |
) | |
card.write("\n") | |
card.write("-" * 100 + "\n") | |
print(" nuisances..") | |
nuisanceGroups = collections.defaultdict(list) | |
# add normalization and shape nuisances | |
for nuisanceName, nuisance in nuisances.items(): | |
if "type" not in nuisance: | |
raise RuntimeError( | |
"Nuisance " | |
+ nuisanceName | |
+ " is missing the type specification" | |
) | |
if nuisanceName == "stat" or nuisance["type"] == "rateParam": | |
# will deal with these later | |
continue | |
# check if a nuisance can be skipped because not in this particular cut | |
if "cuts" in nuisance and cutName not in nuisance["cuts"]: | |
continue | |
if nuisance["type"] in ["lnN", "lnU"]: | |
# why is adding CMS_ not the default for lnN/lnU? (Y.I. 2019.11.06) | |
entryName = nuisance["name"] | |
if ( | |
"perRecoBin" in nuisance.keys() | |
and nuisance["perRecoBin"] | |
): | |
entryName += "_" + cutName | |
card.write(entryName.ljust(80 - 20)) | |
if ( | |
"AsShape" in nuisance | |
and float(nuisance["AsShape"]) >= 1.0 | |
): | |
print( | |
">>>>>", | |
nuisance["name"], | |
" was derived as a lnN uncertainty but is being treated as a shape", | |
) | |
card.write(("shape").ljust(20)) | |
for sampleName in processes: | |
if ( | |
"cuts_samples" in nuisance | |
and sampleName in nuisance["cuts_samples"] | |
and cutName | |
not in nuisance["cuts_samples"][sampleName] | |
): | |
# If the cuts_samples options is there and the sample is inserted in the dictionary | |
# check if the current cutName is included. Excluded the cuts not in the list | |
print( | |
"Removing nuisance ", | |
nuisanceName, | |
" for sample ", | |
sampleName, | |
" from cut ", | |
cutName, | |
) | |
card.write(("-").ljust(columndef)) | |
elif ( | |
"all" in nuisance and nuisance["all"] == 1 | |
) or ( | |
"samples" in nuisance | |
and len(list(filter(lambda k: k in sampleName , nuisance["samples"].keys())))>0 | |
): | |
histo = self._getHisto( | |
cutName, variableName, sampleName | |
) | |
histoUp = histo.Clone( | |
"%s_%sUp" | |
% (histo.GetName(), nuisance["name"]) | |
) | |
histoDown = histo.Clone( | |
"%s_%sDown" | |
% (histo.GetName(), nuisance["name"]) | |
) | |
histoUp.SetDirectory(self._outFile) | |
histoDown.SetDirectory(self._outFile) | |
if ( | |
"scaleSampleForDatacard" | |
in structure[sampleName] | |
): | |
scaleFactor = 1.0 | |
if ( | |
type( | |
structure[sampleName][ | |
"scaleSampleForDatacard" | |
] | |
) | |
is dict | |
): | |
try: | |
scaleFactor = structure[sampleName][ | |
"scaleSampleForDatacard" | |
][cutName] | |
except: | |
pass | |
if ( | |
type( | |
structure[sampleName][ | |
"scaleSampleForDatacard" | |
] | |
) | |
is int | |
or type( | |
structure[sampleName][ | |
"scaleSampleForDatacard" | |
] | |
) | |
is float | |
): | |
scaleFactor = structure[sampleName][ | |
"scaleSampleForDatacard" | |
] | |
histoUp.Scale(scaleFactor) | |
histoDown.Scale(scaleFactor) | |
if "/" in nuisance["samples"][sampleName]: | |
up, down = nuisance["samples"][ | |
sampleName | |
].split("/") | |
histoUp.Scale(float(up)) | |
histoDown.Scale(float(down)) | |
else: | |
histoUp.Scale( | |
float(nuisance["samples"][sampleName]) | |
) | |
histoDown.Scale( | |
1.0 | |
/ float(nuisance["samples"][sampleName]) | |
) | |
self._outFile.cd() | |
histoUp.Write() | |
histoDown.Write() | |
card.write("1.000".ljust(columndef)) | |
else: | |
card.write(("-").ljust(columndef)) | |
else: | |
card.write((nuisance["type"]).ljust(20)) | |
if ( | |
"all" in nuisance and nuisance["all"] == 1 | |
): # for all samples | |
card.write( | |
"".join( | |
("%-.4f" % nuisance["value"]).ljust( | |
columndef | |
) | |
for _ in processes | |
) | |
) | |
else: | |
# apply only to selected samples | |
for sampleName in processes: | |
# if sampleName in nuisance["samples"]: | |
if len(list(filter(lambda k: k in sampleName , nuisance["samples"].keys())))>0: | |
# in this case nuisance['samples'] is a dict mapping sample name to nuisance values in string | |
card.write( | |
( | |
"%s" | |
% nuisance["samples"][sampleName] # % nuisance["samples"][sampleName.split('_')[0]] | |
).ljust(columndef) | |
) | |
else: | |
card.write(("-").ljust(columndef)) | |
elif nuisance["type"] == "shape": | |
# | |
# 'skipCMS' is a flag not to introduce "CMS" automatically in the name | |
# of the nuisance. | |
# It may be needed for combination purposes with ATLAS | |
# and agreed upon for all CMS analyses. | |
# For example: theoretical uncertainties on ggH with migration scheme 2017 | |
# | |
if "skipCMS" in nuisance and nuisance["skipCMS"] == 1: | |
entryName = nuisance["name"] | |
else: | |
entryName = "CMS_" + nuisance["name"] | |
if ( | |
"perRecoBin" in nuisance.keys() | |
and nuisance["perRecoBin"] | |
): | |
entryName += "_" + cutName | |
card.write(entryName.ljust(80 - 20)) | |
if "AsLnN" in nuisance and float(nuisance["AsLnN"]) >= 1.0: | |
print( | |
">>>>>", | |
nuisance["name"], | |
" was derived as a shape uncertainty but is being treated as a lnN", | |
) | |
card.write(("lnN").ljust(20)) | |
for sampleName in processes: | |
if ( | |
"cuts_samples" in nuisance | |
and sampleName in nuisance["cuts_samples"] | |
and cutName | |
not in nuisance["cuts_samples"][sampleName] | |
): | |
# If the cuts_samples options is there and the sample is inserted in the dictionary | |
# check if the current cutName is included. Excluded the cuts not in the list | |
print( | |
"Removing nuisance ", | |
nuisanceName, | |
" for sample ", | |
sampleName, | |
" from cut ", | |
cutName, | |
) | |
card.write(("-").ljust(columndef)) | |
elif ( | |
"all" in nuisance and nuisance["all"] == 1 | |
) or ( | |
"samples" in nuisance | |
and len(list(filter(lambda k: k in sampleName , nuisance["samples"].keys())))>0 | |
): | |
histo = self._getHisto( | |
cutName, variableName, sampleName | |
) | |
histoUp = self._getHisto( | |
cutName, | |
variableName, | |
sampleName, | |
"_" + nuisance["name"] + "Up", | |
) | |
histoDown = self._getHisto( | |
cutName, | |
variableName, | |
sampleName, | |
"_" + nuisance["name"] + "Down", | |
) | |
if not histoUp or not histoDown: | |
print( | |
"Histogram for nuisance", | |
nuisance["name"], | |
"on sample", | |
sampleName, | |
"missing", | |
) | |
if self._skipMissingNuisance: | |
card.write(("-").ljust(columndef)) | |
continue | |
histoIntegral = histo.Integral() | |
histoUpIntegral = histoUp.Integral() | |
histoDownIntegral = histoDown.Integral() | |
if ( | |
histoIntegral > 0.0 | |
and histoUpIntegral > 0.0 | |
): | |
diffUp = ( | |
(histoUpIntegral - histoIntegral) | |
/ histoIntegral | |
/ float(nuisance["AsLnN"]) | |
) | |
else: | |
diffUp = 0.0 | |
if ( | |
histoIntegral > 0.0 | |
and histoDownIntegral > 0.0 | |
): | |
diffDo = ( | |
(histoDownIntegral - histoIntegral) | |
/ histoIntegral | |
/ float(nuisance["AsLnN"]) | |
) | |
else: | |
diffDo = 0.0 | |
if ( | |
"symmetrize" in nuisance | |
and nuisance["symmetrize"] | |
): | |
diff = (diffUp - diffDo) * 0.5 | |
if diff >= 1.0: | |
# can't symmetrize | |
diffUp = diff * 2.0 - 0.999 | |
diffDo = -0.999 | |
elif diff <= -1.0: | |
diffUp = -0.999 | |
diffDo = -diff * 2.0 - 0.999 | |
else: | |
diffUp = diff | |
diffDo = -diff | |
lnNUp = 1.0 + diffUp | |
lnNDo = 1.0 + diffDo | |
# | |
# avoid 0.00/XXX and XXX/0.00 in the lnN --> ill defined nuisance | |
# if this happens put 0.00 ---> 1.00, meaning *no* effect of this nuisance | |
# Done like this because it is very likely what you are experiencing | |
# is a MC fluctuation in the varied sample, that is the up/down histogram has 0 entries! | |
# | |
# NB: with the requirement "histoUpIntegral > 0" and "histoDownIntegral > 0" this should never happen, | |
# except for some strange coincidence of "AsLnN" ... | |
# This fix is left, just for sake of safety | |
# | |
if lnNUp == 0: | |
lnNUp = 1.0 | |
if lnNDo == 0: | |
lnNDo = 1.0 | |
if ( | |
abs(lnNUp - 1.0) < 5.0e-4 | |
and abs(lnNDo - 1.0) < 5.0e-4 | |
): | |
card.write(("-").ljust(columndef)) | |
else: | |
card.write( | |
( | |
("%-.4f" % lnNUp) | |
+ "/" | |
+ ("%-.4f" % lnNDo) | |
).ljust(columndef) | |
) | |
else: | |
card.write(("-").ljust(columndef)) | |
else: | |
card.write("shape".ljust(20)) | |
for sampleName in processes: | |
if ( | |
"cuts_samples" in nuisance | |
and sampleName in nuisance["cuts_samples"] | |
and cutName | |
not in nuisance["cuts_samples"][sampleName] | |
): | |
# If the cuts_samples options is there and the sample is inserted in the dictionary | |
# check if the current cutName is included. Excluded the cuts not in the list | |
print( | |
"Removing nuisance ", | |
nuisanceName, | |
" for sample ", | |
sampleName, | |
" from cut ", | |
cutName, | |
) | |
card.write(("-").ljust(columndef)) | |
elif ( | |
"all" in nuisance and nuisance["all"] == 1 | |
) or ( | |
"samples" in nuisance | |
and len(list(filter(lambda k: k in sampleName , nuisance["samples"].keys())))>0 | |
): | |
# save the nuisance histograms in the root file | |
if ("skipCMS" in nuisance.keys()) and nuisance[ | |
"skipCMS" | |
] == 1: | |
suffixOut = None | |
else: | |
suffixOut = "_CMS_" + nuisance["name"] | |
if ( | |
"perRecoBin" in nuisance.keys() | |
and nuisance["perRecoBin"] | |
): | |
if ( | |
"skipCMS" in nuisance.keys() | |
) and nuisance["skipCMS"] == 1: | |
suffixOut = "_" + nuisance["name"] | |
else: | |
suffixOut = "_CMS_" + nuisance["name"] | |
suffixOut += "_" + cutName | |
symmetrize = ( | |
"symmetrize" in nuisance | |
and nuisance["symmetrize"] | |
) | |
saved = self._saveNuisanceHistos( | |
cutName, | |
variableName, | |
sampleName, | |
"_" + nuisance["name"], | |
suffixOut, | |
symmetrize, | |
) | |
if saved: | |
card.write("1.000".ljust(columndef)) | |
else: | |
# saved can be false if skipMissingNuisance is true and histogram cannot be found | |
card.write(("-").ljust(columndef)) | |
else: | |
card.write(("-").ljust(columndef)) | |
# closing block if type == lnN or shape | |
card.write("\n") | |
if "group" in nuisance: | |
nuisanceGroups[nuisance["group"]].append(entryName) | |
for group in sorted(nuisanceGroups.keys()): | |
card.write( | |
"%s group = %s\n" % (group, " ".join(nuisanceGroups[group])) | |
) | |
# done with normalization and shape nuisances. | |
# now add the stat nuisances | |
if "stat" in nuisances: | |
nuisance = nuisances["stat"] | |
if nuisance["type"] == "auto": | |
# use autoMCStats feature of combine | |
card.write( | |
"* autoMCStats " | |
+ nuisance["maxPoiss"] | |
+ " " | |
+ nuisance["includeSignal"] | |
) | |
# nuisance ['maxPoiss'] = Number of threshold events for Poisson modelling | |
# nuisance ['includeSignal'] = Include MC stat nuisances on signal processes (1=True, 0=False) | |
card.write("\n") | |
else: | |
# otherwise we process stat nuisances sample-by-sample | |
for sampleName in processes: | |
if sampleName not in nuisance["samples"]: | |
continue | |
sampleNuisance = nuisance["samples"][sampleName] | |
sampleIndex = processes.index(sampleName) | |
if ( | |
sampleNuisance["typeStat"] == "uni" | |
): # unified approach | |
# save the nuisance histograms in the root file | |
saved = self._saveNuisanceHistos( | |
cutName, | |
variableName, | |
sampleName, | |
"stat", | |
"_CMS_" | |
+ tagNameToAppearInDatacard | |
+ "_" | |
+ sampleName | |
+ "_stat", | |
) | |
if saved: | |
card.write( | |
( | |
"CMS_" | |
+ tagNameToAppearInDatacard | |
+ "_" | |
+ sampleName | |
+ "_stat" | |
).ljust(80 - 20) | |
) | |
card.write((nuisance["type"]).ljust(20)) | |
# write the line in datacard. Only the column for this sample gets 1.000 | |
for idx in range(len(processes)): | |
if idx == sampleIndex: | |
card.write(("1.000").ljust(columndef)) | |
else: | |
card.write(("-").ljust(columndef)) | |
card.write("\n") | |
elif sampleNuisance["typeStat"] == "bbb": # bin-by-bin | |
histoTemplate = self._getHisto( | |
cutName, variableName, sampleName | |
) | |
for iBin in range(1, histoTemplate.GetNbinsX() + 1): | |
if "correlate" in sampleNuisance: | |
# specify the sample that is source of the variation | |
tag = "_ibin" + sampleName + "_" | |
correlate = list( | |
sampleNuisance["correlate"] | |
) # list of sample names to correlate bin-by-bin with this sample | |
else: | |
tag = "_ibin_" | |
correlate = [] | |
# save the nuisance histograms in the root file | |
saved = self._saveNuisanceHistos( | |
cutName, | |
variableName, | |
sampleName, | |
tag + str(iBin) + "_stat", | |
"_CMS_" | |
+ tagNameToAppearInDatacard | |
+ "_" | |
+ sampleName | |
+ "_ibin_" | |
+ str(iBin) | |
+ "_stat", | |
) | |
if not saved: | |
continue | |
for other in list(correlate): | |
saved = self._saveNuisanceHistos( | |
cutName, | |
variableName, | |
other, | |
tag + str(iBin) + "_stat", | |
"_CMS_" | |
+ tagNameToAppearInDatacard | |
+ "_" | |
+ sampleName | |
+ "_ibin_" | |
+ str(iBin) | |
+ "_stat", | |
) | |
if not saved: | |
correlate.remove(other) | |
card.write( | |
( | |
"CMS_" | |
+ tagNameToAppearInDatacard | |
+ "_" | |
+ sampleName | |
+ "_ibin_" | |
+ str(iBin) | |
+ "_stat" | |
).ljust(100 - 20) | |
) | |
card.write((nuisance["type"]).ljust(20)) | |
# write line in datacard. One line per sample per bin | |
for idx in range(len(processes)): | |
if ( | |
idx == sampleIndex | |
or processes[idx] in correlate | |
): | |
card.write(("1.000").ljust(columndef)) | |
else: | |
card.write(("-").ljust(columndef)) | |
card.write("\n") | |
# now add the "rateParam" for the normalization | |
# e.g.: z_norm rateParam htsearch zll 1 | |
# see: https://twiki.cern.ch/twiki/bin/view/CMS/HiggsWG/SWGuideNonStandardCombineUses#Rate_Parameters | |
# 'rateParam' has a separate treatment -> it's just a line at the end of the datacard. It defines "free floating" samples | |
# I do it here and not before because I want the freee floating parameters at the end of the datacard | |
for nuisance in nuisances.values(): | |
if nuisance["type"] != "rateParam": | |
continue | |
# check if a nuisance can be skipped because not in this particular cut | |
if "cuts" in nuisance and cutName not in nuisance["cuts"]: | |
continue | |
card.write(nuisance["name"].ljust(80 - 20)) | |
card.write("rateParam".ljust(25)) | |
card.write( | |
tagNameToAppearInDatacard.ljust(columndef) | |
) # the bin | |
# there can be only 1 sample per rateParam | |
if len(nuisance["samples"]) != 1: | |
raise RuntimeError( | |
"Invalid rateParam: number of samples != 1" | |
) | |
sampleName, initialValue = list(nuisance["samples"].items())[0] | |
if sampleName not in samples: | |
raise RuntimeError( | |
"Invalid rateParam: unknown sample %s" % sampleName | |
) | |
card.write(sampleName.ljust(25)) | |
card.write(("%-.4f" % float(initialValue)).ljust(columndef)) | |
card.write("\n") | |
# now add other nuisances | |
# Are there other kind of nuisances I forgot? | |
card.write("-" * 100 + "\n") | |
card.write("\n") | |
card.close() | |
print(" done.") | |
self._outFile.Close() | |
if type(self._fileIn) is dict: | |
for source in self._fileIn.values(): | |
source.Close() | |
else: | |
self._fileIn.Close() | |
# _____________________________________________________________________________ | |
def _saveNuisanceHistos( | |
self, | |
cutName, | |
variableName, | |
sampleName, | |
suffixIn, | |
suffixOut=None, | |
symmetrize=False, | |
): | |
histoUp = self._getHisto(cutName, variableName, sampleName, suffixIn + "Up") | |
histoDown = self._getHisto(cutName, variableName, sampleName, suffixIn + "Down") | |
if not histoUp or not histoDown: | |
print( | |
"Up/down histogram for", | |
cutName, | |
variableName, | |
sampleName, | |
suffixIn, | |
"missing", | |
) | |
if self._skipMissingNuisance: | |
return False | |
# else let ROOT raise | |
if "scaleSampleForDatacard" in structure[sampleName]: | |
scaleFactor = 1.0 | |
if type(structure[sampleName]["scaleSampleForDatacard"]) is dict: | |
try: | |
scaleFactor = structure[sampleName]["scaleSampleForDatacard"][ | |
cutName | |
] | |
except: | |
pass | |
if ( | |
type(structure[sampleName]["scaleSampleForDatacard"]) is int | |
or type(structure[sampleName]["scaleSampleForDatacard"]) is float | |
): | |
scaleFactor = structure[sampleName]["scaleSampleForDatacard"] | |
histoUp.Scale(scaleFactor) | |
histoDown.Scale(scaleFactor) | |
if symmetrize: | |
histoNom = self._getHisto(cutName, variableName, sampleName) | |
histoDiff = histoUp.Clone("histoDiff") | |
histoDiff.Add(histoDown, -1.0) | |
histoDiff.Scale(0.5) | |
histoUp.Reset() | |
histoUp.Add(histoNom) | |
histoUp.Add(histoDiff) | |
histoDown.Reset() | |
histoDown.Add(histoNom) | |
histoDown.Add(histoDiff, -1.0) | |
histoUp.SetDirectory(self._outFile) | |
histoDown.SetDirectory(self._outFile) | |
if suffixOut: | |
histoUp.SetName("histo_%s%sUp" % (sampleName, suffixOut)) | |
histoDown.SetName("histo_%s%sDown" % (sampleName, suffixOut)) | |
# print('Saving histos with names') | |
# print(histoUp.GetName()) | |
# print(histoDown.GetName()) | |
self._outFile.cd() | |
histoUp.Write() | |
histoDown.Write() | |
return True | |
# _____________________________________________________________________________ | |
def _getHisto(self, cutName, variableName, sampleName, suffix=None): | |
shapeName = "%s/%s/histo_%s" % (cutName, variableName, sampleName) | |
if suffix: | |
shapeName += suffix | |
if type(self._fileIn) is dict: | |
# by-sample ROOT file | |
histo = self._fileIn[sampleName].Get(shapeName) | |
else: | |
# Merged single ROOT file | |
histo = self._fileIn.Get(shapeName) | |
if not histo: | |
print(shapeName, "not found") | |
# if 'met' in variableName.lower() and 'met' in suffix.lower(): | |
# print(variableName, 'does not contain', suffix) | |
# sys.exit() | |
if suffix: | |
print("Getting the nominal instead of varied") | |
histo = self._getHisto(cutName, variableName, sampleName) | |
histo.SetName(shapeName) | |
return histo | |
return histo | |
# _____________________________________________________________________________ | |
def _removeStatUncertainty(self, histo): | |
for iBin in range(0, histo.GetNbinsX() + 2): | |
histo.SetBinError(iBin, 0.0) | |
class list_maker: | |
def __init__(self, var, sep=",", __type=None): | |
self._type = __type | |
self._var = var | |
self._sep = sep | |
def __call__(self, option, opt_str, value, parser): | |
if not hasattr(parser.values, self._var): | |
setattr(parser.values, self._var, []) | |
try: | |
array = value.split(self._sep) | |
if self._type: | |
array = [self._type(e) for e in array] | |
setattr(parser.values, self._var, array) | |
except: | |
print("Malformed option (comma separated list expected):", value) | |
def main(): | |
sys.argv = argv | |
print( | |
""" | |
-------------------------------------------------------------------------------------------------- | |
__ \ | | \ | | | |
| | _` | __| _` | __| _` | __| _` | |\/ | _` | | / _ \ __| | |
| | ( | | ( | ( ( | | ( | | | ( | < __/ | | |
____/ \__,_| \__| \__,_| \___| \__,_| _| \__,_| _| _| \__,_| _|\_\ \___| _| | |
-------------------------------------------------------------------------------------------------- | |
""" | |
) | |
usage = "usage: %prog [options]" | |
parser = optparse.OptionParser(usage) | |
parser.add_option( | |
"--tag", dest="tag", help="Tag used for the shape file name", default=None | |
) | |
parser.add_option( | |
"--sigset", dest="sigset", help="Signal samples [SM]", default="SM" | |
) | |
parser.add_option( | |
"--outputDirDatacard", | |
dest="outputDirDatacard", | |
help="output directory", | |
default="./datacards", | |
) | |
parser.add_option( | |
"--inputFile", dest="inputFile", help="input directory", default="./input.root" | |
) | |
parser.add_option( | |
"--structureFile", | |
dest="structureFile", | |
help="file with datacard configurations", | |
default=None, | |
) | |
parser.add_option( | |
"--nuisancesFile", | |
dest="nuisancesFile", | |
help="file with nuisances configurations", | |
default=None, | |
) | |
parser.add_option( | |
"--cardList", | |
dest="cardList", | |
help="List of cuts to produce datacards", | |
default=[], | |
type="string", | |
action="callback", | |
callback=list_maker("cardList", ","), | |
) | |
parser.add_option( | |
"--skipMissingNuisance", | |
dest="skipMissingNuisance", | |
help="Don't write nuisance lines when histograms are missing", | |
default=False, | |
action="store_true", | |
) | |
parser.add_option( | |
"--onlyVariable", | |
dest="onlyVariable", | |
help="draw only one variable (may be needed in post-fit plots)", | |
default=None, | |
) | |
parser.add_option( | |
"--onlyCut", | |
dest="onlyCut", | |
help="draw only one cut phase space (may be needed in post-fit plots)", | |
default=None, | |
) | |
# read default parsing options as well | |
# hwwtools.addOptions(parser) | |
# hwwtools.loadOptDefaults(parser) | |
(opt, args) = parser.parse_args() | |
# sys.argv.append( '-b' ) | |
# print(" configuration file = ", opt.pycfg) | |
global cuts, plot | |
from mkShapesRDF.shapeAnalysis.ConfigLib import ConfigLib | |
configsFolder = "configs" | |
ConfigLib.loadLatestPickle(os.path.abspath(configsFolder), globals()) | |
print(dir()) | |
print(globals().keys()) | |
# unpacking of variables | |
cuts = cuts["cuts"] | |
inputFile = outputFolder + "/" + outputFile | |
ROOT.TH1.SetDefaultSumw2(True) | |
factory = DatacardFactory() | |
factory._tag = tag | |
factory._skipMissingNuisance = opt.skipMissingNuisance | |
factory.onlyCut = opt.onlyCut | |
factory.onlyVariable = opt.onlyVariable | |
import mkShapesRDF.shapeAnalysis.latinos.LatinosUtils as utils | |
subsamplesmap = utils.flatten_samples(samples) | |
categoriesmap = utils.flatten_cuts(cuts) | |
utils.update_variables_with_categories(variables, categoriesmap) | |
utils.update_nuisances_with_subsamples(nuisances, subsamplesmap) | |
utils.update_nuisances_with_categories(nuisances, categoriesmap) | |
# command-line cuts restrictions | |
if len(opt.cardList) > 0: | |
try: | |
newCuts = [] | |
for iCut in opt.cardList: | |
for iOptim in optim: | |
newCuts.append(iCut + "_" + iOptim) | |
opt.cardList = newCuts | |
print(opt.cardList) | |
except: | |
print("No optim dictionary") | |
cut2del = [] | |
for iCut in cuts: | |
if iCut not in opt.cardList: | |
cut2del.append(iCut) | |
for iCut in cut2del: | |
del cuts[iCut] | |
factory.makeDatacards( | |
inputFile, opt.outputDirDatacard, variables, cuts, samples, structure, nuisances | |
) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment