Use RooFit PDFs with hep_ml.splot
import ROOT
import pandas as pd
from hep_ml import splot

# utilities, workspace_path, and data_name are helpers from the local
# analysis package this snippet was extracted from
import utilities


def pdf_probabilities(xs, dependents, yields, pdfs):
    """Return an array of normalised PDF probabilities.

    For a list of N yields N_i and a list of N PDFs f_i, the probability
    for the ith PDF at the (possibly vector) point x is defined as

        p_{i} = \frac{N_{i}f_{i}(x)}{\sum_{j=1}^{N} N_{j}f_{j}(x)}

    Keyword arguments:
    xs -- List of values at which to evaluate the PDFs
    dependents -- List of the same length as xs, containing the RooRealVar
                  objects upon which each PDF depends
    yields -- List of RooRealVar objects which hold the fitted values of the
              PDF yields
    pdfs -- List of RooAbsPdf objects to evaluate the probabilities for
    """
    xs = utilities.as_list(xs)
    dependents = utilities.as_list(dependents)
    depset = ROOT.RooArgSet(*dependents)
    # Set the current value of the dependents
    for x, dep in zip(xs, dependents):
        dep.setVal(x)
    ps = []
    normalisation = 0
    for y, pdf in zip(yields, pdfs):
        # Un-normalised probability: yield times PDF value at this point
        a = y.getVal()*pdf.getVal(depset)
        ps.append(a)
        normalisation += a
    return [p/normalisation for p in ps]
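As a quick illustration of how pdf_probabilities is meant to be called, here is a minimal sketch assuming a toy Gaussian-plus-exponential model built directly with RooFit; every name and value below is illustrative and is not part of the fit used elsewhere in this gist.

    # Hypothetical usage sketch: a Gaussian signal plus an exponential
    # background in a single mass variable (illustrative names/values only)
    mass = ROOT.RooRealVar('mass', 'mass', 5200, 5400)
    sig = ROOT.RooGaussian('sig', 'sig', mass,
                           ROOT.RooFit.RooConst(5280), ROOT.RooFit.RooConst(15))
    bkg = ROOT.RooExponential('bkg', 'bkg', mass, ROOT.RooFit.RooConst(-1e-3))
    n_sig = ROOT.RooRealVar('n_sig', 'n_sig', 1000)
    n_bkg = ROOT.RooRealVar('n_bkg', 'n_bkg', 500)
    # Probability of an event at mass = 5280 belonging to each component
    p_sig, p_bkg = pdf_probabilities(5280, mass, [n_sig, n_bkg], [sig, bkg])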
def add_sweights(data, w=None):
    """Add sWeights from the workspace to the data's DataFrame member.

    This method will filter out events from data.df that do not have
    corresponding sWeights, i.e. those that did not enter the fit.

    If w is None, the default workspace for this dataset will be retrieved.
    """
    if w is None:
        fpath = workspace_path(data, ['unweighted', 'no-simultaneous'])
        with utilities.open_root(fpath) as f:
            w = f.Get('w')
    pdf_sig = w.pdf('pdf_sig')
    pdf_bkg = w.pdf('pdf_bkg')
    yield_sig = w.var('yield_sig')
    yield_bkg = w.var('yield_bkg')
    pdfs = [pdf_sig, pdf_bkg]
    yields = [yield_sig, yield_bkg]
    dependents = w.var('Lb_DTF_Lc_M')
    rdata = w.data(data_name(binned=False))
    index, p_sig, p_bkg = [], [], []
    for idx in range(rdata.numEntries()):
        row = rdata.get(idx)
        x = row.find('Lb_DTF_Lc_M').getVal()
        ps, pb = pdf_probabilities(x, dependents, yields, pdfs)
        p_sig.append(ps)
        p_bkg.append(pb)
        index.append(int(row.find('__index__').getVal()))
    probabilities = pd.DataFrame(dict(sig_sw=p_sig, bkg_sw=p_bkg))
    sweights = splot.compute_sweights(probabilities)
    # By setting the index of the sWeight DF to the list of indices entering
    # the fit, entries in data.df that didn't enter the fit will receive
    # sWeight values of NaN, which we can filter out
    sweights.index = index
    sweights['sum_sw'] = sweights.sig_sw + sweights.bkg_sw
    # Drop any existing sWeight columns
    try:
        data.df.drop(sweights.columns, axis=1, inplace=True)
    except ValueError:
        pass
    data.df = pd.concat([data.df, sweights], axis=1)
    data.df.dropna(subset=sweights.columns, inplace=True)
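And a sketch of how the resulting sWeights might be used: once add_sweights has run, any other variable can be background-subtracted by histogramming it with the signal sWeights. The data object and the 'Lc_PT' column below are assumptions for illustration only.

    # Hypothetical usage sketch: background-subtract another variable with
    # the signal sWeights ('data' and 'Lc_PT' are assumed to exist)
    import matplotlib.pyplot as plt

    add_sweights(data)
    plt.hist(data.df['Lc_PT'], bins=50, weights=data.df['sig_sw'],
             histtype='step', label='sWeighted signal')
    plt.xlabel('Lc_PT')
    plt.legend()
    plt.savefig('lc_pt_sweighted.pdf')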