alexpearce/splot_with_roofit.py

## splot_with_roofit.py
def pdf_probabilities(xs, dependents, yields, pdfs):
    """Return an array of normalised PDF probabilities.

    For a list of N yields, each y_i, and a list of N PDFs f_i, the probability
    for the ith PDF at the (possibly vector) point x is defined as

        p_{i} = \frac{N_{i}f_{i}}{\sum_{i}^{N} N_{i}f_{i}}

    Keyword arguments:
    xs -- List of values at which to evaluate the PDFs
    dependent -- List of same length as xs, as the RooRealVar objects upon
        which each PDF depends
    yields -- List of RooRealVar objects which hold the fitted values of the
        PDF yields
    pdfs -- List of RooAbsPdf objects to evaluate the probabilities for
    """
    xs = utilities.as_list(xs)
    dependents = utilities.as_list(dependents)
    depset = ROOT.RooArgSet(*dependents)

    # Set the current value of the dependents
    for x, dep in zip(xs, dependents):
        dep.setVal(x)

    ps = []
    normalisation = 0
    for y, pdf in zip(yields, pdfs):
        a = y.getVal()*pdf.getVal(depset)
        ps.append(a)
        normalisation += a

    return [p/normalisation for p in ps]


def add_sweights(data, w=None):
    """Add sWeights from the workspace to the data's DataFrame member.

    This method will filter out events from data.df that do not have
    corresponding sWeights, i.e. those that did not enter the fit.
    If w is None, the default workspace for this dataset will be retrieved.
    """
    if w is None:
        fpath = workspace_path(data, ['unweighted', 'no-simultaneous'])
        with utilities.open_root(fpath) as f:
            w = f.Get('w')

    pdf_sig = w.pdf('pdf_sig')
    pdf_bkg = w.pdf('pdf_bkg')
    yield_sig = w.var('yield_sig')
    yield_bkg = w.var('yield_bkg')
    pdfs = [pdf_sig, pdf_bkg]
    yields = [yield_sig, yield_bkg]
    dependents = w.var('Lb_DTF_Lc_M')

    rdata = w.data(data_name(binned=False))
    index, p_sig, p_bkg = [], [], []
    for idx in range(rdata.numEntries()):
        row = rdata.get(idx)

        x = row.find('Lb_DTF_Lc_M').getVal()
        ps, pb = pdf_probabilities(x, dependents, yields, pdfs)
        p_sig.append(ps)
        p_bkg.append(pb)

        index.append(int(row.find('__index__').getVal()))

    probabilities = pd.DataFrame(dict(sig_sw=p_sig, bkg_sw=p_bkg))
    sweights = splot.compute_sweights(probabilities)
    # By setting the index of the sWeight DF to the list of indices entering
    # the fit, entries in data.df that didn't enter the fit will receive
    # sWeight values of NaN, which we can filter out
    sweights.index = index
    sweights['sum_sw'] = sweights.sig_sw + sweights.bkg_sw

    # Drop any existing sWeight columns
    try:
        data.df.drop(sweights.columns, axis=1, inplace=True)
    except ValueError:
        pass
    data.df = pd.concat([data.df, sweights], axis=1)
    data.df.dropna(subset=sweights.columns, inplace=True)
	def pdf_probabilities(xs, dependents, yields, pdfs):
	"""Return an array of normalised PDF probabilities.

	For a list of N yields, each y_i, and a list of N PDFs f_i, the probability
	for the ith PDF at the (possibly vector) point x is defined as

	p_{i} = \frac{N_{i}f_{i}}{\sum_{i}^{N} N_{i}f_{i}}

	Keyword arguments:
	xs -- List of values at which to evaluate the PDFs
	dependent -- List of same length as xs, as the RooRealVar objects upon
	which each PDF depends
	yields -- List of RooRealVar objects which hold the fitted values of the
	PDF yields
	pdfs -- List of RooAbsPdf objects to evaluate the probabilities for
	"""
	xs = utilities.as_list(xs)
	dependents = utilities.as_list(dependents)
	depset = ROOT.RooArgSet(*dependents)

	# Set the current value of the dependents
	for x, dep in zip(xs, dependents):
	dep.setVal(x)

	ps = []
	normalisation = 0
	for y, pdf in zip(yields, pdfs):
	a = y.getVal()*pdf.getVal(depset)
	ps.append(a)
	normalisation += a

	return [p/normalisation for p in ps]


	def add_sweights(data, w=None):
	"""Add sWeights from the workspace to the data's DataFrame member.

	This method will filter out events from data.df that do not have
	corresponding sWeights, i.e. those that did not enter the fit.
	If w is None, the default workspace for this dataset will be retrieved.
	"""
	if w is None:
	fpath = workspace_path(data, ['unweighted', 'no-simultaneous'])
	with utilities.open_root(fpath) as f:
	w = f.Get('w')

	pdf_sig = w.pdf('pdf_sig')
	pdf_bkg = w.pdf('pdf_bkg')
	yield_sig = w.var('yield_sig')
	yield_bkg = w.var('yield_bkg')
	pdfs = [pdf_sig, pdf_bkg]
	yields = [yield_sig, yield_bkg]
	dependents = w.var('Lb_DTF_Lc_M')

	rdata = w.data(data_name(binned=False))
	index, p_sig, p_bkg = [], [], []
	for idx in range(rdata.numEntries()):
	row = rdata.get(idx)

	x = row.find('Lb_DTF_Lc_M').getVal()
	ps, pb = pdf_probabilities(x, dependents, yields, pdfs)
	p_sig.append(ps)
	p_bkg.append(pb)

	index.append(int(row.find('__index__').getVal()))

	probabilities = pd.DataFrame(dict(sig_sw=p_sig, bkg_sw=p_bkg))
	sweights = splot.compute_sweights(probabilities)
	# By setting the index of the sWeight DF to the list of indices entering
	# the fit, entries in data.df that didn't enter the fit will receive
	# sWeight values of NaN, which we can filter out
	sweights.index = index
	sweights['sum_sw'] = sweights.sig_sw + sweights.bkg_sw

	# Drop any existing sWeight columns
	try:
	data.df.drop(sweights.columns, axis=1, inplace=True)
	except ValueError:
	pass
	data.df = pd.concat([data.df, sweights], axis=1)
	data.df.dropna(subset=sweights.columns, inplace=True)