Skip to content

Instantly share code, notes, and snippets.

@yipengsun
Created July 18, 2022 07:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yipengsun/840ed39e76a7051b57efd8d9cea57b33 to your computer and use it in GitHub Desktop.
Save yipengsun/840ed39e76a7051b57efd8d9cea57b33 to your computer and use it in GitHub Desktop.
// Standard headers
#include <iostream>
#include <map>
#include <regex>
#include <string>
#include <vector>
// ROOT headers
#include <TFile.h>
#include <TH3.h>
#include <TString.h>
// RooFit headers
#include <RooDataHist.h>
#include <RooMsgService.h>
#include <RooProduct.h>
#include <RooRealVar.h>
#include <RooWorkspace.h>
#include <RooStats/HistFactory/HistFactorySimultaneous.h>
#include <RooStats/HistFactory/Measurement.h>
#include <RooStats/HistFactory/PiecewiseInterpolation.h>
#include <RooStats/ModelConfig.h>
// Third-party libraries
#include <yaml-cpp/emitter.h>
#include <yaml-cpp/yaml.h>
#include <cxxopts.hpp>
// Project-specific headers
#include "base64.h"
#include "utils.h"
using namespace std;
using namespace RooFit;
using namespace RooStats;
using namespace HistFactory;
//////////////////
// Configurable //
//////////////////
bool VERBOSE = false;
bool TRACE = false;
const string OUTPUT_FILENAME = "fitted_params.yml";
const string OUTPUT_NTPNAME = "final_histos.root";
/////////////
// Helpers //
/////////////
class ParamExtract {
public:
ParamExtract(const TString &wsFileName, double binVolume);
~ParamExtract();
void loadWorkspace();
void printYields(const vector<TString> &skims);
void writeYields(const vector<TString> &skims, const string &output);
void writeHistos(const vector<TString> &skims, const string &output);
private:
map<string, string> getYields(const TString & skim,
vector<RooStats::HistFactory::Sample> &samples,
const TString & keyword);
map<string, string> getNuisance();
vector<TH3F *> getHistos(const TString & skim,
vector<RooStats::HistFactory::Sample> &samples,
const TString & keyword);
double mBinVolume;
TFile * mFile;
RooWorkspace * mWorkspace;
RooStats::HistFactory::Measurement *mMeasurement;
RooAbsData * mData;
ModelConfig * mModelConfig;
RooSimultaneous * mModel;
HistFactorySimultaneous * mModelHf;
};
// Constructor/destructor //////////////////////////////////////////////////////
ParamExtract::ParamExtract(const TString &wsFileName, double binVolume)
: mBinVolume(binVolume) {
cout << "Reading workspace and WorkspaceHistFactory::Measurement in "
<< wsFileName << endl;
mFile = new TFile(wsFileName);
RooMsgService::instance().setGlobalKillBelow(RooFit::ERROR); // Silence INFO
loadWorkspace();
}
ParamExtract::~ParamExtract() { delete mFile; }
// Public //////////////////////////////////////////////////////////////////////
void ParamExtract::loadWorkspace() {
mWorkspace = static_cast<RooWorkspace *>(mFile->Get("combined"));
mMeasurement = static_cast<RooStats::HistFactory::Measurement *>(
mFile->Get("my_measurement_sim"));
mData = static_cast<RooAbsData *>(mWorkspace->data("obsData"));
// Get model manually
mModelConfig = static_cast<ModelConfig *>(mWorkspace->obj("ModelConfig"));
mModel = static_cast<RooSimultaneous *>(mModelConfig->GetPdf());
// Loading workspace and WorkspaceHistFactory::Measurement
mModelHf = new HistFactorySimultaneous(*mModel);
}
void ParamExtract::printYields(const vector<TString> &skims) {
for (unsigned iSkim = 0; iSkim < skims.size(); iSkim++) {
TString skim = skims[iSkim];
TString skimDisplay = skim;
if (skim == "") skimDisplay = "ISO";
cout << endl
<< "Looping over skim " << skimDisplay << endl
<< "===========================" << endl;
// Channels are 0: "Dstmu" and 1: "Dmu"
vector<RooStats::HistFactory::Channel> channels =
mMeasurement->GetChannels();
vector<RooStats::HistFactory::Sample> sampleDst =
channels[2 * iSkim].GetSamples();
vector<RooStats::HistFactory::Sample> sampleD0 =
channels[2 * iSkim + 1].GetSamples();
auto yieldDst = getYields(skim, sampleDst, "Dstmu");
for (auto &[pdfName, yield] : yieldDst)
cout << pdfName << " has a yield of: " << yield << endl;
auto yieldD0 = getYields(skim, sampleD0, "Dmu");
for (auto &[pdfName, yield] : yieldD0)
cout << pdfName << " has a yield of: " << yield << endl;
}
// nuisance are global
cout << endl
<< "Nuisance parameters " << endl
<< "=============================" << endl;
auto alphas = getNuisance();
for (auto &[name, value] : alphas)
cout << name << " has a value of: " << value << endl;
}
void ParamExtract::writeYields(const vector<TString> &skims,
const string & output) {
YAML::Emitter out;
out << YAML::BeginMap;
for (unsigned iSkim = 0; iSkim < skims.size(); iSkim++) {
TString skim = skims[iSkim];
// Channels are 0: "Dstmu" and 1: "Dmu"
vector<RooStats::HistFactory::Channel> channels =
mMeasurement->GetChannels();
vector<RooStats::HistFactory::Sample> sampleDst =
channels[2 * iSkim].GetSamples();
vector<RooStats::HistFactory::Sample> sampleD0 =
channels[2 * iSkim + 1].GetSamples();
auto yieldDst = getYields(skim, sampleDst, "Dstmu");
for (auto &[pdfName, yield] : yieldDst)
out << YAML::Key << pdfName << YAML::Value << yield;
auto yieldD0 = getYields(skim, sampleD0, "Dmu");
for (auto &[pdfName, yield] : yieldD0)
out << YAML::Key << pdfName << YAML::Value << yield;
}
// nuisance are global
auto alphas = getNuisance();
for (auto &[name, value] : alphas)
out << YAML::Key << name << YAML::Value << value;
out << YAML::EndMap;
// actually write to file
ofstream fout;
fout.open(output);
fout << out.c_str();
fout.close();
}
void ParamExtract::writeHistos(const vector<TString> &skims,
const string & output) {
auto ntp = TFile(output.c_str(), "recreate");
for (unsigned iSkim = 0; iSkim < skims.size(); iSkim++) {
TString skim = skims[iSkim];
vector<RooStats::HistFactory::Channel> channels =
mMeasurement->GetChannels();
vector<RooStats::HistFactory::Sample> sampleDst =
channels[2 * iSkim].GetSamples();
vector<RooStats::HistFactory::Sample> sampleD0 =
channels[2 * iSkim + 1].GetSamples();
auto histoDst = getHistos(skim, sampleDst, "Dstmu");
for (auto h : histoDst) h->Write("", TFile::kOverwrite);
auto histoD0 = getHistos(skim, sampleDst, "Dmu");
for (auto h : histoD0) h->Write("", TFile::kOverwrite);
}
ntp.Close();
}
// Private /////////////////////////////////////////////////////////////////////
map<string, string> ParamExtract::getYields(
const TString &skim, vector<RooStats::HistFactory::Sample> &samples,
const TString &keyword) {
map<string, string> result{};
map<string, regex> nameCleanup = {{"", regex("_(Dstmu|Dmu)$")},
{"1OS", regex("_(Dstmu|Dmu)1OS$")},
{"2OS", regex("_(Dstmu|Dmu)2OS$")},
{"DD", regex("_(Dstmu|Dmu)DD")}};
// Looping over templates
for (auto &nSamp : samples) {
TString prefix = nSamp.GetName();
TString pdfName = prefix + "_" + keyword + skim;
auto overallNorm = dynamic_cast<RooProduct *>(
mWorkspace->arg(pdfName + "_overallNorm_x_sigma_epsilon"));
if (overallNorm == nullptr) {
cout << pdfName << " does not exist, continuing" << endl;
continue;
}
double totYield = overallNorm->getVal();
double pdfIntegral = 0.0;
auto pdf = static_cast<PiecewiseInterpolation *>(
mWorkspace->function(pdfName + "_Hist_alpha"));
if (pdf == nullptr) {
auto pdfHist = static_cast<RooHistFunc *>(
mWorkspace->function(pdfName + "_nominal"));
if (pdfHist == nullptr) {
cout << pdfName << " does not exist, continuing" << endl;
continue;
}
PRINT_LINE_NO
RooDataHist pdfDataHist = pdfHist->dataHist();
pdfIntegral = pdfDataHist.sum(false);
} else {
// Is this correct?
RooAbsReal *pdfInteg = pdf->createIntegral(
*(static_cast<RooArgSet *>(
mModelHf->getObservables(*mData)->selectByName(
TString("*") + keyword + skim + "*"))),
Range(""));
pdfIntegral = pdfInteg->getVal() / mBinVolume;
PRINT_LINE_NO
}
auto outputName = string(pdfName);
for (auto &[target, reg] : nameCleanup)
outputName = regex_replace(outputName, reg, target);
auto yield = to_string(totYield * pdfIntegral);
if (skim != "" ||
(!pdfName.Contains("_sigtau") && !pdfName.Contains("_Dtau") &&
!pdfName.Contains("_dDsttau") && !pdfName.Contains("_uDsttau")))
result[outputName] = yield;
else
result[outputName] = encode64(yield);
} // Loop over templates
return result;
}
map<string, string> ParamExtract::getNuisance() {
map<string, string> result{};
auto listParams = (RooArgList *)(mModelConfig->GetNuisanceParameters());
for (int i = 0; i < listParams->getSize(); i++) {
auto alpha = dynamic_cast<RooRealVar *>(listParams->at(i));
TString name = alpha->GetName();
if (!name.Contains("alpha_")) continue;
TIterator *alphaIt = alpha->clientIterator();
TObject * tempObj = 0;
while ((tempObj = alphaIt->Next())) {
auto flex = dynamic_cast<PiecewiseInterpolation *>(tempObj); // HistoSys
if (flex == nullptr) continue; // don't save OverallSys
result[string(name)] = to_string(alpha->getVal());
vector<int> interpCodes = flex->interpolationCodes();
RooArgList paramList = flex->paramList();
for (int iFlex = 0; iFlex < paramList.getSize(); iFlex++) {
auto param = dynamic_cast<RooAbsArg *>(paramList.at(iFlex));
if (param->GetName() == name) {
result[string(param->GetName()) + "__interp_code"] =
to_string(interpCodes[iFlex]);
break;
}
}
break;
}
}
return result;
}
vector<TH3F *> ParamExtract::getHistos(
const TString &skim, vector<RooStats::HistFactory::Sample> &samples,
const TString &keyword) {
vector<TH3F *> result{};
map<string, regex> nameCleanup = {{"", regex("_(Dstmu|Dmu)$")},
{"1OS", regex("_(Dstmu|Dmu)1OS$")},
{"2OS", regex("_(Dstmu|Dmu)2OS$")},
{"DD", regex("_(Dstmu|Dmu)DD")}};
// Looping over templates
for (auto &nSamp : samples) {
TString prefix = nSamp.GetName();
TString pdfName = prefix + "_" + keyword + skim;
auto overallNorm = dynamic_cast<RooProduct *>(
mWorkspace->arg(pdfName + "_overallNorm_x_sigma_epsilon"));
if (overallNorm == nullptr) {
cout << pdfName << " does not exist, continuing" << endl;
continue;
}
auto outputName = string(pdfName);
for (auto &[target, reg] : nameCleanup)
outputName = regex_replace(outputName, reg, target);
double totYield = overallNorm->getVal();
double pdfIntegral = 0.0;
auto pdf = static_cast<PiecewiseInterpolation *>(
mWorkspace->function(pdfName + "_Hist_alpha"));
TH3F *histo;
// get observables for histogramming
auto obs =
static_cast<RooArgSet *>(mModelHf->getObservables(*mData)->selectByName(
TString("*") + keyword + skim + "*"));
auto x = reinterpret_cast<RooAbsRealLValue *>(
obs->find("obs_x_" + prefix + skim));
auto y = reinterpret_cast<RooCmdArg *>(obs->find("obs_y_" + prefix + skim));
auto z = reinterpret_cast<RooCmdArg *>(obs->find("obs_z_" + prefix + skim));
if (pdf == nullptr) {
auto pdfHist = static_cast<RooHistFunc *>(
mWorkspace->function(pdfName + "_nominal"));
if (pdfHist == nullptr) {
cout << pdfName << " does not exist, continuing" << endl;
continue;
}
PRINT_LINE_NO
RooDataHist pdfDataHist = pdfHist->dataHist();
pdfIntegral = pdfDataHist.sum(false);
histo = reinterpret_cast<TH3F *>(pdfHist->createHistogram(
outputName.c_str(), *x, IntrinsicBinning(), *y, IntrinsicBinning(),
*z, IntrinsicBinning()));
} else {
RooAbsReal *pdfInteg = pdf->createIntegral(*obs, Range(""));
pdfIntegral = pdfInteg->getVal() / mBinVolume;
histo = reinterpret_cast<TH3F *>(
pdf->createHistogram(outputName.c_str(), *x, IntrinsicBinning(), *y,
IntrinsicBinning(), *z, IntrinsicBinning()));
PRINT_LINE_NO
}
auto yield = to_string(totYield * pdfIntegral);
result.emplace_back(histo);
} // Loop over templates
return result;
}
//////////
// Main //
//////////
int main(int argc, char **argv) {
cxxopts::Options argOpts("extract_fit_params", "extract fit parameters.");
// clang-format off
argOpts.add_options()
("h,help", "print help")
("w,workspace", "workspace file name with fit results",
cxxopts::value<string>())
("o,outputFolder", "specify output folder",
cxxopts::value<string>()->default_value(""))
("m,mode", "specify working mode: extracting 'sig' or 'ctrl' ws",
cxxopts::value<string>()->default_value("sig"))
("b,binVolume", "specify bin volume. Default to 0.3*3.25*75",
cxxopts::value<double>()->default_value(to_string(0.3*3.25*75)))
("d,debug", "also save histograms extracted from the workspace")
;
// clang-format on
auto parsedArgs = argOpts.parse(argc, argv);
if (parsedArgs.count("help")) {
cout << argOpts.help() << endl;
return 0;
}
auto wsFileName = TString(parsedArgs["workspace"].as<string>());
auto binVolume = parsedArgs["binVolume"].as<double>();
auto debug = parsedArgs["debug"].as<bool>();
auto mode = parsedArgs["mode"].as<string>();
vector<TString> skims;
if (mode == "sig")
skims = {""};
else if (mode == "ctrl")
skims = {"1OS", "2OS", "DD"};
else {
cout << "Unknown mode: " << mode << endl;
return 1;
}
auto outputFolder = parsedArgs["outputFolder"].as<string>();
if (outputFolder == "") outputFolder = dirname(wsFileName);
cout << "The output YAML will be written to: " << outputFolder << endl;
auto paramExtractor = ParamExtract(wsFileName, binVolume);
paramExtractor.writeYields(skims, outputFolder + "/" + OUTPUT_FILENAME);
if (debug)
paramExtractor.writeHistos(skims, outputFolder + "/" + OUTPUT_NTPNAME);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment