Skip to content

Instantly share code, notes, and snippets.

@dpiparo
Last active August 17, 2018 15:29
Show Gist options
  • Save dpiparo/9f36377f18dc753eae780f4681a1f0d4 to your computer and use it in GitHub Desktop.
Save dpiparo/9f36377f18dc753eae780f4681a1f0d4 to your computer and use it in GitHub Desktop.
/*
This macro demonstrates how to dump columns of fundamental types, and vectors
thereof, into a forest dataset using RDataFrame (RDF).
We achieve this using custom actions.
To run it: root -b -q rdf2forest.C
WARNING: add the following lines to your LinkDef:
#ifdef __CLING__
#pragma link C++ nestedtypedefs;
#pragma link C++ nestedclasses;
#pragma link C++ class ROOT::Experimental::RTree-;
#pragma link C++ class ROOT::Experimental::RColumnSink-;
#pragma link C++ class ROOT::Experimental::RColumnRawSettings-;
#pragma link C++ class ROOT::Experimental::RTreeModel-;
#pragma link C++ class ROOT::Experimental::RColumn-;
#pragma link C++ class ROOT::Experimental::RCargo<double>-;
#pragma link C++ class ROOT::Experimental::RCargo<int>-;
#endif
*/
// Short aliases for the ROOT::Experimental forest types used throughout this macro.
using RColumnSink = ROOT::Experimental::RColumnSink;
using RColumnRawSettings = ROOT::Experimental::RColumnRawSettings;
using RTree = ROOT::Experimental::RTree;
using RTreeModel = ROOT::Experimental::RTreeModel;
using RColumnSource = ROOT::Experimental::RColumnSource;
using ERangeType = ROOT::Experimental::ERangeType;
// List of column names, in the same order as the column types they refer to.
using ColNames_t = std::vector<std::string>;
// Custom RDataFrame action that copies the values of the requested columns into a
// forest RTree, one Fill() per processed entry.
// It respects the well defined interface required from custom actions.
// It does not support parallelism!
// It is templated on the types of the columns to be written (forest requires this).
template <typename... ColumnTypes_t>
class RForestHelper : public ROOT::Detail::RDF::RActionImpl<RForestHelper<ColumnTypes_t...>> {
public:
   using Result_t = RTree;

private:
   // One shared pointer per column: each points to the object handed back by
   // RTreeModel::Branch for that column and is overwritten before every Fill().
   using ColumnValues_t = std::tuple<std::shared_ptr<ColumnTypes_t>...>;

   std::string fDatasetName;     // passed to RColumnRawSettings when the sink is created
   ColNames_t fColNames;         // branch names, positionally matched to ColumnTypes_t
   ColumnValues_t fColumnValues; // per-column value holders
   static constexpr std::size_t fNColumns = sizeof...(ColumnTypes_t);
   std::shared_ptr<RTree> fTree; // the tree being filled; also the result of the action

   // Creates the model with one branch per column type and opens the output tree.
   template <std::size_t... S>
   void InitializeImpl(std::index_sequence<S...>)
   {
      auto event_model = std::make_shared<RTreeModel>();
      // Braced-init-list expansion guarantees left-to-right creation of the branches.
      // at() is used instead of operator[] so that passing fewer names than column
      // types raises std::out_of_range instead of reading past the end of fColNames.
      // NOTE(review): 0.0 is forwarded to Branch for every column type, as before;
      // confirm this is appropriate for non-arithmetic columns.
      std::initializer_list<int> expander{
         (std::get<S>(fColumnValues) = event_model->Branch<ColumnTypes_t>(fColNames.at(S), 0.0), 0)...};
      (void)expander; // only needed to drive the pack expansion

      RColumnRawSettings settings(fDatasetName);
      settings.fCompressionSettings = 104; // ZLIB, level 4
      fTree = std::make_shared<RTree>(event_model, RColumnSink::MakeSinkRaw(settings));
   }

   // Copies the values of the current entry into the per-column holders.
   // Values are taken by const reference to avoid one extra copy per column.
   template <std::size_t... S>
   void ExecImpl(std::index_sequence<S...>, const ColumnTypes_t &... values)
   {
      std::initializer_list<int> expander{(*std::get<S>(fColumnValues) = values, 0)...};
      (void)expander; // only needed to drive the pack expansion
   }

public:
   /// \param datasetName name handed to RColumnRawSettings for the output
   /// \param colNames one name per entry of ColumnTypes_t, in the same order
   /// Throws std::out_of_range if colNames has fewer entries than ColumnTypes_t.
   RForestHelper(std::string_view datasetName, const ColNames_t &colNames)
      : fDatasetName(datasetName), fColNames(colNames)
   {
      InitializeImpl(std::make_index_sequence<fNColumns>());
   }

   RForestHelper(RForestHelper &&) = default;
   RForestHelper(const RForestHelper &) = delete;

   /// Returns the tree that is being filled.
   std::shared_ptr<RTree> GetResultPtr() const { return fTree; }

   // Nothing to do here: the model and the tree are created in the constructor.
   void Initialize() {}
   void InitTask(TTreeReader *, unsigned int) {}

   /// This is a method executed at every entry.
   /// The slot number is unused since parallelism is not supported.
   void Exec(unsigned int /*slot*/, ColumnTypes_t... values)
   {
      ExecImpl(std::make_index_sequence<fNColumns>(), values...);
      fTree->Fill();
   }

   void Finalize()
   {
      // Do we need to flush data here?
   }
};
// Builds a toy data frame with an int and a double column and writes it out twice:
// to "out.forest" through RForestHelper and to the tree "tree" in "out.root".
void rdf2forestwrite()
{
   ROOT::RDataFrame d(800);
   auto i = 0;
   // int_col is a running counter. double_col is derived explicitly from int_col
   // instead of reading the shared counter, so its value does not depend on the
   // order in which the two columns happen to be evaluated for an entry.
   auto dd = d.Define("int_col", [&i]() { return ++i; })
               .Define("double_col", [](int v) { return static_cast<double>(v); }, {"int_col"});

   // We create the Forest Writer here
   // for the templates, we could use jitting in order to have a general
   // converter based on the column types and names. That is easy :)
   ColNames_t colNames = {"int_col", "double_col"};
   using Helper_t = RForestHelper<int, double>;
   Helper_t helper{"out.forest", colNames};

   // We book the action: it will be treated during the event loop.
   auto myRTree = dd.Book<int, double>(std::move(helper), colNames);

   // NOTE(review): Snapshot is presumably not lazy by default, in which case the
   // event loop (including the booked action) already runs here — confirm.
   dd.Snapshot("tree", "out.root");

   // Accessing the result triggers the evt-loop if it has not run yet.
   // The value itself is not needed, hence the cast to void.
   (void)*myRTree;
}
// Opens "out.forest" and prints int_col and double_col for every entry in the range.
void rdf2forestread()
{
   auto event_model = std::make_shared<RTreeModel>();
   RTree tree(event_model, RColumnSource::MakeSourceRaw("out.forest"));

   // Views give typed access to a single column for a given entry.
   auto int_col = tree.GetView<int>("int_col");
   auto double_col = tree.GetView<double>("double_col");

   // The entry range is requested with ERangeType::kLazy and values are read through
   // the views above.
   // NOTE(review): an earlier comment described this loop as the non-lazy option, in
   // which the iteration fills an REntry automatically, yet kLazy is what is
   // requested — confirm which one is intended.
   for (auto e : tree.GetEntryRange(ERangeType::kLazy)) {
      // '\n' instead of std::endl: no need to flush the stream once per row.
      std::cout << "int_col = " << int_col(e) << " " << "double_col = " << double_col(e) << '\n';
   }
   // Flush once so that all rows are visible when the function returns.
   std::cout << std::flush;
}
// Macro entry point: runs the write step first and then the read step, in that order.
void rdf2forest()
{
   rdf2forestwrite(); // write step
   rdf2forestread();  // read step
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment