Last active
August 17, 2018 15:29
-
-
Save dpiparo/9f36377f18dc753eae780f4681a1f0d4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
This macro demonstrates how to dump columns of fundamental types, and vectors
thereof, into a forest using RDataFrame (RDF).
We achieve this using custom actions.
In order to run it: root -b -q rdf2forest.C
WARNING: enrich your LinkDef with these lines
#ifdef __CLING__
#pragma link C++ nestedtypedefs;
#pragma link C++ nestedclasses;
#pragma link C++ class ROOT::Experimental::RTree-;
#pragma link C++ class ROOT::Experimental::RColumnSink-;
#pragma link C++ class ROOT::Experimental::RColumnRawSettings-;
#pragma link C++ class ROOT::Experimental::RTreeModel-;
#pragma link C++ class ROOT::Experimental::RColumn-;
#pragma link C++ class ROOT::Experimental::RCargo<double>-;
#pragma link C++ class ROOT::Experimental::RCargo<int>-;
#endif
*/
using RColumnSink = ROOT::Experimental::RColumnSink; | |
using RColumnRawSettings = ROOT::Experimental::RColumnRawSettings; | |
using RTree = ROOT::Experimental::RTree; | |
using RTreeModel = ROOT::Experimental::RTreeModel; | |
using RColumnSource = ROOT::Experimental::RColumnSource; | |
using ERangeType = ROOT::Experimental::ERangeType; | |
using ColNames_t = std::vector<std::string>; | |
// This is a custom action which respects a well defined interface. | |
// It does not support parallelism! | |
// We template it on the type of the columns to be written (forest requires this) | |
template <typename... ColumnTypes_t> | |
class RForestHelper : public ROOT::Detail::RDF::RActionImpl<RForestHelper<ColumnTypes_t...>> { | |
public: | |
using Result_t = RTree; | |
private: | |
using ColumnValues_t = std::tuple<std::shared_ptr<ColumnTypes_t>...>; | |
std::string fDatasetName; | |
ColNames_t fColNames; | |
ColumnValues_t fColumnValues; | |
static constexpr const auto fNColumns = std::tuple_size<ColumnValues_t>::value; | |
std::shared_ptr<RTree> fTree; | |
template<std::size_t... S> | |
void InitializeImpl(std::index_sequence<S...>) { | |
auto event_model = std::make_shared<RTreeModel>(); | |
std::initializer_list<int> expander { | |
(std::get<S>(fColumnValues) = event_model->Branch<ColumnTypes_t>(fColNames[S], 0.0) | |
, 0)...}; | |
RColumnRawSettings settings(fDatasetName); | |
settings.fCompressionSettings = 104; // ZLIB, level 4 | |
fTree = make_shared<RTree>(event_model, RColumnSink::MakeSinkRaw(settings)); | |
} | |
template<std::size_t... S> | |
void ExecImpl(std::index_sequence<S...>, ColumnTypes_t... values) { | |
std::initializer_list<int> expander{ | |
(*std::get<S>(fColumnValues) = values | |
, 0)...}; | |
} | |
public: | |
RForestHelper(std::string_view datasetName, const ColNames_t& colNames) : | |
fDatasetName(datasetName), fColNames(colNames) { | |
InitializeImpl(std::make_index_sequence<fNColumns>()); | |
} | |
RForestHelper(RForestHelper &&) = default; | |
RForestHelper(const RForestHelper &) = delete; | |
std::shared_ptr<RTree> GetResultPtr() const {return fTree;} | |
void Initialize() {} | |
void InitTask(TTreeReader *, unsigned int) {} | |
/// This is a method executed at every entry | |
void Exec(unsigned int slot, ColumnTypes_t... values) | |
{ | |
ExecImpl(std::make_index_sequence<fNColumns>(), values...); | |
fTree->Fill(); | |
} | |
void Finalize() | |
{ | |
// Do we need to flush data here? | |
} | |
}; | |
void rdf2forestwrite() | |
{ | |
ROOT::RDataFrame d(800); | |
auto i = 0; | |
auto dd = d.Define("int_col", [&](){return ++i;}) | |
.Define("double_col", [&](){return (double)i;}); | |
// We create the Forest Writer here | |
// for the templates, we could use jitting in order to have a general | |
// converter based on the column types and names. That is easy :) | |
ColNames_t colNames = {"int_col", "double_col"}; | |
using Helper_t = RForestHelper<int, double>; | |
Helper_t helper {"out.forest", colNames}; | |
// We book the action: it will be treated during the event loop. | |
auto myRTree = dd.Book<int, double>(std::move(helper), colNames); | |
dd.Snapshot("tree","out.root"); | |
// Trigger the evt-loop | |
*myRTree; | |
} | |
void rdf2forestread() | |
{ | |
auto event_model = std::make_shared<RTreeModel>(); | |
RTree tree(event_model, RColumnSource::MakeSourceRaw("out.forest")); | |
auto int_col = tree.GetView<int>("int_col"); | |
auto double_col = tree.GetView<double>("double_col"); | |
// The non-lazy option: the iteration fills automatically an REntry | |
for (auto e : tree.GetEntryRange(ERangeType::kLazy)) { | |
std::cout << "int_col = " << int_col(e) << " " << "double_col = " << double_col(e) << std::endl; | |
} | |
} | |
void rdf2forest() | |
{ | |
rdf2forestwrite(); | |
rdf2forestread(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment