Skip to content

Instantly share code, notes, and snippets.

@marty1885
Last active September 3, 2019 18:13
Show Gist options
  • Save marty1885/5f063f58cc27f3dd9e9782a97d2b35ca to your computer and use it in GitHub Desktop.
Save marty1885/5f063f58cc27f3dd9e9782a97d2b35ca to your computer and use it in GitHub Desktop.
/// @brief Extracts a four-character publication year from a free-form catalogue string.
///
/// A basic parser: it might get stuff wrong, but it is good enough for EDA.
/// Two layouts are recognised:
///   - a one-character prefix followed by the year: "[2000]", "c2000", "p2000"
///   - a string that starts directly with the year:  "2000", "2000-2001"
/// Only the four characters at the year position are handed to std::stoi, so
/// anything after them is ignored.
///
/// @param year Raw publication-year text.
/// @return The parsed year.
/// @throws std::runtime_error if @p year is empty, starts with an unrecognised
///         character, or is too short to hold four year characters.
/// @throws std::invalid_argument (from std::stoi) if the year position does not
///         start with a number.
Int_t parse_year(std::string year)
{
    constexpr std::string::size_type kYearLength = 4;

    if (year.empty())
        throw std::runtime_error("year is empty");

    // Decide where the year starts. The digit test is a plain range check:
    // isdigit() only promises a non-zero result (so comparing it with `true`
    // is unreliable) and is undefined for negative char values.
    const char first = year[0];
    std::string::size_type offset = 0;
    if (first == '[' || first == 'c' || first == 'p') // Handle format of [2000], c2000 and p2000
        offset = 1;
    else if (first >= '0' && first <= '9')
        offset = 0;
    else
        throw std::runtime_error("cannot parse format");

    // Reject inputs that cannot contain a full year instead of reading past
    // the end of the string.
    if (year.size() < offset + kYearLength)
        throw std::runtime_error("cannot parse format");

    return std::stoi(year.substr(offset, kYearLength));
}
void to_root()
{
// Open the raw archive
using string = std::string;
auto rdf = ROOT::RDataFrame("library_raw", "library_raw.root");
//Create a new archive that we'll be coping to
auto f = new TFile("library.root", "recreate");
auto t = new TTree("library", "checkout_data");
// All the fields
TDatime date;
string usage, checkout_type, creator, material, title, publisher;
std::vector<std::string> subjects;
Int_t num_checkout, publication_year;
// Assign column/branchs to the fields
t->Branch("usage", &usage);
t->Branch("checkout_type", &checkout_type);
t->Branch("material", &material);
t->Branch("checkout_month", &date);
t->Branch("num_checkout", &num_checkout);
t->Branch("title", &title);
t->Branch("creator", &creator);
t->Branch("subjects", &subjects);
t->Branch("publisher", &publisher);
t->Branch("publication_year", &publication_year);
ProgressDisplay disp(*rdf.Count());
size_t i=0;
rdf.Foreach([&](string usage_, string checkout_type_, string material_, string checkout_year_, string checkout_month_, string checkouts_, string title_
, string creator_, string subjects_, string publisher_, string publication_year_) {
// Copy data to field
usage = usage_;
checkout_type = checkout_type_;
material = material_;
num_checkout = std::stoi(checkouts_);
title = title_;
creator = creator_;
subjects = split(subjects_);
publisher = publisher_;
try {publication_year = parse_year(publication_year_);}
catch(...) {publication_year = 0x7fffffff;}
date.Set(std::stoi(checkout_year_), std::stoi(checkout_month_),0, 0, 0, 0);
t->Fill();
i++;
if(i%100000 == 0)
disp.update(i);
}, {"UsageClass", "CheckoutType", "MaterialType", "CheckoutYear", "CheckoutMonth", "Checkouts", "Title", "Creator", "Subjects", "Publisher", "PublicationYear"});
f->Write();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment