Skip to content

Instantly share code, notes, and snippets.

@marty1885
Created March 2, 2020 13:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marty1885/6b769de66ccb0464dd7e2535d101dbaf to your computer and use it in GitHub Desktop.
Save marty1885/6b769de66ccb0464dd7e2535d101dbaf to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"if(access(\"train.root\", F_OK ) == -1) {\n",
" auto csv_df = ROOT::RDF::MakeCsvDataFrame(\"train.csv\");\n",
" csv_df.Snapshot(\"data\", \"train.root\");\n",
"}\n",
"train_df = ROOT::RDF::MakeRootDataFrame(\"data\", \"train.root\");"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(ROOT::RDF::RInterface::ColumnNames_t) { \"time\", \"signal\", \"open_channels\" }\n"
]
}
],
"source": [
"train_df.GetColumnNames()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"blue = TColor::GetColor(\"#1f77b4\");\n",
"orange = TColor::GetColor(\"#ff7f0e\");\n",
"green = TColor::GetColor(\"#2ca02c\");\n",
"red = TColor::GetColor(\"#ff0000\");"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"c1 = new TCanvas(\"c1\", \"canvas\", 1800, 460);\n",
"g1 = train_df.Graph(\"time\", \"signal\");\n",
"g2 = train_df.Graph(\"time\", \"open_channels\");\n",
"\n",
"g1->SetLineColor(blue);\n",
"g1->SetMarkerStyle(0);\n",
"g2->SetLineColor(orange);\n",
"\n",
"g1->Draw(\"AL\");\n",
"g2->Draw(\"same L\");\n",
"\n",
"gPad->BuildLegend();\n",
"\n",
"c1->Draw();"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"// Now let's do some feature engineering\n",
"df = train_df\n",
" .Define(\"index\", [](double time)->int{return int(time*10000)-1;}, {\"time\"})\n",
" .Define(\"batch\", [](int index)->int{return index/50000;}, {\"index\"})\n",
" .Define(\"batch_index\", [](int index, int batch)->int{return index - batch*50000;}, {\"index\", \"batch\"});\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"n_batches = int(*df.Max(\"batch\"))+1;\n",
"n_sample_per_batch = 50000;\n",
"\n",
"// HACK: Storing intermediate values in global variables\n",
"batch_means = vector<double>(n_batches);\n",
"batch_maxs = vector<double>(n_batches, -1000);\n",
"batch_mins = vector<double>(n_batches, 1000);\n",
"batch_stds = vector<double>(n_batches);\n",
"\n",
"df.Foreach([&](int batch, double value){\n",
" batch_means[batch] += value;\n",
" batch_maxs[batch] = max(batch_maxs[batch], value);\n",
" batch_mins[batch] = min(batch_mins[batch], value);\n",
"}, {\"batch\", \"signal\"});\n",
"\n",
"for(auto& val : batch_means)\n",
" val /= n_sample_per_batch;\n",
"\n",
"// With the mean available, we can now compute the stddev (note: no square root is taken below, so this is the variance)\n",
"df.Foreach([&](int batch, double value){\n",
" batch_stds[batch] += pow(value - batch_means[batch], 2);\n",
"});\n",
"for(auto& val : batch_stds)\n",
" val /= n_sample_per_batch;\n",
"\n",
"df = df.Define(\"batch_mean\", \"batch_means[batch]\"). \\\n",
" Define(\"batch_maxs\", \"batch_maxs[batch]\"). \\\n",
" Define(\"batch_mins\", \"batch_mins[batch]\"). \\\n",
" Define(\"batch_stds\", \"batch_stds[batch]\");"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(ROOT::RDF::RInterface::ColumnNames_t) { \"time\", \"index\", \"batch\", \"batch_index\", \"time\", \"signal\", \"open_channels\" }\n"
]
}
],
"source": [
"df.GetColumnNames()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df.Snapshot(\"data\", \"data_with_featutres.root\");"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "ROOT C++",
"language": "c++",
"name": "root"
},
"language_info": {
"codemirror_mode": "text/x-c++src",
"file_extension": ".C",
"mimetype": " text/x-c++src",
"name": "c++"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment