Skip to content

Instantly share code, notes, and snippets.

@jpata
Created June 3, 2016 16:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpata/00887e8f3786877b70f63fc5908c6e9d to your computer and use it in GitHub Desktop.
Save jpata/00887e8f3786877b70f63fc5908c6e9d to your computer and use it in GitHub Desktop.
import dask
import sys, glob
import numpy as np
def fixPath():
    """Return a filtered copy of ``sys.path``.

    Every entry that contains both the substrings ``"cvmfs"`` and
    ``"pandas"`` is left out; all other entries are kept in their original
    order. ``sys.path`` itself is not modified here -- the caller decides
    whether to assign the result back.

    Returns:
        list: the surviving ``sys.path`` entries.
    """
    return [
        entry
        for entry in sys.path
        if not ("cvmfs" in entry and "pandas" in entry)
    ]
# Replace sys.path with the filtered list from fixPath() (entries containing
# both "cvmfs" and "pandas" removed). This runs before pandas is imported
# below -- presumably so that a different pandas installation is picked up;
# keep this ordering.
sys.path = fixPath()
import ROOT, pandas
# Switch ROOT to batch mode.
ROOT.gROOT.SetBatch(True)
import dask.multiprocessing
import root_numpy as rnpy
import dask.dataframe as dd
# Input files: tree_1.root ... tree_10.root, all located in the same
# directory. The paths are relative to the storage prefix that read_root
# prepends.
_flist_dir = (
    "/store/user/jpata/tth/VHBBHeppyV21_tthbbV8_May2/"
    "ttHTobb_M125_13TeV_powheg_pythia8/"
    "VHBBHeppyV21_tthbbV8_May2_ttHTobb_M125_13TeV_powheg_Py8__fall15MAv2-pu25ns15v1_76r2as_v12-v1/"
    "160502_210336/0000/"
)
# A generator expression (rather than a list comprehension) keeps the loop
# variable out of the module namespace on Python 2.
flist = list(
    _flist_dir + "tree_{0}.root".format(tree_no) for tree_no in range(1, 11)
)
def read_root(fn, **kwargs):
    """Load the ``tree`` object of one ROOT file into a pandas DataFrame.

    Args:
        fn: File path that is appended to the hard-coded xrootd prefix
            below to form the URL handed to ``ROOT.TFile.Open``.
        **kwargs: Only ``columns`` is honoured; its value is forwarded to
            ``rnpy.tree2rec`` as ``branches``. Any other keyword is ignored.

    Returns:
        pandas.DataFrame: built from the array returned by ``tree2rec``.

    Raises:
        Exception: if the file could not be opened.
    """
    kwargs_2 = {}
    if "columns" in kwargs:
        kwargs_2["branches"] = kwargs["columns"]

    # Alternative prefixes that were used previously:
    #   "root://cms-xrd-global.cern.ch/"
    #   "file://"
    pref = "root://storage01.lcg.cscs.ch/pnfs/lcg.cscs.ch/cms/trivcat"
    url = pref + fn

    tf = ROOT.TFile.Open(url)
    # Deliberately `==` and not `is`: a failed open may hand back a null
    # proxy object rather than the None singleton -- TODO confirm for the
    # ROOT version in use before changing this comparison.
    if tf == None:  # noqa: E711
        raise Exception("Could not open {0}".format(url))

    # Close the file even when the conversion fails, so the handle is not
    # leaked on error.
    try:
        data = rnpy.tree2rec(tf.Get("tree"), **kwargs_2)
    finally:
        tf.Close()
    return pandas.DataFrame(data)
# Probe every input file once with a two-column read and keep only the
# ones that load; the paths of files that fail are printed.
good_flist = []
for f in flist:
    try:
        d = read_root(f, columns=['numJets', 'jets_pt'])
    except Exception:
        # `except Exception` instead of a bare `except:` so that Ctrl-C and
        # SystemExit still abort the scan instead of being reported as a
        # bad file.
        print(f)
    else:
        good_flist.append(f)
# Number of readable files.
print(len(good_flist))
# Hand-built dask graph: one task per readable file, keyed (name, i), each
# calling read_root on that file.
# NOTE(review): the tasks call read_root without `columns`, so each one reads
# every branch of the tree rather than only the names listed in `columns`
# below -- confirm whether that is intended.
name = "mydf"
dsk = {(name, i): (read_root, fi) for i, fi in enumerate(good_flist)}
columns = ['numJets', 'jets_pt', 'jets_eta', 'nBCSVM']
# One more division boundary than partitions. This must be sized from
# good_flist (the files actually present in the graph), not flist, otherwise
# the partition count is wrong whenever a file was skipped above.
divisions = tuple(range(len(good_flist) + 1))
df = dd.DataFrame(dsk, name, columns, divisions)
import time
from multiprocessing.pool import ThreadPool


def _timed_selected_mean(**compute_kwargs):
    """Print the mean of numJets for rows with nBCSVM >= 4, then the wall time.

    Args:
        **compute_kwargs: forwarded unchanged to ``.compute()``.
    """
    t0 = time.time()
    print(df[df.nBCSVM >= 4].numJets.mean().compute(**compute_kwargs))
    t1 = time.time()
    print(t1 - t0)


print(df.columns)

# First run: dask options left as they are.
_timed_selected_mean()

# Second run: same computation after installing a 10-thread pool.
dask.set_options(pool=ThreadPool(10))
_timed_selected_mean(num_workers=10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment