Skip to content

Instantly share code, notes, and snippets.

Last active September 23, 2019 21:15
Show Gist options
  • Save aminnj/fcabea39630c1cade3ecf70069db7c5b to your computer and use it in GitHub Desktop.
Save aminnj/fcabea39630c1cade3ecf70069db7c5b to your computer and use it in GitHub Desktop.
Download psets for all tasks in a chain for a given dataset
from __future__ import print_function
import requests
import os
# One shared HTTP session, so the client certificate and CA settings below
# apply to every request made by this script.
session = requests.Session()
# CA certificates used to verify the server (they may alternatively be
# fetched and stored locally).
session.verify = "/etc/grid-security/certificates/"
# Per-user proxy file; the path should match `voms-proxy-info -path`.
session.cert = "/tmp/x509up_u{0}".format(os.getuid())
def get_url_with_cert(url, params=None):
    """Issue a GET request through the module-level ``session``.

    Args:
        url: Address to fetch.
        params: Optional mapping of query-string parameters. Defaults to
            no parameters.

    Returns:
        The response object returned by ``session.get``.
    """
    # A ``None`` sentinel replaces the former mutable ``{}`` default, which
    # was a single dict object shared between all calls.
    if params is None:
        params = {}
    return session.get(url, params=params)
def get_chain_info(dataset):
    """Return the task dicts of the request that produced ``dataset``.

    Fetches the requests matching ``dataset`` (sent as the
    ``outputdataset`` query parameter), picks the first one after sorting
    by request name, and returns its tasks. Every returned dict gets a
    "Pset" entry built from its "ConfigCacheID".

    Args:
        dataset: Full dataset name to look up.

    Returns:
        A list of dicts. If the request has no "TaskChain" entry the list
        holds the request dict itself; otherwise it holds "Task1" through
        "Task<TaskChain>" in order.
    """
    # NOTE(review): the endpoint is an empty string in this copy of the
    # script; it must be set to the request-service URL before this can run.
    url = ""
    params = dict(outputdataset=dataset)
    print("Fetching chain information for {}".format(dataset))
    r = get_url_with_cert(url, params=params)
    js = r.json()
    items = js["result"][0].items()
    # Sort on the last three underscore-separated fields of the request name
    # (presumably date/time stamps -- TODO confirm) and take the first entry.
    items = sorted(items, key=lambda x: x[0].rsplit("_", 3)[1:])
    firstproc = items[0][1]
    ntasks = firstproc.get("TaskChain")

    # NOTE(review): the "Pset" strings below have no base URL in this copy
    # of the script -- confirm the intended prefix.
    if ntasks is None:
        # Not a task chain: the request itself is the only task. Returning
        # here also avoids calling range() with None.
        firstproc["Pset"] = "{}/configFile".format(firstproc["ConfigCacheID"])
        return [firstproc]

    tasks = []
    for i in range(1, ntasks + 1):
        task = firstproc["Task{}".format(i)]
        task["Pset"] = "{}/configFile".format(task["ConfigCacheID"])
        # Previously the task was annotated but never collected, so the
        # function always returned an empty list.
        tasks.append(task)
    return tasks
def download_pset(task):
    """Download the config file of one task and save it as ``pset_<tier>.py``.

    The output file name is derived from the ``--datatier`` option found on
    the "with command line options" line of the downloaded file. An
    existing file of the same name is never overwritten.

    Args:
        task: Dict with the keys "ScramArch" (a sequence whose first element
            is used), "CMSSWVersion" and "ConfigCacheID".

    Raises:
        KeyError: If ``task`` lacks one of the keys above.
        IndexError: If the downloaded file has no "with command line
            options" line, no ``--datatier`` option, or only excluded tiers.
    """
    scram_arch = task["ScramArch"][0]
    cmssw_version = task["CMSSWVersion"]
    cacheid = task["ConfigCacheID"]
    # NOTE(review): this URL has no base address in this copy of the script
    # -- confirm the intended prefix.
    url = "{}/configFile".format(cacheid)
    print("Downloading {}".format(url))
    content = get_url_with_cert(url).content

    # ``content`` is raw bytes. Parse a decoded copy so the string operations
    # also work on Python 3, and keep the bytes to write them out unmodified.
    text = content.decode("utf-8", "replace")
    drivercmd = text.split("with command line options", 1)[1].split("\n", 1)[0]
    requested = drivercmd.split("--datatier", 1)[1].split(None, 1)[0].split(",")

    # Drop the tiers that never name the pset. Keeping command-line order
    # (rather than going through a set) makes the chosen tier deterministic.
    excluded = ("LHE", "DQM", "DQMIO")
    outputtiers = [t for t in requested if t not in excluded]
    tier = outputtiers[0].replace("-", "").replace("GENSIMRAW", "RAWSIM")
    pset_fname = "pset_{}.py".format(tier.lower())

    if os.path.exists(pset_fname):
        print("Not saving since {} already exists!".format(pset_fname))
        return

    with open(pset_fname, "wb") as fh:
        fh.write(content)
    print("Saved to {}. Use with {} and SCRAM_ARCH={}".format(pset_fname, cmssw_version, scram_arch))
if __name__ == "__main__":
    dataset = "/TTTT_TuneCP5_13TeV-amcatnlo-pythia8/RunIIAutumn18MiniAOD-102X_upgrade2018_realistic_v15_ext1-v2/MINIAODSIM"
    # Download the pset of every task in the chain; previously the loop only
    # printed the reminder and never fetched anything.
    for task in get_chain_info(dataset):
        download_pset(task)
    # The reminder applies to all saved files, so print it once at the end.
    print("Remember to edit the input/output names and number of threads/cores used!")
Copy link

aminnj commented Sep 19, 2019


Requirements:

  • A valid proxy, created with voms-proxy-init
  • A valid dataset name (data or MC) set in the script
  • The Python requests package (the python in any CMSSW environment should already provide it)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment