Skip to content

Instantly share code, notes, and snippets.

@aminnj
Last active September 23, 2019 21:15
Show Gist options
  • Save aminnj/fcabea39630c1cade3ecf70069db7c5b to your computer and use it in GitHub Desktop.
Save aminnj/fcabea39630c1cade3ecf70069db7c5b to your computer and use it in GitHub Desktop.
Download psets for all tasks in a chain for a given dataset
from __future__ import print_function
import requests
import os
# One shared HTTPS session for every cmsweb request made by this script.
session = requests.Session()
# CA material used to verify the server. Alternatively, curl this locally:
# https://raw.githubusercontent.com/CMSTrackerDPG/cernrequests/master/cernrequests/cern-cacert.pem
session.verify = "/etc/grid-security/certificates/"
# Client certificate: the per-user proxy file, which should match the path
# reported by `voms-proxy-info -path`.
session.cert = "/tmp/x509up_u{0}".format(os.getuid())
def get_url_with_cert(url, params=None):
    """Issue a GET request through the shared module-level ``session``.

    Args:
        url: Address to fetch.
        params: Optional mapping of query-string parameters. When omitted,
            an empty mapping is sent.

    Returns:
        Whatever ``session.get`` returns for the request.
    """
    # A ``None`` sentinel replaces the former ``params={}`` default: that dict
    # was created once at definition time and shared by every call.
    if params is None:
        params = {}
    return session.get(url, params=params)
def get_chain_info(dataset):
    """Return the processing tasks of the request that produced ``dataset``.

    Queries the ReqMgr2 request endpoint with ``outputdataset=dataset``,
    selects the request whose name sorts first by its last three
    underscore-separated fields, and collects that request's tasks. Each
    returned task dict is given a ``"Pset"`` key holding the config-cache URL
    built from its ``"ConfigCacheID"``.

    Args:
        dataset: Full dataset name to look up.

    Returns:
        A list of task dicts: ``Task1`` .. ``TaskN`` when the request carries
        a ``"TaskChain"`` count, otherwise a one-element list holding the
        request dict itself.

    Raises:
        IndexError: If the service reports no request for ``dataset``.
        KeyError: If the reply or a task lacks an expected key.
        requests.HTTPError: If the service answers with an error status.
    """
    url = "https://cmsweb.cern.ch/reqmgr2/data/request"
    pset_url = "https://cmsweb.cern.ch/couchdb/reqmgr_config_cache/{}/configFile"

    print("Fetching chain information for {}".format(dataset))
    r = get_url_with_cert(url, params=dict(outputdataset=dataset))
    # Fail on HTTP errors here instead of on a confusing JSON/key error below.
    r.raise_for_status()

    results = r.json()["result"]
    if not results or not results[0]:
        # Still an IndexError (as the bare indexing used to raise), but with a
        # message that says what actually went wrong.
        raise IndexError(
            "No request found with output dataset {}".format(dataset)
        )

    # Only the first request in sort order is needed, so take the minimum
    # rather than sorting everything; ties resolve to the first one seen,
    # exactly like a stable sort followed by [0].
    # NOTE(review): the trailing name fields look like a submission
    # date/time/sequence stamp, making this the earliest request - confirm.
    _, firstproc = min(
        results[0].items(),
        key=lambda item: item[0].rsplit("_", 3)[1:],
    )

    ntasks = firstproc.get("TaskChain")
    if ntasks is None:
        # Not a task chain: the request itself is the only "task".
        tasks = [firstproc]
    else:
        tasks = [firstproc["Task{}".format(i)] for i in range(1, ntasks + 1)]

    for task in tasks:
        task["Pset"] = pset_url.format(task["ConfigCacheID"])
    return tasks
def download_pset(task):
    """Download a task's configuration file and save it as ``pset_<tier>.py``.

    The file is fetched from the config cache using the task's
    ``"ConfigCacheID"``. Its name is derived from the ``--datatier`` option
    found on the line containing "with command line options": the first
    listed tier that is not LHE, DQM or DQMIO is used, with dashes removed
    and ``GENSIMRAW`` mapped to ``RAWSIM``. An existing file with the same
    name is never overwritten.

    Args:
        task: Task dict providing ``"ScramArch"``, ``"CMSSWVersion"`` and
            ``"ConfigCacheID"``.

    Raises:
        KeyError: If ``task`` lacks one of the required keys.
        IndexError: If the configuration has no "with command line options"
            line, no ``--datatier`` option, or only excluded tiers.
        requests.HTTPError: If the download answers with an error status.
    """
    scram_arch = task["ScramArch"]
    # Indexing a plain string would yield only its first character, so only
    # unwrap real sequences.
    # NOTE(review): assumes ScramArch may arrive as either a list or a single
    # string - confirm against the service.
    if isinstance(scram_arch, (list, tuple)):
        scram_arch = scram_arch[0]
    cmssw_version = task["CMSSWVersion"]
    cacheid = task["ConfigCacheID"]
    url = "https://cmsweb.cern.ch/couchdb/reqmgr_config_cache/{}/configFile".format(cacheid)

    print("Downloading {}".format(url))
    response = get_url_with_cert(url)
    # Do not parse (or save) an error page as if it were a configuration.
    response.raise_for_status()
    content = response.content

    # ``content`` is bytes on Python 3, where splitting it with str separators
    # raises TypeError. Parse a decoded copy and keep the raw bytes for disk.
    text = content.decode("utf-8", "replace")
    drivercmd = text.split("with command line options", 1)[1].split("\n", 1)[0]
    tiers = drivercmd.split("--datatier", 1)[1].split(None, 1)[0].split(",")
    # Keep command-line order: the previous set difference picked an arbitrary
    # element, so the output file name could change from run to run.
    outputtiers = [t for t in tiers if t not in ("LHE", "DQM", "DQMIO")]
    tier = outputtiers[0].replace("-", "").replace("GENSIMRAW", "RAWSIM")
    pset_fname = "pset_{}.py".format(tier.lower())

    if os.path.exists(pset_fname):
        print("Not saving since {} already exists!".format(pset_fname))
        return
    # Binary mode writes the payload exactly as received on Python 2 and 3.
    with open(pset_fname, "wb") as fh:
        fh.write(content)
    print("Saved to {}. Use with {} and SCRAM_ARCH={}".format(pset_fname, cmssw_version, scram_arch))
if __name__ == "__main__":
    # Script entry point: download the psets for every task in the chain that
    # produced one dataset.
    import sys

    # Used when no dataset is given on the command line, so running the
    # script without arguments behaves exactly as before.
    default_dataset = "/TTTT_TuneCP5_13TeV-amcatnlo-pythia8/RunIIAutumn18MiniAOD-102X_upgrade2018_realistic_v15_ext1-v2/MINIAODSIM"
    # An optional first argument overrides the dataset without editing the file.
    dataset = sys.argv[1] if len(sys.argv) > 1 else default_dataset

    for task in get_chain_info(dataset):
        download_pset(task)
    print("Remember to edit the input/output names and number of threads/cores used!")
@aminnj
Copy link
Author

aminnj commented Sep 19, 2019

Requirements:

  • A valid grid proxy, created with voms-proxy-init
  • A valid dataset name (data or MC) set in the script
  • The Python requests package (running python download_psets.py in any CMSSW environment should work, since those should already provide requests)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment