Skip to content

Instantly share code, notes, and snippets.

@jstrube
Last active January 23, 2017 05:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jstrube/c8bcb4f8a917b3b9c374 to your computer and use it in GitHub Desktop.
Save jstrube/c8bcb4f8a917b3b9c374 to your computer and use it in GitHub Desktop.
Script to parse log files from SLAC and extract metadata for the DIRAC File Catalog (DFC).
from DIRAC.Core.Base import Script
Script.initialize()
from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
from DIRAC import gLogger
from urllib2 import urlopen
import os.path as path
import sys
from pprint import pprint
# Command line:
#   python setILCGen_Metadata.py /grid/path/of/uploaded/files http://path/to/log/file
# For instance:
#   python setILCGen_Metadata.py /ilc/prod/ilc/mc-dbd/generated/500-TDR_ws/aa_lowpt/ https://www.slac.stanford.edu/~timb/500/aa_lowpt_production/aa_lowpt/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167.txt
#
# Both positional arguments are mandatory: without them, show the usage text
# and stop with exit status -1.
if not len(sys.argv) >= 3:
    usageLines = (
        ("Usage: python", sys.argv[0], "/grid/path/of/uploaded/files http://path/to/log/file"),
        ("Example: python", sys.argv[0], "/ilc/prod/ilc/mc-dbd/generated/500-TDR_ws/aa_lowpt/ https://www.slac.stanford.edu/~timb/500/aa_lowpt_production/aa_lowpt/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167.txt"),
    )
    for words in usageLines:
        # space-separated, one line per entry
        print(" ".join(words))
    sys.exit(-1)

# client used further down to write the meta data into the file catalog
fc = FileCatalogClient()
# first argument: base path of the LFNs; second argument: log file (URI)
gridPath, logfile = sys.argv[1], sys.argv[2]
# Translation tables between Tim's log entries and DFC meta tags.
#
# In both tables the key is the DFC meta tag and the value is the name of the
# corresponding entry ("<name>=<value>" line) in the log file. While the log
# is parsed, each log entry name is replaced in place by the value read for it.

# Tags that apply to the whole directory (identical for every file of the log).
LogDirConvention = {
    "PythiaVer":          "pythia_version",
    "MachineParams":      "machine_configuration",
    "Energy":             "CM_energy_in_GeV",
    "Luminosity":         "luminosity",
    "BeamParticle1":      "beam_particle1",
    "BeamParticle2":      "beam_particle2",
    "PolarisationBeam1":  "polarization1",
    "PolarisationBeam2":  "polarization2",
    "XSection":           "cross_section_in_fb",
    "CrossSectionError":  "cross_section_error_in_fb",
    "GenProcessID":       "process_id",
    "GenProcessName":     "process_names",
    "TauDecaysLib":       "tau_decays",
    "ProgramNameVersion": "program_name_version",
    "HadTune":            "hadronisation_tune",
}

# Tags that differ from file to file; their log values are ';'-separated
# lists and are split into Python lists by the parser.
LogFileConvention = {
    "NumberOfEvents": "number_of_events_in_files",
    "FileNames":      "file_names",
}
# Parse the log file and pick out the "key=value" lines that specify meta
# data used in the DFC.
#
# Both convention tables are filled in place: for every tag whose log key is
# found, the log key stored in the table is replaced by the value read from
# the log (split on ';' into a list for the per-file tags). The code further
# down reads its input straight from these tables.
FileURL = None
foundDirTags = set()
foundFileTags = set()
for line in urlopen(logfile):
    # Split on the first '=' only, so a value that itself contains '='
    # is kept whole instead of being cut at the second '='.
    key, sep, value = line.strip().partition('=')
    if not sep:
        # not a "key=value" line, nothing to extract
        continue
    if key == "fileurl":
        FileURL = value
    for c in LogDirConvention:
        # Tags that were already filled hold a value, not a log key, and
        # must not be compared against log keys any more.
        if c not in foundDirTags and key == LogDirConvention[c]:
            LogDirConvention[c] = value
            foundDirTags.add(c)
            break
    for c in LogFileConvention:
        if c not in foundFileTags and key == LogFileConvention[c]:
            LogFileConvention[c] = value.split(';')
            foundFileTags.add(c)
            break

# The per-file entries and the file URL are indispensable for what follows;
# stop with a clear message instead of failing later (or silently working on
# the untouched log key names).
missing = [LogFileConvention[c] for c in LogFileConvention if c not in foundFileTags]
if FileURL is None:
    missing.append("fileurl")
if missing:
    gLogger.error("Log file %s lacks required entries" % logfile, ", ".join(sorted(missing)))
    sys.exit(1)

# A directory tag whose key never showed up still holds the log key name.
# Drop it, so that this name is not written to the catalog as if it were a
# real value.
for c in list(LogDirConvention):
    if c not in foundDirTags:
        gLogger.warn("Log file has no entry '%s', tag not set" % LogDirConvention[c], c)
        del LogDirConvention[c]

# EvtClass and Machine are not given as log entries; both are taken from the
# directory part of the "fileurl" entry, split on '/'.
urlDirs = path.dirname(FileURL).split('/')
if len(urlDirs) < 5:
    gLogger.error("Cannot derive EvtClass and Machine from file URL", FileURL)
    sys.exit(1)
# EvtClass: the directory one level above the one that contains the file
LogDirConvention["EvtClass"] = urlDirs[-2]
# Machine: fifth '/'-separated component of the URL directory
# NOTE(review): presumably relies on the layout of the SLAC URLs - confirm
LogDirConvention["Machine"] = urlDirs[4]
# Attach the collected meta data to every file listed in the log: the
# per-directory tags plus the file's own number of events. The meta data is
# printed and has to be confirmed before it is written to the catalog.
fileNames = LogFileConvention["FileNames"]
eventCounts = LogFileConvention["NumberOfEvents"]
# Both entries must be lists with one element per file. Anything else (an
# entry that was never filled from the log, or lists of different length)
# would pair files with wrong event counts or raise an IndexError half way
# through, so refuse to start.
if not (isinstance(fileNames, list) and isinstance(eventCounts, list)
        and len(fileNames) == len(eventCounts)):
    gLogger.error("File names and event counts from the log file do not match",
                  "%s vs %s" % (fileNames, eventCounts))
    sys.exit(1)

for f, nEvents in zip(fileNames, eventCounts):
    f = f.strip()
    if not f:
        # An empty name (e.g. from a trailing ';') would make the joined
        # path point at the directory itself rather than at a file.
        continue
    # fresh dictionary per file: directory-wide tags plus the per-file count
    fileMeta = dict(LogDirConvention)
    fileMeta['NumberOfEvents'] = nEvents
    lfn = path.join(gridPath, f)
    print("Setting metadata for file %s" % f)
    pprint(fileMeta)
    # anything but an explicit "y"/"Y" skips this file
    if raw_input("proceed (y/N)").strip().upper() != 'Y':
        continue
    res = fc.setMetadata(lfn, fileMeta)
    if not res['OK']:
        gLogger.error( "Failed to set meta data %s to %s\n" %(lfn, fileMeta), res['Message'] )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment