Last active
January 23, 2017 05:09
-
-
Save jstrube/c8bcb4f8a917b3b9c374 to your computer and use it in GitHub Desktop.
Script to parse log files from SLAC and extract metadata for the DFC.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path as path
import sys
from contextlib import closing
from pprint import pprint
from urllib2 import urlopen

# Keep Script.initialize() ahead of the remaining DIRAC imports, as in the
# original ordering of this script.
from DIRAC.Core.Base import Script
Script.initialize()
from DIRAC.Resources.Catalog.FileCatalogClient import FileCatalogClient
from DIRAC import gLogger
# Usage: python setILCGen_Metadata.py /grid/path/of/uploaded/files http://path/to/log/file
# Example: python setILCGen_Metadata.py /ilc/prod/ilc/mc-dbd/generated/500-TDR_ws/aa_lowpt/ https://www.slac.stanford.edu/~timb/500/aa_lowpt_production/aa_lowpt/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167.txt
#
# Both positional arguments are mandatory: without them, print the usage text
# and stop with a non-zero exit status before anything else is attempted.
if len(sys.argv) < 3:
    # Single-argument print(...) produces the same output under the Python 2
    # print statement as the original comma-separated form.
    print("Usage: python %s /grid/path/of/uploaded/files http://path/to/log/file" % sys.argv[0])
    print("Example: python %s /ilc/prod/ilc/mc-dbd/generated/500-TDR_ws/aa_lowpt/ https://www.slac.stanford.edu/~timb/500/aa_lowpt_production/aa_lowpt/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167/E0500-TDR_ws.Paaddhad.Gwhizard-1.95.eB.pB.I39167.txt" % sys.argv[0])
    sys.exit(-1)
# Client used further down to attach the metadata to the catalog entries.
fc = FileCatalogClient()
# Base path of the LFNs, given as the first command-line argument.
gridPath = sys.argv[1]
# Log file (URI), given as the second command-line argument.
logfile = sys.argv[2]
# Convention for translating Tim's log entries to DFC meta tags.
# In both tables the dictionary key is the metadata name handed to the file
# catalog and the value is the key that is looked for (left of the '=') in the
# log file.

# Metadata shared by all files described by one log file ("per dir").
LogDirConvention = {
    "PythiaVer": "pythia_version",
    "MachineParams": "machine_configuration",
    "Energy": "CM_energy_in_GeV",
    "Luminosity": "luminosity",
    "BeamParticle1": "beam_particle1",
    "BeamParticle2": "beam_particle2",
    "PolarisationBeam1": "polarization1",
    "PolarisationBeam2": "polarization2",
    "XSection": "cross_section_in_fb",
    "CrossSectionError": "cross_section_error_in_fb",
    "GenProcessID": "process_id",
    "GenProcessName": "process_names",
    "TauDecaysLib": "tau_decays",
    "ProgramNameVersion": "program_name_version",
    "HadTune": "hadronisation_tune",
}

# Metadata given once per file; in the log these are ';'-separated lists.
LogFileConvention = {
    "NumberOfEvents": "number_of_events_in_files",
    "FileNames": "file_names",
}
# Parse the log file and pick out the "key=value" lines that carry metadata
# for the DFC.
#
# The convention tables are inverted once, up front (log key -> meta tag), and
# only overwritten with the parsed values after the whole log has been read.
# That way a value taken from the log can never be mistaken for a log key on a
# later line, and tags that never showed up can be detected afterwards.
dirKeyToTag = dict((logKey, tag) for tag, logKey in LogDirConvention.items())
fileKeyToTag = dict((logKey, tag) for tag, logKey in LogFileConvention.items())
dirValues = {}
fileValues = {}
FileURL = None

# closing() makes sure the connection is released even if parsing fails.
with closing(urlopen(logfile)) as logLines:
    for line in logLines:
        # Split on the first '=' only so that values containing '=' stay
        # intact; lines without any '=' carry no metadata and are skipped.
        key, sep, value = line.strip().partition('=')
        if not sep:
            continue
        if key == "fileurl":
            FileURL = value
        # If a key is repeated in the log, its first occurrence is kept.
        if key in dirKeyToTag:
            dirValues.setdefault(dirKeyToTag[key], value)
        if key in fileKeyToTag:
            # per-file entries are ';'-separated lists
            fileValues.setdefault(fileKeyToTag[key], value.split(';'))

# The per-file lists and the file URL are indispensable for what follows;
# stop here rather than continue with placeholder strings.
missingKeys = [logKey for logKey, tag in fileKeyToTag.items() if tag not in fileValues]
if FileURL is None:
    missingKeys.append("fileurl")
if missingKeys:
    gLogger.error("Log file %s lacks required entries" % logfile, ", ".join(sorted(missingKeys)))
    sys.exit(-1)

# Publish the parsed values under the names the rest of the script reads.
# A directory-level tag that was not found in the log is dropped, so that its
# placeholder (the log key itself) is never written to the catalog as a value.
for logKey, tag in dirKeyToTag.items():
    if tag in dirValues:
        LogDirConvention[tag] = dirValues[tag]
    else:
        gLogger.warn("Log file has no entry for meta tag %s" % tag, logKey)
        del LogDirConvention[tag]
LogFileConvention.update(fileValues)

# The EvtClass is not given in the log file. Extract it from the grandparent
# dir of the file URL. Machine is taken from a fixed position (index 4) of the
# URL split on '/'.
# NOTE(review): the fixed index presumably relies on the layout of the SLAC
# URLs - confirm before using this with URLs from another site.
urlDirs = path.dirname(FileURL).split('/')
if len(urlDirs) < 5:
    gLogger.error("fileurl has too few path components to derive EvtClass and Machine", FileURL)
    sys.exit(-1)
LogDirConvention["EvtClass"] = urlDirs[-2]
LogDirConvention["Machine"] = urlDirs[4]
# Attach the collected metadata to every file listed in the log. The metadata
# is shown and has to be confirmed interactively before each catalog update;
# any answer other than 'y'/'Y' skips that file.
fileMeta = {}
fileNames = LogFileConvention["FileNames"]
eventCounts = LogFileConvention["NumberOfEvents"]
# Check up front that every file has an event count, instead of failing
# half-way through after some files have already been updated.
if len(eventCounts) < len(fileNames):
    gLogger.error("Log file lists %d files but only %d event counts"
                  % (len(fileNames), len(eventCounts)))
    sys.exit(-1)

for f, nEvents in zip(fileNames, eventCounts):
    # Build a fresh dictionary per file: the directory-level metadata plus the
    # event count that belongs to this particular file.
    fileMeta = dict(LogDirConvention)
    fileMeta['NumberOfEvents'] = nEvents
    lfn = path.join(gridPath, f)
    print("Setting metadata for file %s" % f)
    pprint(fileMeta)
    if raw_input("proceed (y/N)").strip().upper() != 'Y':
        continue
    res = fc.setMetadata(lfn, fileMeta)
    if not res['OK']:
        gLogger.error("Failed to set meta data %s to %s\n" % (lfn, fileMeta), res['Message'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment