Example Tier2 batch job
Universe = vanilla
Executable = example_job.sh
Arguments = 10
#The logs directory must exist
Log = logs/example_job.$(Cluster).log
Output = logs/example_job.out.$(Cluster).$(Process)
Error = logs/example_job.err.$(Cluster).$(Process)
Requirements=(TARGET.OpSysAndVer=="CentOS7" && regexp("blade-.*", TARGET.Machine))
#Run the job inside a Singularity container; choose the rhel7 (slc7) or rhel6 (slc6) image as needed
+RunAsOwner = True
+InteractiveUser = true
+SingularityImage = "/cvmfs/singularity.opensciencegrid.org/bbockelm/cms:rhel7"
+SingularityBindCVMFS = True
run_as_owner = True
#Provide information on proxy in order to access storage
x509userproxy = $ENV(X509_USER_PROXY)
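#Before submitting, a valid grid proxy must exist and X509_USER_PROXY must point to it.
#For example (the VO name and validity below are typical CMS values, adjust as needed):
#  voms-proxy-init --voms cms --valid 168:00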
#Don't request more resources than needed, otherwise your job will wait longer in the queue
RequestDisk = 4
RequestMemory = 2000
RequestCpus = 1
#Transfer this file back to the login node.
#Use this only for small files, like plots or txt files, written to an existing output directory on login-1.
#Big outputs should be transferred within the job to /mnt/hadoop using `gfal-copy`.
should_transfer_files = YES
when_to_transfer_output = ON_EXIT
transfer_output_files = output_small.txt
transfer_input_files = my_calibrations.txt
transfer_output_remaps = "output_small.txt=outputs/output_small.txt.$(Cluster).$(Process)"
#This number can be used to queue more than one job
Queue 1
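#To queue several copies of the job, increase the number after Queue; $(Process) then runs from 0 to N-1.
#A sketch (passing $(Process) as the argument is just an illustration, adapt to your job):
#  Arguments = $(Process)
#  Queue 10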
#!/bin/sh
#Print each command as it is executed (useful for debugging)
set -x
#Abort the script on the first error
set -e
#Print some basic debugging info
echo "whoami="`whoami`
echo "pwd="`pwd`
echo "hostname="`hostname`
echo "date="`date`
env
#Print information about the grid proxy
voms-proxy-info -all
#Inside singularity, the scratch directory is here
#This is also where the job starts out
echo "TMP:" `df -h $TMP`
echo "looking inside scratch directory BEFORE job"
ls -al $TMP
#This would fail: jobs cannot write to the home directory (/data/...)
#echo "test" > /data/$USER/testfile.txt
#This would fail: jobs cannot write directly to storage (/mnt/hadoop/...)
#echo "test" > /mnt/hadoop/store/user/$USER/testfile.txt
#Run cmsenv in an existing CMSSW directory on login-1
cd /data/jpata/CMSSW_10_2_0/src
source /cvmfs/cms.cern.ch/cmsset_default.sh
eval `scramv1 runtime -sh`
#go back to scratch directory on worker node
cd $TMP
#your transfer_input_files are located in the working directory
cat my_calibrations.txt
#Run some ROOT code, produce output
echo "my job output datacard or plot" > $TMP/output_small.txt
echo "this is a placeholder for a big output file" > $TMP/output_big.dat
#Return to non-CMSSW environment, which is required for gfal-copy
eval `scram unsetenv -sh`
#Output can be copied using `gfal-copy` to /mnt/hadoop (large files only) or handled by condor using `transfer_output_files = output.txt` (small files only)
#Do NOT copy more than one file per job to hadoop; if a job produces multiple large outputs, bundle them into one compressed archive first (see the sketch below)
gfal-copy -f --checksum-mode=both file://$TMP/output_big.dat gsiftp://transfer.ultralight.org//store/user/jpata/output_big.dat
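#A sketch of the archive approach for multiple large outputs (file names here are placeholders):
#  tar -czf $TMP/outputs.tar.gz -C $TMP output_big.dat
#  gfal-copy -f --checksum-mode=both file://$TMP/outputs.tar.gz gsiftp://transfer.ultralight.org//store/user/jpata/outputs.tar.gz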
echo "looking inside scratch directory AFTER job"
ls -al $TMP
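To submit, save the submit description above to a file (e.g. example_job.jdl; the name is just an example) alongside example_job.sh, then run `condor_submit example_job.jdl` on the login node. `condor_q` shows the status of the queued jobs.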