Skip to content

Instantly share code, notes, and snippets.

@crazyhottommy
Last active September 8, 2018 13:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save crazyhottommy/f0af1386ac5a6f18c07db1e566c54e3d to your computer and use it in GitHub Desktop.
Save crazyhottommy/f0af1386ac5a6f18c07db1e566c54e3d to your computer and use it in GitHub Desktop.
ChIP-seq-HPC_jobs
#! /bin/bash
# Generate one PBS job script per IP/Input BAM pair for MACS14 peak calling.
#
# Input:  bam_names.txt in the current directory. Each line holds two
#         whitespace-separated fields: the IP BAM and its Input (control) BAM.
# Output: ./macs14_pbs/<prefix>-macs.pbs for every pair, where <prefix> is the
#         IP file name without its directory part and without a trailing
#         .sorted.bam.
set -euo pipefail

root=$(pwd)

if [[ ! -r bam_names.txt ]]; then
  echo "bam_names.txt not found or not readable in ${root}" >&2
  exit 1
fi

# -p keeps a second run from aborting under set -e when the directory exists.
# The job scripts point their stdout/stderr at ${root}/logs, so create it too.
mkdir -p macs14_pbs logs

# The "|| [[ -n ... ]]" part keeps a last line that has no trailing newline.
while read -r IP Input || [[ -n "${IP:-}" ]]; do
  # Blank lines would otherwise yield a job file named "-macs.pbs".
  if [[ -z "${IP}" ]]; then
    continue
  fi
  # Without a control BAM the macs14 command line would be malformed.
  if [[ -z "${Input}" ]]; then
    echo "skipping ${IP}: no Input BAM given in bam_names.txt" >&2
    continue
  fi

  prefix=$(basename -- "${IP}" .sorted.bam)

  # The leading and trailing blank lines are kept so that the generated file
  # is byte-identical to what earlier versions of this script produced.
  cat > "./macs14_pbs/${prefix}-macs.pbs" << EOF

#PBS -N ${prefix}_macs14
#PBS -l nodes=1:ppn=5,walltime=2:00:00
#PBS -l mem=10g
#PBS -M mtang1@mdanderson.org
#PBS -m a
#PBS -d $root
#PBS -o ${root}/logs
#PBS -e ${root}/logs
#PBS -V
macs14 -t ${IP} -c ${Input} -n ${prefix} -p 1e-6 -g mm

EOF
done < bam_names.txt
#! /bin/bash
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# show help
# Print the usage text of the ChIP-seq command generator on stdout.
# The script name shown in the usage line is taken from $0 without its path.
show_help(){
  local prog=${0##*/}
  printf '%s\n' \
    'This is a wrapper to generate commands for ChIP-seq experiments.' \
    'It generates commands for bowtie1 mapping and generate bigwig files by deeptools.' \
    "usage: ${prog}"' -d < a directory path containing the fastq.gz files > -r < h or m>' \
    '-h display this help and exit' \
    '-d the path of the directory containing the fastq.gz files, can be relative path' \
    '-r reference genome to be used. m for mouse; h for human'
}
## if there are no arguments provided, show help
if [[ $# -eq 0 ]]; then
  show_help
  exit 1
fi

## Parse the command line.
## The leading ':' in the option string puts getopts in silent mode, so the
## two error cases are reported here: ':' means an option is missing its
## value, '?' means the option is unknown. In both cases OPTARG holds the
## offending option letter.
while getopts ":hd:r:" opt; do
  case "$opt" in
    h)
      show_help
      exit 0
      ;;
    d) fqDIR=$OPTARG ;;
    r) REF=$OPTARG ;;
    :)
      echo "Option -$OPTARG requires an argument" >&2
      show_help >&2
      exit 1
      ;;
    '?')
      echo "Invalid option $OPTARG" >&2
      show_help >&2
      exit 1
      ;;
  esac
done
## set up some defaults
## REF falls back to h, fqDIR falls back to the current folder
REF=${REF:-"h"}
fqDIR=${fqDIR:-$(pwd)}

## check if the directory exist
if [[ ! -d "$fqDIR" ]]; then
  echo "directory $fqDIR does not exist" >&2
  exit 1
fi

## check if the directory contains fastq.gz files
## The glob is expanded into an array. When nothing matches, the pattern
## itself is left as the only element and fails the existence test below.
## -L also accepts a match that is a symlink whose target is missing.
fastq_matches=("$fqDIR"/*fastq.gz)
if [[ -e "${fastq_matches[0]:-}" || -L "${fastq_matches[0]:-}" ]]; then
  echo "fastq.gz files found in the first level of specified directory $fqDIR"
else
  echo "No fastq.gz files in the first level of specified directory $fqDIR" >&2
  exit 1
fi
## bowtie1 index locations, one per supported genome
human_ref="/risapps/reference/bowtie1/hg19"
mouse_ref="/scratch/genomic_med/mtang1/scratch/mm9_bowtie_ref/mm9"

## map the -r choice onto the matching index; anything else is an error
case "$REF" in
  m)
    ref_genome=${mouse_ref}
    ;;
  h)
    ref_genome=${human_ref}
    ;;
  *)
    echo "please only specify m or h for the reference genome"
    exit 1
    ;;
esac
## Write the mapping and bigwig command list for one sample into
## <sample>_commands.txt in the current directory and report the file name.
## Globals:   ref_genome (read)
## Arguments: $1 - sample prefix used to name every output file
##            $2 - fastq.gz path handed to zcat in the bowtie command
write_commands() {
  local sample=$1
  local fastq=$2
  # Unquoted delimiter: the variables are expanded now, while <( ... ) is
  # copied through literally because here-documents do not perform process
  # substitution.
  cat << EOF > "${sample}_commands.txt"
module load bowtie/1.1.2
module load samtools
bowtie -p 10 --best --chunkmbs 320 ${ref_genome} -q <(zcat ${fastq}) -S ${sample}.sam
samtools view -bS ${sample}.sam > ${sample}.bam
samtools sort -@9 -T ${sample} ${sample}.bam -o ${sample}.sorted.bam
samtools index ${sample}.sorted.bam
rm ${sample}.sam
rm ${sample}.bam
## make bigwig files
bamCoverage -b ${sample}.sorted.bam --normalizeUsingRPKM --binSize 30 --smoothLength 300 -p 10 --extendReads 200 -o ${sample}.bw
EOF
  echo "commands file generated:${sample}_commands.txt"
}

## sometimes there are multiple fq.gz in the folder,
## sometimes there is only one
## The list is sorted so that the file the prefix is derived from does not
## depend on the order in which find happens to return the files.
fqs=$(find "$fqDIR" -name "*gz" | LC_ALL=C sort)
if [[ -z "$fqs" ]]; then
  echo "No gz files found under $fqDIR" >&2
  exit 1
fi
fqNum=$(wc -l <<< "$fqs")

if [[ $fqNum -eq 1 ]]; then
  # full path of that single fastq file
  fq=$(readlink -f "$fqs")
  filename=$(basename "$fq" .fastq.gz)
  prefix=${filename%%_L00*}
  write_commands "$prefix" "$fq"
else
  # merge all the fastqs to a single fastq
  # NOTE: from here on the working directory is $fqDIR, so the merged file
  # and the commands file are both written there.
  cd "$fqDIR"
  # A here-string instead of "echo | head": head closing the pipe early
  # could otherwise fail the pipeline under pipefail.
  OneFq=$(head -n 1 <<< "$fqs")
  filename=$(basename "$OneFq" .fastq.gz)
  prefix=${filename%%_L00*}
  merged="${prefix}_merged.fastq.gz"
  echo "merging all the fastq.gz files"
  # Leave out the merge target itself: after a previous run it matches the
  # glob and would be concatenated into its own output.
  merge_inputs=()
  for gz in ./*gz; do
    if [[ -f "$gz" && "$gz" != "./${merged}" ]]; then
      merge_inputs+=("$gz")
    fi
  done
  if [[ ${#merge_inputs[@]} -eq 0 ]]; then
    echo "No gz files to merge in the first level of $fqDIR" >&2
    exit 1
  fi
  cat "${merge_inputs[@]}" > "$merged"
  write_commands "$prefix" "$merged"
fi
#!/bin/bash
# Wrapper to make BSUB job format on HPC running LSF job scheduler.
# @sbamin | shark
## getopts schema is modified from a script by @r_sabarinathan
# usage
# Print the help text of the BSUB wrapper on stdout.
# The script name shown in the text is taken from $0 without its path.
show_help() {
  local prog=${0##*/}
  printf '%s\n' \
    'Wrapper to make BSUB job format on HPC running LSF job scheduler.' \
    'Only required parameter is path to file containing commands to be run on cluster.' \
    'This file will be copied verbatim following BSUB arguments.' \
    'Default BSUB options are: medium queue with 2 hours walltime, arpprox 16GB RAM and 4 CPU cores with present work directory as current work directory.' \
    "Usage: ${prog}"' -a <path to files containing commands> > <job.bsub>"' \
    '-h display this help and exit' \
    '-j job name (default: j<random id>_username)' \
    '-w work directory (default: present work directory)' \
    '-q job queue (default: medium)' \
    '-t walltime in hours (default: 2:00)' \
    '-m memory in KB and in multiple of 8192 (default: 8192)' \
    '-c cpu cores per node (default: 4)' \
    '-o email notifications (default: -N)' \
    '-e extra options to BSUB (default: none)' \
    '-a REQUIRED: path to file containing commands to be run on cluster. This file will be copied verbatim following BSUB arguments.' \
    "Example: ${prog}"' -j "sample_job" -w "/home/foo/myworkdir" -q long -t 26:00 -m 65536 -c 24 -o "-B -N" -a "/home/foo/mycommands.txt" > /home/foo/sample.bsub' \
    'Quotes are important for variable names containig spaces and special characters.'
}
# With no arguments there is nothing to wrap: print the usage and fail.
if [[ $# -eq 0 ]]; then
  show_help
  exit 1
fi

# read input
# The option string has no leading ':', so getopts prints its own message for
# an unknown option or a missing value and then hands back '?'.
while getopts "j:w:q:t:m:c:o:e:a:h" opt; do
  case "$opt" in
    h)
      show_help
      exit 0
      ;;
    j) JOBNAME=$OPTARG ;;
    w) CWD=$OPTARG ;;
    q) QUEUE=$OPTARG ;;
    t) WALLTIME=$OPTARG ;;
    m) MEMORY=$OPTARG ;;
    c) CPU=$OPTARG ;;
    o) EMAILOPTS=$OPTARG ;;
    e) EXTRA_OPTS=$OPTARG ;;
    a) MYARGS=$OPTARG ;;
    '?')
      # Two separate commands: print the usage on stderr, then stop.
      show_help >&2
      exit 1
      ;;
  esac
done

# Defaults for everything that was not given on the command line.
DJOBID=$(printf "j%s_%s" "$RANDOM" "$(whoami)")
JOBNAME=${JOBNAME:-$DJOBID}
CWD=${CWD:-$(pwd)}
# Built by plain interpolation so that a work directory or job name
# containing spaces stays in one piece.
STDOUT="${CWD}/log_${JOBNAME}.out"
STDERR="${CWD}/log_${JOBNAME}.err"
QUEUE=${QUEUE:-"medium"}
WALLTIME=${WALLTIME:-"2:00"}
MEMORY=${MEMORY:-"8192"}
CPU=${CPU:-"4"}
EMAILOPTS=${EMAILOPTS:-"-N"}
EXTRA_OPTS=${EXTRA_OPTS:-}
MYARGS=${MYARGS:-}

# The command file must exist and be non-empty. Diagnostics go to stderr
# because stdout is where the generated job script is written.
if [[ ! -s "${MYARGS}" ]]; then
  printf '\nERROR: Command file either does not exist at %s location or empty.\n\n' "${MYARGS}" >&2
  show_help >&2
  exit 1
fi
##### Emit the LSF job script on stdout, filled in from the values set above #####
# Inside the here-document an escaped dollar (\$) is written out literally and
# is evaluated later, when the job runs. Everything unescaped is expanded now,
# while the job script is being generated.
cat <<EOF
#!/bin/bash
#BSUB -J ${JOBNAME} # name of the job
#BSUB -cwd ${CWD} # the workding dir for each job, this is <flow_run_path>/uniqueid/tmp
#BSUB -o ${STDOUT} # output is sent to logfile, stdout + stderr by default
#BSUB -e ${STDERR} # output is sent to logfile, stdout + stderr by default
#BSUB -q ${QUEUE} # Job queue
#BSUB -W ${WALLTIME} # Walltime in minutes
#BSUB -M ${MEMORY} # Memory requirements in Kbytes
#BSUB -n ${CPU} # CPU reserved
#BSUB -R span[ptile=${CPU}] # CPU reserved, all reserved on same node
#BSUB -R rusage[mem=${MEMORY}] # memory reserved
#BSUB -u mtang1@mdanderson.org # for notifications
#BSUB ${EMAILOPTS} # send email when job ends
#BSUB -r # make the jobs re-runnable
#BSUB ${EXTRA_OPTS} # Any extra arguments passed onto queue
## following BSUB options are not being used at present.
##BSUB {{{DEPENDENCY}}} # Do not remove dependency args come here
## --- DO NOT EDIT from below here---- ##
## following will always overwrite previous output file, if any.
set +o noclobber
echo "BEGIN at \$(date)" >> ${STDOUT}
## File containing commands will be copied here verbatim ##
###################### START USER SUPPLIED COMMANDS ######################
$(cat "${MYARGS}")
###################### END USER SUPPLIED COMMANDS ######################
exitstat=\$?
echo "END at \$(date)" >> ${STDOUT}
echo "exit status was \${exitstat}" >> ${STDOUT}
#### slack notifications ####
NOTIFY="YES"
if [[ ! -z \${SLKMSG+x} && "\$NOTIFY" == "YES" ]]; then
SLKCMD=\$(printf "%s/bin/pingme -m \"%s\n\$(hostname)\n\$(date)\" &" "\${HOME}" "\${SLKMSG}")
echo "\${SLKCMD}"
ssh shark19 "\${SLKCMD}" >> /dev/null 2>&1
fi
## Exit with exitstat from primary command
exit \${exitstat}
## END ##
EOF
#!/bin/bash
# Wrapper to make MSUB job format on HPC running Moab/Torque job scheduler.
# @sbamin | nautilus
## getopts schema is modified from a script by @r_sabarinathan
#set -e
#set -u
#set -o pipefail
# usage
# Print the help text of the MSUB wrapper on stdout.
# ${0##*/} is the script name without its directory part.
show_help() {
cat << EOF
Wrapper to make MSUB job format on HPC running Moab/Torque job scheduler.
Only required parameter is path to file containing commands to be run on cluster.
This file will be copied verbatim following MSUB arguments.
Default MSUB options are: medium queue with 2 hours walltime, arpprox 16GB RAM and 4 CPU cores with present work directory as current work directory.
Usage: ${0##*/} -a <path to files containing commands> > <job.msub>"
-h display this help and exit
-j job name (default: j<random id>_username)
-w work directory (default: present work directory)
-t walltime in HH:MM:SS (default: 02:00:00)
-m memory in gb (default: 16gb)
-n number of nodes (default: 1)
-c cpu cores per node (default: 4)
-o email notifications (default: ae)
-e extra options to MSUB (default: none)
-a REQUIRED: path to file containing commands to be run on cluster. This file will be copied verbatim following MSUB arguments.
Example: ${0##*/} -j "sample_job" -w "/home/foo/myworkdir" -t 26:00:00 -m 64gb -n 1 -c 24 -o e -a "/home/foo/mycommands.txt" > /home/foo/sample.msub
Quotes are important for variable names containig spaces and special characters.
EOF
}
# With no arguments there is nothing to wrap: print the usage and fail.
if [[ $# -eq 0 ]]; then
  show_help
  exit 1
fi

# read input
# The option string has no leading ':', so getopts prints its own message for
# an unknown option or a missing value and then hands back '?'.
while getopts "j:w:q:t:m:n:c:o:e:a:h" opt; do
  case "$opt" in
    h)
      show_help
      exit 0
      ;;
    j) JOBNAME=$OPTARG ;;
    w) CWD=$OPTARG ;;
    q)
      # -q is accepted but its value is not used: no queue line is emitted
      # for MSUB jobs by this wrapper.
      ;;
    t) WALLTIME=$OPTARG ;;
    m) MEMORY=$OPTARG ;;
    n) NODES=$OPTARG ;;
    c) CPU=$OPTARG ;;
    o) EMAILOPTS=$OPTARG ;;
    e) EXTRA_OPTS=$OPTARG ;;
    a) MYARGS=$OPTARG ;;
    '?')
      show_help >&2
      exit 1
      ;;
  esac
done

# Defaults for everything that was not given on the command line.
DJOBID=$(printf "j%s_%s" "$RANDOM" "$(whoami)")
JOBNAME=${JOBNAME:-$DJOBID}
CWD=${CWD:-$(pwd)}
# Built by plain interpolation so that a work directory or job name
# containing spaces stays in one piece.
STDOUT="${CWD}/log_${JOBNAME}.out"
STDERR="${CWD}/log_${JOBNAME}.err"
WALLTIME=${WALLTIME:-"02:00:00"}
MEMORY=${MEMORY:-"16gb"}
NODES=${NODES:-"1"}
CPU=${CPU:-"4"}
EMAILOPTS=${EMAILOPTS:-"ae"}
EXTRA_OPTS=${EXTRA_OPTS:-}
MYARGS=${MYARGS:-}

# The command file must exist and be non-empty. Diagnostics go to stderr
# because stdout is where the generated job script is written.
if [[ ! -s "${MYARGS}" ]]; then
  printf '\nERROR: Command file either does not exist at %s location or empty.\n\n' "${MYARGS}" >&2
  show_help >&2
  exit 1
fi
##### Emit the MSUB job script on stdout, filled in from the values set above #####
# Inside the here-document an escaped dollar (\$) is written out literally and
# is evaluated later, when the job runs. Everything unescaped is expanded now,
# while the job script is being generated.
# NOTE(review): DEPENDENCY is not assigned anywhere in the visible script, so
# the ##MSUB dependency line is emitted without a value - confirm intent.
cat <<EOF
#!/bin/bash
#MSUB -N ${JOBNAME} # name of the job
#MSUB -d ${CWD} # the workding dir for each job, this is <flow_run_path>/uniqueid/tmp
#MSUB -o ${STDOUT} # output is sent to logfile, stdout + stderr by default
#MSUB -e ${STDERR} # output is sent to logfile, stdout + stderr by default
#MSUB -l walltime=${WALLTIME} # Walltime in HH:MM:SS
#MSUB -l mem=${MEMORY} # Memory requirements, e.g. 16gb
#MSUB -l nodes=${NODES}:ppn=${CPU} # CPU reserved
#MSUB -M mtang1@mdanderson.org # for notifications
#MSUB -m ${EMAILOPTS} # send email when job ends
#MSUB -r y # make the jobs re-runnable
#MSUB -S /bin/bash # use bash shell
#MSUB -V
#MSUB ${EXTRA_OPTS} # Any extra arguments passed onto queue
## following MSUB options are not being used at present.
# For HPC Nautilus at MDAnderson: Remove QUEUE option of MSUB else job will fail. Queue will be determined based on walltime argument.
##MSUB ${DEPENDENCY} # Do not remove dependency args come here
## --- DO NOT EDIT from below here---- ##
## following will always overwrite previous output file, if any.
set +o noclobber
echo "BEGIN at \$(date)"
## File containing commands will be copied here verbatim ##
###################### START USER SUPPLIED COMMANDS ######################
$(cat "${MYARGS}")
###################### END USER SUPPLIED COMMANDS ######################
exitstat=\$?
echo "END at \$(date)"
echo "exit status was \${exitstat}"
exit \${exitstat}
## END ##
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment