Last active
September 8, 2018 13:40
-
-
Save crazyhottommy/f0af1386ac5a6f18c07db1e566c54e3d to your computer and use it in GitHub Desktop.
ChIP-seq-HPC_jobs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
# Generate one PBS job script per IP/Input BAM pair for macs14 peak calling.
#
# Input:  bam_names.txt in the current directory; each line has two
#         whitespace-separated columns: <IP>.sorted.bam <Input>.sorted.bam
# Output: ./macs14_pbs/<prefix>-macs.pbs, one job file per pair.
set -e
set -u
set -o pipefail

root=$(pwd)

# -p: do not fail (under set -e) if the directory already exists
mkdir -p macs14_pbs

# Read pairs straight from the file; avoids 'cat |' and the subshell it creates.
while read -r IP Input; do
    # strip the .sorted.bam suffix to get the sample prefix
    prefix=$(basename "$IP" .sorted.bam)
    JobString="
#PBS -N ${prefix}_macs14
#PBS -l nodes=1:ppn=5,walltime=2:00:00
#PBS -l mem=10g
#PBS -M mtang1@mdanderson.org
#PBS -m a
#PBS -d $root
#PBS -o ${root}/logs
#PBS -e ${root}/logs
#PBS -V

macs14 -t ${IP} -c ${Input} -n ${prefix} -p 1e-6 -g mm
"
    echo "$JobString" > ./macs14_pbs/"${prefix}"-macs.pbs
done < bam_names.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
# Wrapper to generate bowtie1 mapping + deeptools bigwig commands for
# ChIP-seq fastq.gz files found in a directory.
set -e
set -u
set -o pipefail

# show help
show_help(){
cat << EOF
This is a wrapper to generate commands for ChIP-seq experiments.
It generates commands for bowtie1 mapping and generates bigwig files by deeptools.
usage: ${0##*/} -d < a directory path containing the fastq.gz files > -r < h or m>
    -h  display this help and exit
    -d  the path of the directory containing the fastq.gz files, can be relative path
    -r  reference genome to be used. m for mouse; h for human
EOF
}

## if there are no arguments provided, show help
if [[ $# == 0 ]]; then show_help; exit 1; fi

while getopts ":hd:r:" opt; do
    case "$opt" in
        h) show_help; exit 0;;
        d) fqDIR=$OPTARG;;
        r) REF=$OPTARG;;
        '?') echo "Invalid option $OPTARG"; show_help >&2; exit 1;;
    esac
done

## set up some defaults
REF=${REF:-"h"}
fqDIR=${fqDIR:-$(pwd)}

## check if the directory exists; default is the current folder
if [ ! -d "$fqDIR" ]; then
    echo "directory $fqDIR does not exist"
    exit 1
fi

## check if the directory contains fastq.gz files (first level only)
if ls "$fqDIR"/*fastq.gz 1> /dev/null 2>&1; then
    echo "fastq.gz files found in the first level of specified directory $fqDIR"
else
    echo "No fastq.gz files in the first level of specified directory $fqDIR"; exit 1
fi

## reference genome path for mouse and human
human_ref="/risapps/reference/bowtie1/hg19"
mouse_ref="/scratch/genomic_med/mtang1/scratch/mm9_bowtie_ref/mm9"

if [[ $REF == "m" ]]; then
    ref_genome=${mouse_ref}
elif [[ $REF == "h" ]]; then
    ref_genome=${human_ref}
else
    echo "please only specify m or h for the reference genome"
    exit 1
fi

## Write the bowtie/samtools/deeptools command file for one sample.
## $1: sample prefix; $2: fastq argument used inside the zcat process substitution
write_commands(){
    local prefix=$1
    local fq_input=$2
    cat <<EOF > "${prefix}_commands.txt"
module load bowtie/1.1.2
module load samtools
bowtie -p 10 --best --chunkmbs 320 ${ref_genome} -q <(zcat ${fq_input}) -S ${prefix}.sam
samtools view -bS ${prefix}.sam > ${prefix}.bam
samtools sort -@9 -T ${prefix} ${prefix}.bam -o ${prefix}.sorted.bam
samtools index ${prefix}.sorted.bam
rm ${prefix}.sam
rm ${prefix}.bam
## make bigwig files
bamCoverage -b ${prefix}.sorted.bam --normalizeUsingRPKM --binSize 30 --smoothLength 300 -p 10 --extendReads 200 -o ${prefix}.bw
EOF
    echo "commands file generated:${prefix}_commands.txt"
}

## sometimes there are multiple fq.gz in the folder,
## sometimes there is only one
fqNum=$(find "$fqDIR" -name "*gz" | wc -l)
fqs=$(find "$fqDIR" -name "*gz")

if [[ $fqNum -eq 1 ]]; then
    # full path of that single fastq file
    fq=$(readlink -f "$fqs")
    filename=$(basename "$fq" .fastq.gz)
    # drop the lane suffix (_L001 etc.) to get the sample name
    prefix=${filename%%_L00*}
    write_commands "$prefix" "$fq"
else
    # merge all the fastqs into a single fastq; commands file is
    # written inside $fqDIR (note the cd), unlike the single-file case
    cd "$fqDIR"
    OneFq=$(echo "$fqs" | head -1)
    filename=$(basename "$OneFq" .fastq.gz)
    prefix=${filename%%_L00*}
    echo "merging all the fastq.gz files"
    cat ./*gz > "${prefix}_merged.fastq.gz"
    write_commands "$prefix" "${prefix}_merged.fastq.gz"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Wrapper to make BSUB job format on HPC running LSF job scheduler.
# @sbamin | shark
## getopts schema is modified from script by @r_sabarinathan

# usage
show_help() {
cat << EOF
Wrapper to make BSUB job format on HPC running LSF job scheduler.
Only required parameter is path to file containing commands to be run on cluster.
This file will be copied verbatim following BSUB arguments.
Default BSUB options are: medium queue with 2 hours walltime, approx 16GB RAM and 4 CPU cores with present work directory as current work directory.

Usage: ${0##*/} -a <path to files containing commands> > <job.bsub>
    -h  display this help and exit
    -j  job name (default: j<random id>_username)
    -w  work directory (default: present work directory)
    -q  job queue (default: medium)
    -t  walltime in hours (default: 2:00)
    -m  memory in KB and in multiple of 8192 (default: 8192)
    -c  cpu cores per node (default: 4)
    -o  email notifications (default: -N)
    -e  extra options to BSUB (default: none)
    -a  REQUIRED: path to file containing commands to be run on cluster. This file will be copied verbatim following BSUB arguments.

Example: ${0##*/} -j "sample_job" -w "/home/foo/myworkdir" -q long -t 26:00 -m 65536 -c 24 -o "-B -N" -a "/home/foo/mycommands.txt" > /home/foo/sample.bsub
Quotes are important for variable names containing spaces and special characters.
EOF
}

if [[ $# == 0 ]]; then show_help; exit 1; fi

# read input
while getopts "j:w:q:t:m:c:o:e:a:h" opt; do
    case "$opt" in
        h) show_help; exit 0;;
        j) JOBNAME=$OPTARG;;
        w) CWD=$OPTARG;;
        q) QUEUE=$OPTARG;;
        t) WALLTIME=$OPTARG;;
        m) MEMORY=$OPTARG;;
        c) CPU=$OPTARG;;
        o) EMAILOPTS=$OPTARG;;
        e) EXTRA_OPTS=$OPTARG;;
        a) MYARGS=$OPTARG;;
        # the missing ';' here used to pass 'exit 1' as arguments to show_help,
        # so invalid options never terminated the script
        '?') show_help >&2; exit 1;;
    esac
done

# defaults for anything not supplied on the command line
DJOBID=$(printf "j%s_%s" "$RANDOM" "$(whoami)")
JOBNAME=${JOBNAME:-$DJOBID}
CWD=${CWD:-$(pwd)}
STDOUT=$(printf "%s/log_%s.out" "${CWD}" "${JOBNAME}")
STDERR=$(printf "%s/log_%s.err" "${CWD}" "${JOBNAME}")
QUEUE=${QUEUE:-"medium"}
WALLTIME=${WALLTIME:-"2:00"}
MEMORY=${MEMORY:-"8192"}
CPU=${CPU:-"4"}
EMAILOPTS=${EMAILOPTS:-"-N"}

if [[ ! -s ${MYARGS} ]]; then
    echo -e "\nERROR: Command file either does not exist at ${MYARGS} location or empty.\n"
    show_help
    exit 1
fi

##### Following lsf block will be parsed based on arguments supplied #####
cat <<EOF
#!/bin/bash
#BSUB -J ${JOBNAME}                     # name of the job
#BSUB -cwd ${CWD}                       # the workding dir for each job, this is <flow_run_path>/uniqueid/tmp
#BSUB -o ${STDOUT}                      # output is sent to logfile, stdout + stderr by default
#BSUB -e ${STDERR}                      # output is sent to logfile, stdout + stderr by default
#BSUB -q ${QUEUE}                       # Job queue
#BSUB -W ${WALLTIME}                    # Walltime in minutes
#BSUB -M ${MEMORY}                      # Memory requirements in Kbytes
#BSUB -n ${CPU}                         # CPU reserved
#BSUB -R span[ptile=${CPU}]             # CPU reserved, all reserved on same node
#BSUB -R rusage[mem=${MEMORY}]          # memory reserved
#BSUB -u mtang1@mdanderson.org          # for notifications
#BSUB ${EMAILOPTS}                      # send email when job ends
#BSUB -r                                # make the jobs re-runnable
#BSUB ${EXTRA_OPTS}                     # Any extra arguments passed onto queue

## following BSUB options are not being used at present.
##BSUB {{{DEPENDENCY}}}                 # Do not remove dependency args come here

## --- DO NOT EDIT from below here---- ##
## following will always overwrite previous output file, if any.
set +o noclobber

$(printf "echo \"BEGIN at \$(date)\" >> %s" "${STDOUT}")

## File containing commands will be copied here verbatim ##
###################### START USER SUPPLIED COMMANDS ######################
$(cat "${MYARGS}")
###################### END USER SUPPLIED COMMANDS ######################

exitstat=\$?

$(printf "echo \"END at \$(date)\" >> %s" "${STDOUT}")
$(printf "echo \"exit status was \${exitstat}\" >> %s" "${STDOUT}")

#### slack notifications ####
NOTIFY="YES"
if [[ ! -z \${SLKMSG+x} && "\$NOTIFY" == "YES" ]]; then
    SLKCMD=\$(printf "%s/bin/pingme -m \"%s\n\$(hostname)\n\$(date)\" &" "\${HOME}" "\${SLKMSG}")
    echo "\${SLKCMD}"
    ssh shark19 "\${SLKCMD}" >> /dev/null 2>&1
fi

## Exit with exitstat from primary command
$(printf "exit \${exitstat}")
## END ##
EOF
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Wrapper to make MSUB job format on HPC running Moab/Torque job scheduler.
# @sbamin | nautilus
## getopts schema is modified from script by @r_sabarinathan

#set -e
#set -u
#set -o pipefail

# usage
show_help() {
cat << EOF
Wrapper to make MSUB job format on HPC running Moab/Torque job scheduler.
Only required parameter is path to file containing commands to be run on cluster.
This file will be copied verbatim following MSUB arguments.
Default MSUB options are: medium queue with 2 hours walltime, approx 16GB RAM and 4 CPU cores with present work directory as current work directory.

Usage: ${0##*/} -a <path to files containing commands> > <job.msub>
    -h  display this help and exit
    -j  job name (default: j<random id>_username)
    -w  work directory (default: present work directory)
    -t  walltime in HH:MM:SS (default: 02:00:00)
    -m  memory in gb (default: 16gb)
    -n  number of nodes (default: 1)
    -c  cpu cores per node (default: 4)
    -o  email notifications (default: ae)
    -e  extra options to MSUB (default: none)
    -a  REQUIRED: path to file containing commands to be run on cluster. This file will be copied verbatim following MSUB arguments.

Example: ${0##*/} -j "sample_job" -w "/home/foo/myworkdir" -t 26:00:00 -m 64gb -n 1 -c 24 -o e -a "/home/foo/mycommands.txt" > /home/foo/sample.msub
Quotes are important for variable names containing spaces and special characters.
EOF
}

if [[ $# == 0 ]]; then show_help; exit 1; fi

# read input
# NOTE(review): 'q:' is accepted but has no case arm — a -q argument is
# silently ignored; on Nautilus the queue is derived from walltime (see below)
while getopts "j:w:q:t:m:n:c:o:e:a:h" opt; do
    case "$opt" in
        h) show_help; exit 0;;
        j) JOBNAME=$OPTARG;;
        w) CWD=$OPTARG;;
        t) WALLTIME=$OPTARG;;
        m) MEMORY=$OPTARG;;
        n) NODES=$OPTARG;;
        c) CPU=$OPTARG;;
        o) EMAILOPTS=$OPTARG;;
        e) EXTRA_OPTS=$OPTARG;;
        a) MYARGS=$OPTARG;;
        '?') show_help >&2; exit 1;;
    esac
done

# defaults for anything not supplied on the command line
DJOBID=$(printf "j%s_%s" "$RANDOM" "$(whoami)")
JOBNAME=${JOBNAME:-$DJOBID}
CWD=${CWD:-$(pwd)}
STDOUT=$(printf "%s/log_%s.out" "${CWD}" "${JOBNAME}")
STDERR=$(printf "%s/log_%s.err" "${CWD}" "${JOBNAME}")
WALLTIME=${WALLTIME:-"02:00:00"}
MEMORY=${MEMORY:-"16gb"}
NODES=${NODES:-"1"}
CPU=${CPU:-"4"}
EMAILOPTS=${EMAILOPTS:-"ae"}

if [[ ! -s ${MYARGS} ]]; then
    echo -e "\nERROR: Command file either does not exist at ${MYARGS} location or empty.\n"
    show_help
    exit 1
fi

##### Following MSUB block will be parsed based on arguments supplied #####
cat <<EOF
#!/bin/bash
#MSUB -N ${JOBNAME}                     # name of the job
#MSUB -d ${CWD}                         # the workding dir for each job, this is <flow_run_path>/uniqueid/tmp
#MSUB -o ${STDOUT}                      # output is sent to logfile, stdout + stderr by default
#MSUB -e ${STDERR}                      # output is sent to logfile, stdout + stderr by default
#MSUB -l walltime=${WALLTIME}           # Walltime in minutes
#MSUB -l mem=${MEMORY}                  # Memory requirements in Kbytes
#MSUB -l nodes=${NODES}:ppn=${CPU}      # CPU reserved
#MSUB -M mtang1@mdanderson.org          # for notifications
#MSUB -m ${EMAILOPTS}                   # send email when job ends
#MSUB -r y                              # make the jobs re-runnable
#MSUB -S /bin/bash                      # use bash shell
#MSUB -V
#MSUB ${EXTRA_OPTS}                     # Any extra arguments passed onto queue

## following MSUB options are not being used at present.
# For HPC Nautilus at MDAnderson: Remove QUEUE option of MSUB else job will fail. Queue will be determined based on walltime argument.
##MSUB ${DEPENDENCY}                    # Do not remove dependency args come here

## --- DO NOT EDIT from below here---- ##
## following will always overwrite previous output file, if any.
set +o noclobber

echo "BEGIN at \$(date)"

## File containing commands will be copied here verbatim ##
###################### START USER SUPPLIED COMMANDS ######################
$(cat "${MYARGS}")
###################### END USER SUPPLIED COMMANDS ######################

exitstat=\$?

echo "END at \$(date)"
echo "exit status was \${exitstat}"

exit \${exitstat}
## END ##
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment