Instantly share code, notes, and snippets.

Embed
What would you like to do?
Template file to run Cell Ranger on Sun Grid Engine (SGE)
#!/bin/bash -l
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE)
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-cellranger-qsub
#-------------------------------------------------------------------------------
# ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job; default time is 12 hours
#$ -l h_rt=48:00:00
# Memory
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total)
#$ -l mem_total=128G # Request a node that has at least 128G of total memory
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core
# Request a parallel environment with 16 cores
#$ -pe omp 16
# Request that the job run on Buy-in Compute group hardware the project has access to
#$ -l buyin
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P project_name
# Job name
#$ -N job_name
# Merge the error and output streams into a single file
#$ -j y
# Specify the output file name
#$ -o "logs/$JOB_NAME.qlog"
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default
#$ -m beas
# Email address to send email to
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
#$ -v SGE_CELL=default
# Run in current directory
#$ -cwd
#--------------------------------------------------------------------------------
# JOB
# Environment modules this job depends on
modules=(
  bcl2fastq
  cellranger
)
# Load every listed module with a single `module load` call
module load "${modules[@]}"
# Project information: fill in both values below.
# The project name is composed as "<date>_<investigator>".
date=''
investigator=''
printf -v project '%s_%s' "$date" "$investigator"
# Job mode passed to cellranger via --jobmode: either the literal "sge" ...
mode='sge'
# ... or the path to a custom sge.template. This second assignment wins.
# The path cannot contain '~', hence the explicit $HOME.
mode="$HOME/scscore/Templates/cellranger/sge.template"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# mkfastq Pipeline
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Name of the mkfastq demultiplexing output directory (taken from the run date)
id=${date}
# Sequencing run folder, given as an absolute path
printf -v run '%s/scscore/Sequencing/%s' "$HOME" "$project"
# Simple CSV file, expected inside the run folder and named after the project
printf -v csv '%s/%s.csv' "$run" "$project"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Counts Pipeline
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Sample names, one per library in the sequencing run (whitespace-separated)
samples=''
# Folder holding the FASTQs, relative to the mkfastq output directory
fastqs='outs/fastq_path/'
# Name of the transcriptome reference to use. Available references:
# GRCh38-1.2.0, hg19-1.2.0, mm10-1.2.0, mm10-2.1.0, hg19_and_mm10-1.2.0, hg19_and_mm10-2.1.0, ercc92-1.2.0
transcriptome=''
# Absolute path to the Cell Ranger compatible transcriptome reference
printf -v ref '%s/scscore/References/cellranger/%s' "$HOME" "$transcriptome"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Setup job artifacts
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Dump the job's full environment next to the job log for later debugging.
# The logs directory is created first so the redirection cannot fail just
# because the directory is missing in the current working directory.
if mkdir -p logs; then
  env > "logs/${JOB_NAME}.env"
else
  echo "Could not create logs directory; skipping environment dump."
fi
# Keep track of information related to the current job
echo "# ================================================================================"
echo "# JOB NAME: $JOB_NAME"
echo "# USER: $USER"
echo "# PWD: $(pwd)"
echo "# HOST: $HOSTNAME"
echo "# JOB ID: $JOB_ID"
echo "# TASK ID: $SGE_TASK_ID"
echo "# START: $(date)"
# Prefix for the loaded-module listing printed by `module list`
printf "\n\n# "
module list
echo "# ================================================================================"
echo ""
echo ""
echo ""
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calls
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Run mkfastq pipeline (demultiplex BCL files), then run the counts pipeline
# for every sample. Uses id, run, csv, mode, samples, fastqs and ref as set
# earlier in this script. Exits with status 126 when mkfastq fails or when its
# output directory cannot be entered.
#
# Commands are built as arrays so that each option stays a single argument
# even if one of the configured paths contains whitespace.
mkfastq_cmd=(
  cellranger mkfastq
  "--id=${id}"
  "--run=${run}"
  "--csv=${csv}"
  "--jobmode=${mode}"
)
echo "#--------------------------------------------------------------------------------"
echo "# Running mkfastq:"
echo "# ${mkfastq_cmd[*]}"
echo "#--------------------------------------------------------------------------------"
echo ""
echo ""
"${mkfastq_cmd[@]}"
# Save the exit status immediately: every later command overwrites $?.
mkfastq_status=$?
echo "# ================================================================================"

# Check mkfastq pipeline finished properly
if [ "$mkfastq_status" -ne 0 ]; then
  echo "mkfastq failed."
  exit 126
fi

# Check if mkfastq demultiplexing directory exists for sequencing run "id"
if [ ! -d "$id" ]; then
  echo "Could not change to $id directory."
  echo "Check if current directory contains project $id from mkfastq call."
  echo "Currently at $(pwd)"
  exit 126
fi

echo ""
echo ""
echo "Changing to $id"
# Running counts from the wrong directory would not find the relative
# FASTQ path, so a failed cd is fatal.
if ! cd "$id"; then
  echo "Could not change to $id directory."
  echo "Currently at $(pwd)"
  exit 126
fi
echo "Changed to $(pwd)"
echo ""
echo ""
echo "#--------------------------------------------------------------------------------"
echo "# Running counts on:"
echo "# ${samples[*]}"
echo "#--------------------------------------------------------------------------------"

# Run counts pipeline
# samples is configured as a whitespace-separated string, so the expansion is
# left unquoted on purpose to split it into individual sample names.
# shellcheck disable=SC2068
for sample in ${samples[@]}; do
  count_cmd=(
    cellranger count
    "--id=${sample}"
    "--sample=${sample}"
    "--fastqs=${fastqs}"
    "--transcriptome=${ref}"
    "--jobmode=${mode}"
  )
  echo ""
  echo ""
  echo "Running counts for $sample:"
  echo "--------------------------------------------------------------------------------"
  echo "${count_cmd[*]}"
  echo ""
  echo ""
  # Best effort: a failing sample is reported but does not stop the others.
  "${count_cmd[@]}" || echo "cellranger count failed for $sample (exit status $?)."
done
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Closing banner: end time, user, slot count and temporary directory,
# padded with three blank lines above and below.
banner_rule="# ================================================================================"
printf '%s\n' \
  "" "" "" \
  "$banner_rule" \
  "# End: $(date)" \
  "# User: $USER" \
  "# Cores/Threads: $NSLOTS" \
  "# Temporary Directory: $TMPDIR" \
  "$banner_rule" \
  "" "" ""
#!/bin/bash -l
# sge.template - Template file for Cell Ranger cluster mode job submissions
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-sge-template
#-------------------------------------------------------------------------------
# MRO (MARTIAN RUNTIME) ENVIRONMENTAL VARIABLES AVAILABLE
# __MRO_JOB_NAME__ Job name composed of the sample ID and stage being executed (Required)
# __MRO_JOB_WORKDIR__ Specify the absolute path to the directory where the job should execute.
# __MRO_ACCOUNT__ Charge resources to specific accounts by passing mrp's environment account.
# __MRO_THREADS__ Number of threads required by the stage
# __MRO_MEM_GB__ Amount of memory in GB required by the stage
# __MRO_MEM_MB__ -or- in MB
# __MRO_MEM_GB_PER_THREAD__ Amount of memory in GB required per thread in multi-threaded stages
# __MRO_MEM_MB_PER_THREAD__ -or- in MB
# __MRO_STDOUT__ Paths to the _stdout metadata files for the stage (Required)
# __MRO_STDERR__ -or- to the _stderr metadata files for the stage (Required)
# __MRO_CMD__ Bourne shell command to run the stage code (Required)
#-------------------------------------------------------------------------------
# SGE ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job
# The job will be aborted if it runs longer than this time
# The default time is 12 hours
#$ -l h_rt=24:00:00
# Memory
# Request a node with at least __MRO_MEM_GB__ of free memory
#$ -l mem_free=__MRO_MEM_GB__G
# Request a parallel environment with __MRO_THREADS__ cores
#$ -pe omp __MRO_THREADS__
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P my_project
# Job name
#$ -N __MRO_JOB_NAME__
# Stdout output
#$ -o __MRO_STDOUT__
# Stderr output
#$ -e __MRO_STDERR__
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) – default
#$ -m as
# Email adress to send email
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
# Run in current directory
#$ -cwd
# Interpreting shell for the job
#$ -S "/usr/bin/env bash"
#-------------------------------------------------------------------------------
# JOB
# Opening banner: record the job's identity, location and start time in the log
header_rule="# ================================================================================"
printf '%s\n' \
  "$header_rule" \
  "# JOB NAME: $JOB_NAME" \
  "# USER: $USER" \
  "# PWD: $(pwd)" \
  "# HOST: $HOSTNAME" \
  "# JOB ID: $JOB_ID" \
  "# TASK ID: $SGE_TASK_ID" \
  "# START: $(date)"
# Two blank lines and a "# " prefix ahead of the loaded-module listing
printf '\n\n# '
module list
# Close the banner and leave three blank lines before the stage output
printf '%s\n' "$header_rule" "" "" ""
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# STAGE CALL
# The MRO placeholders are replaced textually before this template is
# submitted (the stage command below is used as a bare placeholder), so they
# must NOT be prefixed with `$`: that would make the shell treat the
# substituted text as a variable reference and log a mangled value.
echo "#--------------------------------------------------------------------------------"
# Single quotes keep the substituted job name literal.
echo '# Running __MRO_JOB_NAME__:'
# A quoted here-document logs the substituted command verbatim, whatever
# quotes or `$` characters it may contain.
cat <<'MRO_STAGE_CMD_LOG'
# __MRO_CMD__
MRO_STAGE_CMD_LOG
echo "#--------------------------------------------------------------------------------"
echo ""
echo ""
# Run the stage code
__MRO_CMD__
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message: end time, user, slot count and temporary directory,
# preceded by three blank lines and followed by one.
footer_rule="#================================================================================"
printf '%s\n' \
  "" "" "" \
  "$footer_rule" \
  "# End: $(date)" \
  "# User: $USER" \
  "# Cores/Threads: $NSLOTS" \
  "# Temporary Directory: $TMPDIR" \
  "$footer_rule" \
  ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment