Skip to content

Instantly share code, notes, and snippets.

@bretonics bretonics/cellranger.qsub
Last active Feb 20, 2019

Embed
What would you like to do?
Template file to run Cell Ranger on Sun Grid Engine (SGE)
#!/bin/bash -l
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE)
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-cellranger-qsub
# ================================================================================
# JOB
# Project Information
# Fill in both values before submitting: together they name the sequencing
# run folder and the sample sheet that are looked up below.
investigator=""
date=""
project="${date}_${investigator}"
# Load modules
modules=(bcl2fastq cellranger)
module load "${modules[@]}"
# Jobmode "sge" or path to custom sge.template
mode="${HOME}/scscore/Templates/cellranger/sge.template" # cannot contain '~'
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# mkfastq Pipeline
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# mkfastq demultiplexing output directory name
id="${date}"
# Absolute path to sequencing run folder
run="${HOME}/scscore/Sequencing/${project}"
# Path to simple CSV file
csv="${run}/${project}.csv"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Counts Pipeline
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Expected number of recovered cells
numcells="3000"
# Transcriptome references:
# GRCh38-1.2.0, hg19-1.2.0, mm10-1.2.0, mm10-2.1.0, hg19_and_mm10-1.2.0, hg19_and_mm10-2.1.0, ercc92-1.2.0
transcriptome=""
# List of sample names for each library in sequencing run.
# Taken from column 2 of the CSV with the header row dropped; spaces inside a
# name become '_' so that every name is a single word. The pipeline joins the
# names onto one line because `read` only consumes a single line; the result
# is stored as a real array so that "${samples[@]}" yields one word per sample.
samples=()
IFS=' ' read -r -a samples <<< "$(cut -f 2 -d , "${csv}" | sed 1d | tr ' ' '_' | tr '\n' ' ')"
# FASTQs path folder
fastqs="outs/fastq_path/"
# Absolute path to the Cell Ranger compatible transcriptome reference
ref="${HOME}/scscore/References/cellranger/${transcriptome}"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Setup job artifacts
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Create the log and job directories unless they are already present
for artifact_dir in logs jobs; do
  [ -d "${artifact_dir}" ] || mkdir "${artifact_dir}"
done
# Keep track of information related to the current job.
# One printf call emits the whole banner: %b expands the "\n" escapes and the
# format string is reused for every argument, so each entry ends with a newline.
banner_rule="# ================================================================================"
printf '%b\n' \
  "${banner_rule}\n" \
  "# JOB NAME: $JOB_NAME\n" \
  "# USER: $USER\n" \
  "# PWD: $(pwd)\n" \
  "# HOST: $HOSTNAME\n" \
  "# JOB ID: $JOB_ID\n" \
  "# TASK ID: $SGE_TASK_ID\n" \
  "# START: $(date)\n" \
  "${banner_rule}\n\n# "
# The banner ends with "# " so the module listing follows it directly
module list
printf '%b\n' "\n\n\n\n"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calls
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Run mkfastq pipeline (demultiplex BCL files)
# The command is held in an array so each argument stays a single word even if
# a path contains whitespace.
mkfastq_cmd=(cellranger mkfastq "--id=${id}" "--run=${run}" "--csv=${csv}" "--jobmode=${mode}")
echo -e "#================================================================================\n"
echo -e "# RUNNING mkfastq:\n"
echo -e "# ${mkfastq_cmd[*]}\n"
echo -e "#================================================================================\n\n\n"
"${mkfastq_cmd[@]}"
# Save the exit status immediately: every later command (echo, if, ...) overwrites $?
mkfastq_status=$?
echo -e "# ================================================================================\n\n\n"
# Check if mkfastq demultiplexing directory exists for sequencing run "id"
# (an empty id also lands here instead of silently changing to $HOME)
if [[ ! -d "${id}" ]]; then
  echo -e "Could not change to $id directory.\n"
  echo -e "Check if current directory contains project $id from mkfastq call.\n"
  echo -e "Currently at $(pwd)\n"
  exit 126
fi
# Check mkfastq pipeline finished properly
if [[ "${mkfastq_status}" -ne 0 ]]; then
  echo -e "mkfastq failed.\n"
  exit 126
fi
echo -e "Changing to $id"
if ! cd "${id}"; then
  echo -e "Could not change to $id directory.\n"
  exit 126
fi
echo -e "Changed to $(pwd)\n\n\n"
# `samples` may be an array or a single space-separated string; normalise it
# to an array with one element per sample name.
sample_list=()
read -r -a sample_list <<< "${samples[*]}"
echo -e "#================================================================================\n"
echo -e "# RUNNING counts on:\n"
echo -e "# ${sample_list[*]}\n"
echo -e "#================================================================================\n"
# Run counts pipeline
# Every sample is attempted; the ones whose run fails are remembered so the
# job does not report success (or clean up) after a partial failure.
failed_samples=()
for sample in "${sample_list[@]}"; do
  count_cmd=(
    cellranger count
    "--id=${sample}"
    "--sample=${sample}"
    "--fastqs=${fastqs}"
    "--transcriptome=${ref}"
    "--expect-cells=${numcells}"
    "--jobmode=${mode}"
  )
  echo -e "\n\n"
  echo -e "# RUNNING counts for $sample of [${sample_list[*]}]:\n"
  echo -e "# ================================================================================\n"
  echo -e "${count_cmd[*]}\n\n\n"
  "${count_cmd[@]}" || failed_samples+=("${sample}")
done
echo -e "# ================================================================================\n\n\n"
# Clean up and finalize, only when every count run succeeded
if [[ "${#failed_samples[@]}" -gt 0 ]]; then
  echo -e "count failed for: ${failed_samples[*]}\n"
  exit 126
fi
echo -e "Cleaning up...\n"
# NOTE(review): this runs from inside the ${id} directory entered above, so the
# relative paths below resolve there - confirm that is where these files live.
mv -- "__${date}.mro" "${date}.qsub" "${project}"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Closing banner: finish time plus the slot count and scratch directory of the
# job. printf reuses '%b\n' for each argument, expanding the "\n" escapes and
# terminating every entry with a newline.
closing_rule="# ================================================================================"
printf '%b\n' \
  "\n\n\n" \
  "${closing_rule}\n" \
  "# END: $(date)\n" \
  "# USER: $USER\n" \
  "# CORES/THREADS: $NSLOTS\n" \
  "# TEMPORARY DIRECTORY: $TMPDIR\n" \
  "${closing_rule}\n\n\n\n"
# ================================================================================
# ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job; default time is 12 hours
#$ -l h_rt=48:00:00
# Memory
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total)
#$ -l mem_total=128G # Request a node that has at least 128G of total memory
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core
# Request a parallel environment with 16 cores
#$ -pe omp 16
# Request that the job run on the Buy-in Compute group hardware the project has access to
#$ -l buyin
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P project_name
# Job name
#$ -N job_name
# Specify the output file name
#$ -o "logs/$JOB_NAME.stdout"
# Specify stderr output
#$ -e "logs/$JOB_NAME.stderr"
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default
#$ -m beas
# Email address to send notifications to
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
#$ -v SGE_CELL=default
# Run in current directory
#$ -cwd
#!/bin/bash -l
# sge.template - Template file for Cell Ranger cluster mode job submissions
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-sge-template
#-------------------------------------------------------------------------------
# MRO (MARTIAN RUNTIME) ENVIRONMENTAL VARIABLES AVAILABLE
# __MRO_JOB_NAME__ Job name composed of the sample ID and stage being executed (Required)
# __MRO_JOB_WORKDIR__ Specify the absolute path to the directory where the job should execute.
# __MRO_ACCOUNT__ Charge resources to specific accounts by passing mrp's environment account.
# __MRO_THREADS__ Number of threads required by the stage
# __MRO_MEM_GB__ Amount of memory in GB required by the stage
# __MRO_MEM_MB__ -or- in MB
# __MRO_MEM_GB_PER_THREAD__ Amount of memory in GB required per thread in multi-threaded stages
# __MRO_MEM_MB_PER_THREAD__ -or- in MB
# __MRO_STDOUT__ Paths to the _stdout metadata files for the stage (Required)
# __MRO_STDERR__ -or- to the _stderr metadata files for the stage (Required)
# __MRO_CMD__ Bourne shell command to run the stage code (Required)
#-------------------------------------------------------------------------------
# SGE ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job
# The job will be aborted if it runs longer than this time
# The default time is 12 hours
#$ -l h_rt=24:00:00
# Memory
# Request a node with at least __MRO_MEM_GB__ of free memory
#$ -l mem_free=__MRO_MEM_GB__G
# Request a parallel environment with __MRO_THREADS__ cores
#$ -pe omp __MRO_THREADS__
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P my_project
# Job name
#$ -N __MRO_JOB_NAME__
# Stdout output
#$ -o __MRO_STDOUT__
# Stderr output
#$ -e __MRO_STDERR__
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) – default
#$ -m as
# Email address to send notifications to
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
# Run in current directory
#$ -cwd
# Interpreting shell for the job
#$ -S "/usr/bin/env bash"
#-------------------------------------------------------------------------------
# JOB
# Keep track of information related to the current job
# The __MRO_*__ tokens are template placeholders that are replaced textually
# (the same way as the bare __MRO_CMD__ line and the "#$" directives of this
# template), so they must NOT carry a leading "$": "$__MRO_THREADS__" would
# turn into e.g. "$16" and be expanded by the shell. Placeholder lines are
# single-quoted so the substituted text is printed verbatim.
echo "# ================================================================================"
echo '# JOB NAME: __MRO_JOB_NAME__'
echo "# START: $(date)"
echo "# PWD: $(pwd)"
echo "# USER: $USER"
echo "# HOST: $HOSTNAME"
echo "# JOB ID: $JOB_ID"
echo "# TASK ID: $SGE_TASK_ID"
echo '# STDOUT: __MRO_STDOUT__'
echo '# STDERR: __MRO_STDERR__'
echo "#"
echo "# [REQUESTED]"
echo '# FREEMEM: __MRO_MEM_GB__G'
echo '# THREADS: __MRO_THREADS__'
echo "# ================================================================================"
echo ""
echo ""
echo ""
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# STAGE CALL
echo "#--------------------------------------------------------------------------------"
echo '# Running __MRO_JOB_NAME__:'
# A quoted here-document prints the command text untouched, whatever quote
# characters the substituted command may contain.
cat <<'MRO_CMD_ECHO'
# __MRO_CMD__
MRO_CMD_ECHO
echo "#--------------------------------------------------------------------------------"
echo ""
echo ""
__MRO_CMD__
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
echo ""
echo ""
echo ""
echo "#================================================================================"
echo "# End: $(date)"
echo "# User: $USER"
echo "# Cores/Threads: $NSLOTS"
echo "# Temporary Directory: $TMPDIR"
echo "#================================================================================"
echo ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.