Skip to content

Instantly share code, notes, and snippets.

@bretonics
Last active November 4, 2020 00:51
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save bretonics/f6c57474bb077d2acd293092cf7fa449 to your computer and use it in GitHub Desktop.
Save bretonics/f6c57474bb077d2acd293092cf7fa449 to your computer and use it in GitHub Desktop.
Template file to run Cell Ranger on Sun Grid Engine (SGE)
#!/bin/bash -l
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE)
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-cellranger-qsub
# ================================================================================
# JOB
# Project Information (fill in before submitting)
investigator=""
date=""
project="${date}_${investigator}"
# Load modules
modules=(bcl2fastq cellranger)
# Quoted expansion: each module name reaches `module load` as exactly one word
module load "${modules[@]}"
# Jobmode "sge" or path to custom sge.template
# A '~' placed inside double quotes is not expanded by the shell; use ${HOME}
mode="${HOME}/scscore/Templates/cellranger/sge.template" # cannot contain '~'
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# mkfastq Pipeline
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# mkfastq demultiplexing output directory name
id=$date
# Absolute path to sequencing run folder
run="${HOME}/scscore/Sequencing/${project}"
# Path to simple CSV file
csv="${run}/${project}.csv"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Counts Pipeline
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Expected number of recovered cells
numcells="3000"
# Transcriptome references:
# GRCh38-1.2.0, hg19-1.2.0, mm10-1.2.0, mm10-2.1.0, hg19_and_mm10-1.2.0, hg19_and_mm10-2.1.0, ercc92-1.2.0
transcriptome=""
# List of sample names for each library in sequencing run.
# Taken from the second column of the CSV with the header row dropped and
# spaces replaced by underscores. Stored as an array so that "${samples[@]}"
# yields one word per sample.
samples=()
if [[ -r "${csv}" ]]; then
  # The path is quoted so a sample sheet under a directory containing
  # whitespace is still read as a single file.
  read -r -a samples <<< "$(cut -d , -f 2 -- "${csv}" | sed 1d | tr ' ' '_' | tr '\n' ' ')"
else
  echo "WARNING: cannot read sample sheet ${csv}; sample list is empty." >&2
fi
# FASTQs path folder
fastqs="outs/fastq_path/"
# Absolute path to the Cell Ranger compatible transcriptome reference
ref="${HOME}/scscore/References/cellranger/${transcriptome}"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Setup job artifacts
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Create the logs/ and jobs/ directories in the working directory when absent
[ -d logs ] || mkdir logs
[ -d jobs ] || mkdir jobs
# Job header: one field per line, each followed by an empty line
for banner_line in \
  "# ================================================================================" \
  "# JOB NAME: $JOB_NAME" \
  "# USER: $USER" \
  "# PWD: $(pwd)" \
  "# HOST: $HOSTNAME" \
  "# JOB ID: $JOB_ID" \
  "# TASK ID: $SGE_TASK_ID" \
  "# START: $(date)" \
  "# ================================================================================"; do
  echo -e "${banner_line}\n"
done
unset banner_line
# A bare "# " line is printed just before the module listing
echo "# "
module list
echo -e "\n\n\n\n"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Calls
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Run mkfastq pipeline (demultiplex BCL files)
# The command is kept in an array so every option stays a single argument,
# even when a path contains whitespace.
cmd=(cellranger mkfastq "--id=${id}" "--run=${run}" "--csv=${csv}" "--jobmode=${mode}")
echo -e "#================================================================================\n"
echo -e "# RUNNING mkfastq:\n"
echo -e "# ${cmd[*]}\n"
echo -e "#================================================================================\n\n\n"
"${cmd[@]}"
# Capture the status right away: any later command (even an echo or an
# `if ... fi` whose condition was false) overwrites $? with its own status.
mkfastq_status=$?
echo -e "# ================================================================================\n\n\n"
# Check mkfastq pipeline finished properly
if [[ ${mkfastq_status} -ne 0 ]]; then
  echo -e "mkfastq failed.\n"
  exit 126
fi
# Check if mkfastq demultiplexing directory exists for sequencing run "id"
if [[ ! -d "${id}" ]]; then
  echo -e "Could not change to $id directory.\n"
  echo -e "Check if current directory contains project $id from mkfastq call.\n"
  echo -e "Currently at $(pwd)\n"
  exit 126
fi
echo -e "Changing to $id"
# Stop instead of running the count pipeline from the wrong directory
if ! cd "${id}"; then
  echo -e "Could not change to $id directory.\n"
  exit 126
fi
echo -e "Changed to $(pwd)\n\n\n"
echo -e "#================================================================================\n"
echo -e "# RUNNING counts on:\n"
echo -e "# ${samples[*]}\n"
echo -e "#================================================================================\n"
# Split the sample list on whitespace. This works whether `samples` is a
# space-separated string or an array of names.
read -r -a sample_list <<< "${samples[*]}"
# Samples whose count run returned a non-zero status
failed_samples=()
# Run counts pipeline
for sample in "${sample_list[@]}"; do
  cmd=(cellranger count "--id=${sample}" "--sample=${sample}" "--fastqs=${fastqs}"
    "--transcriptome=${ref}" "--expect-cells=${numcells}" "--jobmode=${mode}")
  echo -e "\n\n"
  echo -e "# RUNNING counts for $sample of [${samples[*]}]:\n"
  echo -e "# ================================================================================\n"
  echo -e "${cmd[*]}\n\n\n"
  # Keep going with the remaining samples, but remember the failure
  "${cmd[@]}" || failed_samples+=("${sample}")
done
echo -e "# ================================================================================\n\n\n"
# Clean up and finalize, only when every count run succeeded
if [[ ${#failed_samples[@]} -eq 0 ]]; then
  echo -e "Cleaning up...\n"
  # NOTE(review): this runs from inside ${id} (the cd above is never undone),
  # so all three paths are resolved relative to that directory. Confirm that
  # this is where the .mro/.qsub files and ${project} are expected to be.
  mv -- "__${date}.mro" "${date}.qsub" "${project}"
else
  echo -e "counts failed for: ${failed_samples[*]}; skipping clean up.\n"
fi
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# End job message
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Job footer: blank lines, then end time, user, slot count and scratch
# directory, each followed by an empty line
echo -e "\n\n\n"
for footer_line in \
  "# ================================================================================" \
  "# END: $(date)" \
  "# USER: $USER" \
  "# CORES/THREADS: $NSLOTS" \
  "# TEMPORARY DIRECTORY: $TMPDIR" \
  "# ================================================================================\n\n\n"; do
  echo -e "${footer_line}\n"
done
unset footer_line
# ================================================================================
# ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job; default time is 12 hours
#$ -l h_rt=48:00:00
# Memory
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total)
#$ -l mem_total=128G # Request a node that has at least 128G of total memory
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core
# Request a parallel environment with 16 cores
#$ -pe omp 16
# Request my job to run on Buy-in Compute group hardware project has access to
#$ -l buyin
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P project_name
# Job name
#$ -N job_name
# Specify the output file name
#$ -o "logs/$JOB_NAME.stdout"
# Specify stderr output
#$ -e "logs/$JOB_NAME.stderr"
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default
#$ -m beas
# Email address to send notifications to
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
#$ -v SGE_CELL=default
# Run in current directory
#$ -cwd
#!/bin/bash -l
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE)
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#
#-------------------------------------------------------------------------------
# ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job
# The job will be aborted if it runs longer than this time
# The default time is 12 hours
#$ -l h_rt=24:00:00
# Memory
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total)
#$ -l mem_total=128G # Request a node that has at least 128G of total memory
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core
# Request a parallel environment with 16 cores
#$ -pe omp 16
# Request my job to run on Buy-in Compute group hardware project has access to
#$ -l buyin
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P project_name
# Job name
#$ -N job_name
# Merge the error and output streams into a single file
#$ -j y
# Specify the output file name
#$ -o $JOB_NAME.qlog
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default
#$ -m eas
# Email address to send notifications to
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
#$ -v SGE_CELL=default
# Run in current directory
#$ -cwd
#--------------------------------------------------------------------------------
# JOB
# Project name
project=""
# mkfastq demultiplexing output directory name
id=$project
# Path to simple CSV file
# NOTE(review): `run` is not assigned anywhere in this script, so unless it is
# inherited from the submitting environment this expands to "/${project}.csv".
# Nothing in the count pipeline below reads `csv`.
csv="${run}/${project}.csv"
# List of sample names for each library in sequencing run
samples=""
# FASTQs path folder
fastqs="outs/fastq_path/"
# Transcriptome references:
# GRCh38-1.2.0, hg19-1.2.0, mm10-1.2.0, mm10-2.1.0, hg19_and_mm10-1.2.0, hg19_and_mm10-2.1.0, ercc92-1.2.0
transcriptome=""
# Path to the Cell Ranger compatible transcriptome reference
# ${HOME} is used instead of '~': a tilde inside double quotes is kept as a
# literal character and would be handed to cellranger unexpanded.
ref="${HOME}/References/cellranger/refdata-cellranger-${transcriptome}"
# Jobmode "sge" or path to custom sge.template
mode="../sge.template"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Load modules
modules=(bcl2fastq cellranger)
# Quoted expansion: each module name reaches `module load` as exactly one word
module load "${modules[@]}"
# Save environment to file
# The redirect target is quoted so an unusual job name cannot turn it into an
# "ambiguous redirect" error.
env > "${JOB_NAME}.env"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Keep track of information related to the current job
echo "================================================================================"
echo "Start: $(date)"
echo "User: $USER"
echo "Host: $HOSTNAME"
echo "Job name: $JOB_NAME"
echo "Job ID: $JOB_ID"
echo "Task ID: $SGE_TASK_ID"
echo "PWD: $(pwd)"
module list
echo "================================================================================"
echo ""
echo ""
echo ""
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# CALLS
# Check if mkfastq demultiplexing directory exists for sequencing run "id"
# An empty id is rejected explicitly: an unquoted `[ -d $id ]` is true when id
# is empty, and the following `cd` would then move to the home directory.
if [[ -n "${id}" && -d "${id}" ]]; then
  # Jobmode handed to cellranger. A template given as a relative path is made
  # absolute while still in the submit directory, so it remains valid after
  # the cd below. Any other value (e.g. "sge") is passed through unchanged.
  jobmode="${mode}"
  if [[ -f "${mode}" ]]; then
    jobmode="$(cd "$(dirname -- "${mode}")" && pwd)/$(basename -- "${mode}")"
  fi
  # A leading "~/" that was stored inside quotes is never expanded by the
  # shell; expand it here. Harmless when ref is already a full path.
  case "${ref}" in
    "~/"*) transcriptome_ref="${HOME}/${ref:2}" ;;
    *) transcriptome_ref="${ref}" ;;
  esac
  echo ""
  echo "Changing to $id"
  # Stop instead of running the count pipeline from the wrong directory
  if ! cd "${id}"; then
    echo "Could not change to $id directory."
    exit 126
  fi
  echo "Changed to $(pwd)"
  echo ""
  echo ""
  echo "Running counts on: ${samples[*]}"
  echo "================================================================================"
  # Split the sample list on whitespace. This works whether `samples` is a
  # space-separated string or an array of names.
  read -r -a sample_list <<< "${samples[*]}"
  # Run counts pipeline
  for sample in "${sample_list[@]}"; do
    # Array form keeps every option a single argument
    cmd=(cellranger count "--sample=${sample}" "--id=${sample}" "--fastqs=${fastqs}"
      "--transcriptome=${transcriptome_ref}" "--jobmode=${jobmode}")
    echo ""
    echo "Running counts for $sample: ${cmd[*]}"
    echo ""
    echo ""
    "${cmd[@]}"
  done
else
  echo "Could not change to $id directory."
  echo "Did mkfastq work?"
  echo "Check if current directory contains project $id from mkfastq call."
  echo "Currently at $(pwd)"
  exit 126
fi
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Job footer: three blank lines, then end time, user, slot count and scratch
# directory between two rules, then one blank line
printf '%s\n' \
  "" \
  "" \
  "" \
  "================================================================================" \
  "End: $(date)" \
  "User: $USER" \
  "Cores/Threads: $NSLOTS" \
  "Temporary Directory: $TMPDIR" \
  "================================================================================" \
  ""
#!/bin/bash -l
# cellranger.qsub - A template file to run Cell Ranger on Sun Grid Engine (SGE)
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#
#-------------------------------------------------------------------------------
# ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job
# The job will be aborted if it runs longer than this time
# The default time is 12 hours
#$ -l h_rt=24:00:00
# Memory
# Request a node with at least 128GB of memory (16 cores x 8GB per core = 128GB total)
#$ -l mem_total=128G # Request a node that has at least 128G of total memory
#$ -l mem_per_core=8G # Request a node with at least 8 GB of memory per core
# Request a parallel environment with 16 cores
#$ -pe omp 16
# Request my job to run on Buy-in Compute group hardware project has access to
#$ -l buyin
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P project_name
# Job name
#$ -N job_name
# Merge the error and output streams into a single file
#$ -j y
# Specify the output file name
#$ -o $JOB_NAME.qlog
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) - default
#$ -m eas
# Email adress to send email
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
#$ -v SGE_CELL=default
# Run in current directory
#$ -cwd
#--------------------------------------------------------------------------------
# JOB
# Project name
project=""
# Path to sequencing run folder
run="../Sequencing/${project}"
# mkfastq demultiplexing output directory name
id=$project
# Path to simple CSV file
csv="${run}/${project}.csv"
# Jobmode "sge" or path to custom sge.template
mode="../sge.template"
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Load modules
modules=(bcl2fastq cellranger)
# Quoted expansion: each module name reaches `module load` as exactly one word
module load "${modules[@]}"
# Save environment to file
env > "${JOB_NAME}.env"
# Keep track of information related to the current job
echo "================================================================================"
echo "Start: $(date)"
echo "User: $USER"
echo "Host: $HOSTNAME"
echo "Job name: $JOB_NAME"
echo "Job ID: $JOB_ID"
echo "Task ID: $SGE_TASK_ID"
echo "PWD: $(pwd)"
module list
echo "================================================================================"
echo ""
echo ""
echo ""
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# CALLS
# Run mkfastq pipeline (demultiplex BCL files)
# --jobmode takes the `mode` setting defined above. The command is kept in an
# array so every option stays a single argument even if a path has whitespace.
cmd=(cellranger mkfastq "--run=${run}" "--id=${id}" "--csv=${csv}" "--jobmode=${mode}")
echo "Running mkfastq: ${cmd[*]}"
echo "================================================================================"
echo ""
"${cmd[@]}"
# Report a failed run. The script carries on so the end-of-job message that
# follows is still written.
mkfastq_status=$?
if [[ ${mkfastq_status} -ne 0 ]]; then
  echo "mkfastq exited with status ${mkfastq_status}" >&2
fi
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Job footer: three blank lines, then end time, user, slot count and scratch
# directory between two rules, then one blank line
printf '%s\n' \
  "" \
  "" \
  "" \
  "================================================================================" \
  "End: $(date)" \
  "User: $USER" \
  "Cores/Threads: $NSLOTS" \
  "Temporary Directory: $TMPDIR" \
  "================================================================================" \
  ""
#!/bin/bash -l
# sge.template - Template file for Cell Ranger cluster mode job submissions
# Andrés Bretón ~ http://andresbreton.com, dev@andresbreton.com
# Template from https://gist.github.com/bretonics/f6c57474bb077d2acd293092cf7fa449#file-sge-template
#-------------------------------------------------------------------------------
# MRO (MARTIAN RUNTIME) ENVIRONMENTAL VARIABLES AVAILABLE
# __MRO_JOB_NAME__ Job name composed of the sample ID and stage being executed (Required)
# __MRO_JOB_WORKDIR__ Specify the absolute path to the directory where the job should execute.
# __MRO_ACCOUNT__ Charge resources to specific accounts by passing mrp's environment account.
# __MRO_THREADS__ Number of threads required by the stage
# __MRO_MEM_GB__ Amount of memory in GB required by the stage
# __MRO_MEM_MB__ -or- in MB
# __MRO_MEM_GB_PER_THREAD__ Amount of memory in GB required per thread in multi-threaded stages
# __MRO_MEM_MB_PER_THREAD__ -or- in MB
# __MRO_STDOUT__ Paths to the _stdout metadata files for the stage (Required)
# __MRO_STDERR__ -or- to the _stderr metadata files for the stage (Required)
# __MRO_CMD__ Bourne shell command to run the stage code (Required)
#-------------------------------------------------------------------------------
# SGE ENVIRONMENTAL VARIABLES AVAILABLE
# JOB_ID Current job ID
# JOB_NAME Current job name
# NSLOTS The number of slots (threads or processors) requested by a job
# HOSTNAME Name of execution host
# SGE_TASK_ID Array Job task index number
# SGE_TASK_STEPSIZE The step size of the array job specification
# SGE_TASK_FIRST The index number of the first array job task
# SGE_TASK_LAST The index number of the last array job task
# TMPDIR The absolute path to the job's temporary working directory
#-------------------------------------------------------------------------------
# COMPUTING DIRECTIVES
# Specify hard time limit for the job
# The job will be aborted if it runs longer than this time
# The default time is 12 hours
#$ -l h_rt=24:00:00
# Memory
# Request a node with at least __MRO_MEM_GB__ of free memory
#$ -l mem_free=__MRO_MEM_GB__G
# Request a parallel environment with __MRO_THREADS__ cores
#$ -pe omp __MRO_THREADS__
#-------------------------------------------------------------------------------
# ACTION DIRECTIVES
# Set SCC project
#$ -P my_project
# Job name
#$ -N __MRO_JOB_NAME__
# Stdout output
#$ -o __MRO_STDOUT__
# Stderr output
#$ -e __MRO_STDERR__
# Send an email (by default no email is sent). The possible values are -
# job begins (b), ends (e), is aborted (a), is suspended (s), or never (n) – default
#$ -m as
# Email address to send notifications to
#$ -M user_email
# All current environment variables should be exported to the batch job
#$ -V
# Set runtime environment variable for SGE Cluster Mode (sge.template)
#$ -v SGE_CLUSTER_NAME=SGE
# Run in current directory
#$ -cwd
# Interpreting shell for the job
#$ -S "/usr/bin/env bash"
#-------------------------------------------------------------------------------
# JOB
# Keep track of information related to the current job
# The MRO placeholders below are replaced textually by Martian before the
# script is submitted, so they are written without a leading '$'. With one,
# the substituted text would itself be expanded as a shell variable (a thread
# count of 4 would become the positional parameter $4) and print empty.
echo "# ================================================================================"
echo "# JOB NAME: __MRO_JOB_NAME__"
echo "# START: $(date)"
echo "# PWD: $(pwd)"
echo "# USER: $USER"
echo "# HOST: $HOSTNAME"
echo "# JOB ID: $JOB_ID"
echo "# TASK ID: $SGE_TASK_ID"
echo "# STDOUT: __MRO_STDOUT__"
echo "# STDERR: __MRO_STDERR__"
echo "#"
echo "# [REQUESTED]"
echo "# FREEMEM: __MRO_MEM_GB__G"
echo "# THREADS: __MRO_THREADS__"
echo "# ================================================================================"
echo ""
echo ""
echo ""
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# STAGE CALL
echo "#--------------------------------------------------------------------------------"
echo "# Running __MRO_JOB_NAME__:"
# Quoted here-document: the substituted command line is logged verbatim, with
# no quote, '$' or backtick processing by the shell.
cat <<'MRO_STAGE_CMD'
# __MRO_CMD__
MRO_STAGE_CMD
echo "#--------------------------------------------------------------------------------"
echo ""
echo ""
__MRO_CMD__
#- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Job footer: three blank lines, then end time, user, slot count and scratch
# directory between two rules, then one blank line
printf '%s\n' \
  "" \
  "" \
  "" \
  "#================================================================================" \
  "# End: $(date)" \
  "# User: $USER" \
  "# Cores/Threads: $NSLOTS" \
  "# Temporary Directory: $TMPDIR" \
  "#================================================================================" \
  ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment