Skip to content

Instantly share code, notes, and snippets.

@ShaiberAlon
Last active March 27, 2017 21:54
Show Gist options
  • Save ShaiberAlon/f53d850b27bad299eeb16bffd15a40a9 to your computer and use it in GitHub Desktop.
Save ShaiberAlon/f53d850b27bad299eeb16bffd15a40a9 to your computer and use it in GitHub Desktop.
bash script to map multiple metagenomes to multiple references
#!/bin/bash
### DEFAULTS (FEEL FREE TO EDIT THESE) ##################
NUM_THREADS_FOR_MAPPING=10
NUM_THREADS_FOR_HMMSCAN=4
NUM_THREADS_FOR_ANVI_GEN_CONTIG=4
NUM_THREADS_FOR_ANVI_PROFILE=4
NUM_THREADS_FOR_ANVI_MERGE=4
# configure whether SNV analysis will be included or not (if you want it included then leave this empty
# if you want no SNV analysis then set the variable to equal --skip-SNV-analysis
SNV_ANALYSIS_CONFIGURATION=
# configure whether SAAV analysis will be included or not (if you want it included then
# to equal --profile-AA-frequencies
SAAV_ANALYSIS_CONFIGURATION=--profile-AA-frequencies
#########################################################
# setting the pipeline to stop in case a command returns a non-zero status
set -e
job=$1
email=$2
samples=$3
if [ ! -s $samples ]
then
echo "
You must provide a samples txt file. The format of the
samples txt file is identical to the input file of iu-gen-configs. For
details, see iu-gen-congigs -h.
"
exit 1
fi
if [ ! -f "references.txt" ]
then
echo "
You must have a references.txt in this directory. "
exit 1
fi
C() {
echo -e "\033[0;30m\033[46m$1\033[0m"
}
INFO() {
echo
C "#"
C "#"
C "# $1"
C "#"
C "#"
echo
}
WAIT () {
python /workspace/meren/wait_for_cluster.py $1
}
CHECK_FILE () {
if [ -s $1 ]
then
:
else
echo "Error: Expected to find $1, but it is not there!"
exit 1
fi
}
LOG () {
msg="[`date`] $1"
echo $msg >> $job-log.txt
echo $msg | mail -s "Update on $job" $email
}
DO_INIT() {
rm -rf 00_LOGS-$job
mkdir 00_LOGS-$job
LOG "Hi!"
LOG `anvi-interactive -v`
}
DO_GEN_CONTIG () {
DIR="03_CONTIGS"
INFO $DIR && mkdir -p $DIR
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
clusterize -n $NUM_THREADS_FOR_ANVI_GEN_CONTIG -log 00_LOGS-$job/${ref_genome}_gen-contigs.log anvi-gen-contigs-database -f $ref_genome_path -o $DIR/${ref_genome}.db
done < references.txt
WAIT anvi-gen-contigs-database
echo "anvi-gen-contigs-database for $job on `hostname` is done." | mail -s "Update on $job!" $email
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
clusterize -n $NUM_THREADS_FOR_HMMSCAN -log 00_LOGS-$job/${ref_genome}_run-hmms.log anvi-run-hmms --num-threads $NUM_THREADS_FOR_HMMSCAN -c $DIR/${ref_genome}.db
done < references.txt
WAIT anvi-run-hmms
echo "anvi-run-hmms for $job on `hostname` is done." | mail -s "Update on $job!" $email
}
DO_BOWTIE2_BUILD () {
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
DIR="04_MAPPING/$ref_genome"
INFO $DIR
mkdir -p $DIR
clusterize -n 4 -log 00_LOGS-$job/$ref_genome\_bowtie-build.log bowtie2-build $ref_genome_path $DIR/$ref_genome
done < references.txt
WAIT bowtie2-build
}
DO_BOWTIE2_MAPPING () {
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
DIR="04_MAPPING/$ref_genome"
while read sample R1 R2; do
if [ "$sample" == "sample" ]; then continue; fi
clusterize -n $NUM_THREADS_FOR_MAPPING -log 00_LOGS-$job/$ref_genome\_$sample-bowtie2.log bowtie2 --threads $NUM_THREADS_FOR_MAPPING -x $DIR/$ref_genome -1 $R1 -2 $R2 -S $DIR/$sample.sam --no-unal
done < $samples
done < references.txt
WAIT bowtie2
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
DIR="04_MAPPING/$ref_genome"
while read sample R1 R2; do
if [ "$sample" == "sample" ]; then continue; fi
clusterize -n 4 -log 00_LOGS-$job/$ref_genome\_$sample-samtools-view.log samtools view -F 4 -bS $DIR/$sample.sam -o $DIR/$sample-RAW.bam
done < $samples
done < references.txt
WAIT samtools
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
DIR="04_MAPPING/$ref_genome"
while read sample R1 R2; do
if [ "$sample" == "sample" ]; then continue; fi
clusterize -n 4 -log 00_LOGS-$job/$ref_genome\_$sample-anvi-init-bam.log anvi-init-bam $DIR/$sample-RAW.bam -o $DIR/$sample\.bam
done < $samples
done < references.txt
WAIT anvi-init-bam
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
DIR="04_MAPPING/$ref_genome"
while read sample R1 R2; do
if [ "$sample" == "sample" ]; then continue; fi
CHECK_FILE $DIR/$sample.bam
done < $samples
done < references.txt
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
ref_genome_path=$path
DIR="04_MAPPING/$ref_genome"
while read sample R1 R2; do
if [ "$sample" == "sample" ]; then continue; fi
if [ -s $DIR/$sample.bam ]
then
clusterize -log 00_LOGS-$job/$ref_genome\_$sample-rm-SAMs.log rm $DIR/$sample.sam $DIR/$sample-RAW.bam
fi
done < $samples
done < references.txt
echo "Bowtie mapping for $job on `hostname` is done." | mail -s "Update on $job!" $email
}
DO_ANVI_PROFILE () {
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
DIR="05_ANVIO_PROFILE-$job/$ref_genome"
MAPPING_DIR="04_MAPPING/$ref_genome"
INFO $DIR && mkdir -p $DIR
while read sample R1 R2; do
if [ "$sample" == "sample" ]; then continue; fi
clusterize -n $NUM_THREADS_FOR_ANVI_PROFILE -log 00_LOGS-$job/$ref_genome\_$sample-anvi-profile.log anvi-profile $SNV_ANALYSIS_CONFIGURATION -i $MAPPING_DIR/$sample.bam -c 03_CONTIGS/${ref_genome}.db -o $DIR/$sample --num-threads $NUM_THREADS_FOR_ANVI_PROFILE $SAAV_ANALYSIS_CONFIGURATION
done < $samples
done < references.txt
WAIT anvi-profile
echo "Anvi'o profiling of samples_p214 for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email
}
DO_ANVI_MERGE () {
while read reference path; do
if [ "$reference" == "reference" ]; then continue; fi
ref_genome=$reference
mkdir -p 06-$job-MERGED
DIR="06-$job-MERGED/$ref_genome"
# this is the only place where I leave the delete folder
INFO $DIR && rm -rf $DIR
clusterize -n $NUM_THREADS_FOR_ANVI_MERGE -log 00_LOGS-$job/$ref_genome\_anvi-merge.log anvi-merge 05_ANVIO_PROFILE-$job/${ref_genome}/*/PROFILE.db -S "$ref_genome" -o $DIR -c 03_CONTIGS/${ref_genome}.db
done < references.txt
WAIT anvi-merge
#CHECK_FILE $DIR/RUNINFO.mcp #FIXME: I don't know why this keeps on failing to find this file!!
echo "Anvi'o merge for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email
}
# Start the log directory
DO_INIT
#create the contigs databases
DO_GEN_CONTIG
# DO bowtie build for the reference/s:
DO_BOWTIE2_BUILD
# Map reads with bowtie
DO_BOWTIE2_MAPPING
# Do anvi profile
DO_ANVI_PROFILE
# Do anvi merge
DO_ANVI_MERGE
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment