Last active
March 27, 2017 21:54
-
-
Save ShaiberAlon/f53d850b27bad299eeb16bffd15a40a9 to your computer and use it in GitHub Desktop.
bash script to map multiple metagenomes to multiple references
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
### DEFAULTS (FEEL FREE TO EDIT THESE) ################## | |
NUM_THREADS_FOR_MAPPING=10 | |
NUM_THREADS_FOR_HMMSCAN=4 | |
NUM_THREADS_FOR_ANVI_GEN_CONTIG=4 | |
NUM_THREADS_FOR_ANVI_PROFILE=4 | |
NUM_THREADS_FOR_ANVI_MERGE=4 | |
# configure whether SNV analysis will be included or not (if you want it included then leave this empty | |
# if you want no SNV analysis then set the variable to equal --skip-SNV-analysis | |
SNV_ANALYSIS_CONFIGURATION= | |
# configure whether SAAV analysis will be included or not (if you want it included then | |
# to equal --profile-AA-frequencies | |
SAAV_ANALYSIS_CONFIGURATION=--profile-AA-frequencies | |
######################################################### | |
# setting the pipeline to stop in case a command returns a non-zero status | |
set -e | |
job=$1 | |
email=$2 | |
samples=$3 | |
if [ ! -s $samples ] | |
then | |
echo " | |
You must provide a samples txt file. The format of the | |
samples txt file is identical to the input file of iu-gen-configs. For | |
details, see iu-gen-congigs -h. | |
" | |
exit 1 | |
fi | |
if [ ! -f "references.txt" ] | |
then | |
echo " | |
You must have a references.txt in this directory. " | |
exit 1 | |
fi | |
C() { | |
echo -e "\033[0;30m\033[46m$1\033[0m" | |
} | |
INFO() { | |
echo | |
C "#" | |
C "#" | |
C "# $1" | |
C "#" | |
C "#" | |
echo | |
} | |
WAIT () { | |
python /workspace/meren/wait_for_cluster.py $1 | |
} | |
CHECK_FILE () { | |
if [ -s $1 ] | |
then | |
: | |
else | |
echo "Error: Expected to find $1, but it is not there!" | |
exit 1 | |
fi | |
} | |
LOG () { | |
msg="[`date`] $1" | |
echo $msg >> $job-log.txt | |
echo $msg | mail -s "Update on $job" $email | |
} | |
DO_INIT() { | |
rm -rf 00_LOGS-$job | |
mkdir 00_LOGS-$job | |
LOG "Hi!" | |
LOG `anvi-interactive -v` | |
} | |
DO_GEN_CONTIG () { | |
DIR="03_CONTIGS" | |
INFO $DIR && mkdir -p $DIR | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
clusterize -n $NUM_THREADS_FOR_ANVI_GEN_CONTIG -log 00_LOGS-$job/${ref_genome}_gen-contigs.log anvi-gen-contigs-database -f $ref_genome_path -o $DIR/${ref_genome}.db | |
done < references.txt | |
WAIT anvi-gen-contigs-database | |
echo "anvi-gen-contigs-database for $job on `hostname` is done." | mail -s "Update on $job!" $email | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
clusterize -n $NUM_THREADS_FOR_HMMSCAN -log 00_LOGS-$job/${ref_genome}_run-hmms.log anvi-run-hmms --num-threads $NUM_THREADS_FOR_HMMSCAN -c $DIR/${ref_genome}.db | |
done < references.txt | |
WAIT anvi-run-hmms | |
echo "anvi-run-hmms for $job on `hostname` is done." | mail -s "Update on $job!" $email | |
} | |
DO_BOWTIE2_BUILD () { | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
DIR="04_MAPPING/$ref_genome" | |
INFO $DIR | |
mkdir -p $DIR | |
clusterize -n 4 -log 00_LOGS-$job/$ref_genome\_bowtie-build.log bowtie2-build $ref_genome_path $DIR/$ref_genome | |
done < references.txt | |
WAIT bowtie2-build | |
} | |
DO_BOWTIE2_MAPPING () { | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
DIR="04_MAPPING/$ref_genome" | |
while read sample R1 R2; do | |
if [ "$sample" == "sample" ]; then continue; fi | |
clusterize -n $NUM_THREADS_FOR_MAPPING -log 00_LOGS-$job/$ref_genome\_$sample-bowtie2.log bowtie2 --threads $NUM_THREADS_FOR_MAPPING -x $DIR/$ref_genome -1 $R1 -2 $R2 -S $DIR/$sample.sam --no-unal | |
done < $samples | |
done < references.txt | |
WAIT bowtie2 | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
DIR="04_MAPPING/$ref_genome" | |
while read sample R1 R2; do | |
if [ "$sample" == "sample" ]; then continue; fi | |
clusterize -n 4 -log 00_LOGS-$job/$ref_genome\_$sample-samtools-view.log samtools view -F 4 -bS $DIR/$sample.sam -o $DIR/$sample-RAW.bam | |
done < $samples | |
done < references.txt | |
WAIT samtools | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
DIR="04_MAPPING/$ref_genome" | |
while read sample R1 R2; do | |
if [ "$sample" == "sample" ]; then continue; fi | |
clusterize -n 4 -log 00_LOGS-$job/$ref_genome\_$sample-anvi-init-bam.log anvi-init-bam $DIR/$sample-RAW.bam -o $DIR/$sample\.bam | |
done < $samples | |
done < references.txt | |
WAIT anvi-init-bam | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
DIR="04_MAPPING/$ref_genome" | |
while read sample R1 R2; do | |
if [ "$sample" == "sample" ]; then continue; fi | |
CHECK_FILE $DIR/$sample.bam | |
done < $samples | |
done < references.txt | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
ref_genome_path=$path | |
DIR="04_MAPPING/$ref_genome" | |
while read sample R1 R2; do | |
if [ "$sample" == "sample" ]; then continue; fi | |
if [ -s $DIR/$sample.bam ] | |
then | |
clusterize -log 00_LOGS-$job/$ref_genome\_$sample-rm-SAMs.log rm $DIR/$sample.sam $DIR/$sample-RAW.bam | |
fi | |
done < $samples | |
done < references.txt | |
echo "Bowtie mapping for $job on `hostname` is done." | mail -s "Update on $job!" $email | |
} | |
DO_ANVI_PROFILE () { | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
DIR="05_ANVIO_PROFILE-$job/$ref_genome" | |
MAPPING_DIR="04_MAPPING/$ref_genome" | |
INFO $DIR && mkdir -p $DIR | |
while read sample R1 R2; do | |
if [ "$sample" == "sample" ]; then continue; fi | |
clusterize -n $NUM_THREADS_FOR_ANVI_PROFILE -log 00_LOGS-$job/$ref_genome\_$sample-anvi-profile.log anvi-profile $SNV_ANALYSIS_CONFIGURATION -i $MAPPING_DIR/$sample.bam -c 03_CONTIGS/${ref_genome}.db -o $DIR/$sample --num-threads $NUM_THREADS_FOR_ANVI_PROFILE $SAAV_ANALYSIS_CONFIGURATION | |
done < $samples | |
done < references.txt | |
WAIT anvi-profile | |
echo "Anvi'o profiling of samples_p214 for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email | |
} | |
DO_ANVI_MERGE () { | |
while read reference path; do | |
if [ "$reference" == "reference" ]; then continue; fi | |
ref_genome=$reference | |
mkdir -p 06-$job-MERGED | |
DIR="06-$job-MERGED/$ref_genome" | |
# this is the only place where I leave the delete folder | |
INFO $DIR && rm -rf $DIR | |
clusterize -n $NUM_THREADS_FOR_ANVI_MERGE -log 00_LOGS-$job/$ref_genome\_anvi-merge.log anvi-merge 05_ANVIO_PROFILE-$job/${ref_genome}/*/PROFILE.db -S "$ref_genome" -o $DIR -c 03_CONTIGS/${ref_genome}.db | |
done < references.txt | |
WAIT anvi-merge | |
#CHECK_FILE $DIR/RUNINFO.mcp #FIXME: I don't know why this keeps on failing to find this file!! | |
echo "Anvi'o merge for $ref_genome in $job on `hostname` is done." | mail -s "Update on $job!" $email | |
} | |
# Start the log directory | |
DO_INIT | |
#create the contigs databases | |
DO_GEN_CONTIG | |
# DO bowtie build for the reference/s: | |
DO_BOWTIE2_BUILD | |
# Map reads with bowtie | |
DO_BOWTIE2_MAPPING | |
# Do anvi profile | |
DO_ANVI_PROFILE | |
# Do anvi merge | |
DO_ANVI_MERGE |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment