Run BLAST as an array job using SGE (on SANBI cluster)
#!/bin/sh
#
# Run BLAST (blastn) over a set of FASTA files as an SGE array job.
# Each array task processes exactly one query file, selected by its
# 1-based position in the sorted list of in/*.fasta.
#
# requirement:
#   working directory with:
#     in/   - files named *.fasta that are query sequences
#     out/  - empty directory to put outputs in
#     logs/ - empty directory to put logs in
#
# qsub with (the -t range should match the number of files in in/):
#   qsub -t 1-2 -wd ./my-work-dir blastplus_array.sh
#
# Outputs:
#   out/<query>.fasta.txt - tabular BLAST hits for the task's query file
#   logs/file_list.txt    - the ordered list of query files (informational)
#   stdout                - the name of the query file handled by this task
#
#$ -o logs/$JOB_NAME.o$JOB_ID.$TASK_ID
#$ -e logs/$JOB_NAME.e$JOB_ID.$TASK_ID

# Print a message to stderr and abort the task with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

### -----
### define input and output directories
base_dir=$(pwd) || die "cannot determine working directory"
in_dir=$base_dir/in
out_dir=$base_dir/out
filelist=$base_dir/logs/file_list.txt

[ -d "$out_dir" ] || die "output directory not found: $out_dir"
cd "$in_dir" || die "cannot cd to input directory: $in_dir"

### -----
### validate the array task index before using it
# Outside an array job SGE sets SGE_TASK_ID to a non-numeric value (or it is
# unset when run by hand); reject anything that is not a positive integer.
case "${SGE_TASK_ID:-}" in
  '' | 0* | *[!0-9]*)
    die "SGE_TASK_ID must be a positive integer (submit with qsub -t 1-N); got '${SGE_TASK_ID:-}'"
    ;;
esac

### -----
### collect the query files
# Use the shell glob instead of parsing ls output; the positional parameters
# then hold the sorted file names.
set -- *.fasta
[ -e "$1" ] || die "no .fasta files found in $in_dir"

[ "$SGE_TASK_ID" -le "$#" ] \
  || die "task id $SGE_TASK_ID exceeds the number of .fasta files ($#)"

# Record the list for reference. All tasks of the array write this same file
# concurrently, so write a private temp file and rename it into place; the
# selection below never reads it back, so a failure here is only a warning.
tmp_list=$filelist.$SGE_TASK_ID.$$
if printf '%s\n' "$@" > "$tmp_list" && mv -f -- "$tmp_list" "$filelist"; then
  :
else
  rm -f -- "$tmp_list"
  printf 'warning: could not write %s\n' "$filelist" >&2
fi

### -----
### access the fasta file by the ${SGE_TASK_ID}
# Drop the first (task id - 1) names so that $1 is this task's file.
shift "$((SGE_TASK_ID - 1))"
fasta=$1
printf '%s\n' "$fasta"

### -----
### add the blast module and run blast
. /etc/profile.d/module.sh
module add blastplus/default

# blastn is the last command, so its exit status becomes the task's status.
blastn -query "$in_dir/$fasta" -db nt -out "$out_dir/$fasta.txt" \
  -outfmt "6 std slen qlen qcovs qcovhsp staxids sscinames sskingdoms" \
  -soft_masking false -max_target_seqs 3 -evalue 10
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment