Created
December 20, 2021 21:25
-
-
Save mdondrup/b630f6475b19a9fb89009201bb92436e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# fetchAllGenomesByTaxon.sh - for each taxon named on the command line, look up
# its genome record with esearch/efetch/xtract, resolve the GenBank FTP path of
# the referenced assembly, then download and unpack the genomic FASTA and, if
# one exists, the protein FASTA into the current directory.
#
# usage: fetchAllGenomesByTaxon.sh Daphnia_pulex Lepeophtheirus_salmonis
# either use quotes or underscores
#
# Requires: bash (arrays are used), esearch, efetch, xtract, wget, gunzip.
#
# Files written to the current directory, per taxon:
#   <taxon>.genome.esearch.docsum    raw docsum of the genome search
#   <taxon>.assembly.esearch.docsum  raw docsum of the assembly search
#   <assembly>_genomic.fna           unpacked genome FASTA
#   <assembly>_protein.faa           unpacked protein FASTA (when available)
#   wget.log                         appended wget log
#
# Exit status: 0 on success, 1 if any taxon could not be fetched, 2 on usage
# error.
#
# This is just to show how to define the taxon list inline if you don't want
# to read taxa from the command line (then call: main "${TAXLIST[@]}"):
#TAXLIST=("Daphnia pulex" "Drosophila melanogaster" "Anopheles gambiae" "Pediculus humanus"
#"Ixodes scapularis" "Apis mellifera" "Bombyx mori")
#TAXLIST=("Strigamia maritima")

# Only -u: -e is deliberately not set, because a missing protein file makes
# wget fail and that must not abort the remaining taxa.
set -u

# Kept as an array so every option is passed to wget as its own word.
WGET_OPTS=(-c --random-wait -t 40 -a wget.log)

#######################################
# Fetch genome (and protein, if present) FASTA files for a single taxon.
# Globals:   WGET_OPTS (read)
# Arguments: $1 - taxon name, e.g. "Daphnia pulex"
# Outputs:   progress on stdout, diagnostics on stderr, files in the cwd
# Returns:   0 on success; 1 if no accession / FTP path was found or the
#            genome download or any unpacking step failed
#######################################
fetch_taxon() {
  local tax=$1
  local genome acc name result ftpp base url_genome url_protein
  local status=0

  echo "getting genome for: $tax"
  #mkdir -p "$tax" # if you want to create a directory
  #cd "$tax"

  # The whole query is quoted so that [Organism:exp] can never be treated as
  # a pathname pattern by the shell.
  genome=$(esearch -db genome -query "${tax}[Organism:exp]" |
    efetch -format docsum | tee "${tax}.genome.esearch.docsum")
  acc=$(printf '%s\n' "$genome" |
    xtract -pattern DocumentSummary -element Assembly_Accession)
  name=$(printf '%s\n' "$genome" |
    xtract -pattern DocumentSummary -element Assembly_Name)
  if [[ -z "$acc" ]]; then
    echo "error: no assembly accession found for: $tax" >&2
    return 1
  fi
  echo "authoritative genome: $acc $name"

  result=$(esearch -db assembly -query "$acc" |
    efetch -format docsum | tee "${tax}.assembly.esearch.docsum")
  ftpp=$(printf '%s\n' "$result" |
    xtract -pattern DocumentSummary -element FtpPath_GenBank)
  if [[ -z "$ftpp" ]]; then
    echo "error: no GenBank FTP path found for: $tax ($acc)" >&2
    return 1
  fi
  echo "FtpPath: $ftpp"

  # Remote files are named after the last component of the FTP directory.
  base=$(basename -- "$ftpp")
  url_genome="${ftpp}/${base}_genomic.fna.gz"
  url_protein="${ftpp}/${base}_protein.faa.gz"

  ## get genome data
  echo "Downloading $url_genome ..."
  if wget "${WGET_OPTS[@]}" "$url_genome"; then
    gunzip -f -- "${base}_genomic.fna.gz" || status=1
  else
    echo "error: genome download failed for: $tax" >&2
    status=1
  fi

  ## get protein data
  echo "Downloading $url_protein ..."
  # This may fail: not every assembly ships a protein file, so a failed
  # download here is reported but not counted as an error.
  if wget "${WGET_OPTS[@]}" "$url_protein"; then
    gunzip -f -- "${base}_protein.faa.gz" || status=1
  else
    echo "note: no protein FASTA downloaded for: $tax" >&2
  fi

  # cd ..
  return "$status"
}

#######################################
# Entry point: process every taxon given as an argument.
# Arguments: one or more taxon names
# Returns:   0 if all taxa succeeded, 1 if any failed, 2 if none were given
#######################################
main() {
  if (( $# == 0 )); then
    echo "usage: ${0##*/} TAXON [TAXON ...]" >&2
    return 2
  fi

  local tax failed=0
  # "$@" keeps each argument intact, so quoted names with spaces work.
  for tax in "$@"; do
    fetch_taxon "$tax" || failed=1
  done
  return "$failed"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment