Last active
September 22, 2018 22:31
-
-
Save tluquez/0115232764eea8aa915f80c449d401b0 to your computer and use it in GitHub Desktop.
Script to download .fastq files from ENA (https://www.ebi.ac.uk/ena) using aspera (https://developer.asperasoft.com/desktop-advance/command-line-client). It can easily be extended to download other type of files from ENA. Example input file with the ENA urls for all .fastq files in the project ID PRJNA382448 is supplied. Note that the urls must …
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# MIT License
#
# Copyright (c) [2018] [Grupo de Bioquimica Experimental y Computacional from the Pontificia Universidad Javeriana and Tain Velasco-Luquez]
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# Author:
# Tain Velasco-Luquez <tvelasco@javeriana.edu.co> @TainVelasco-Luquez: Design and implementation
# Description
# This script downloads files from the ENA (https://www.ebi.ac.uk/ena) using the aspera Command Line client (aspera CLI, from https://developer.asperasoft.com/desktop-advance/command-line-client), as suggested by ENA, which ensures high transfer speeds.
# Last updated 11/09/2018
##### Usage
#######################################
# Print the help text of this script.
# Globals:   none
# Arguments: none
# Outputs:   the help text, written to stdout
# Returns:   exit status of the final `cat`
#######################################
usage() {
  # The script name is passed as a printf *argument*, never spliced into the
  # format string, so a '%' or '\' in the file name cannot corrupt the output.
  printf '\n%s -- Program to download all ".fastq" files from ENA supplied by the user in a text file.\n' "${0##*/}"
  # Quoted delimiter: the text below is emitted literally, with no expansion.
  cat <<'EOF'
Mandatory:
  --asp_url            File with the aspera urls downloaded from ENA
  --outdir             Path to save the data
  --max_transfer_rate  The max transfer rate over the network (e.g 300m or 600m)
Optional:
  --uncompress         If "true" then apply gunzip to all .gz
  --aspera_openssh     Different path to the aspera ".openssh". see Requirements for more info.
  --help               Show this help text
Requirements:
  1. This file assumes aspera CLI is installed and its path properly exported
  2. The aspera CLI ".openssh" is assumed to be in "~/Applications/Aspera\ Connect.app/Contents/Resources/asperaweb_id_dsa.openssh", otherwise, please specify a different path using "--aspera_openssh".
Example usage:
  aspera_fastq_download_from_ENA.sh --asp_url pRJNA382448_fastq_ascp.txt --outdir Raw_files/ --max_transfer_rate 600m
Aspera CLI:
  Download https://downloads.asperasoft.com/en/downloads/62
EOF
}
##### If no arguments are supplied, explain how to call the script and abort
if (($# == 0)); then
  # Diagnostics go to stderr so they never end up in a captured stdout.
  printf 'No arguments supplied.\n' >&2
  usage >&2
  exit 1
fi
##### Options and arguments
# Every option except --help/-h takes exactly one value, so the command line
# is consumed in (option, value) pairs. Each long option is named exactly like
# the shell variable that stores its value (--asp_url -> asp_url, ...).
while (($# > 0)); do
  case "$1" in
    --help | -h)
      usage
      exit 0 # asking for help is not an error
      ;;
    --asp_url | --outdir | --max_transfer_rate | --uncompress | --aspera_openssh)
      # Reject a trailing option without a value instead of storing an
      # empty string for it.
      if (($# < 2)); then
        printf 'Option %s requires a value.\n' "$1" >&2
        usage >&2
        exit 1
      fi
      # Drop the leading "--" to obtain the variable name. This is safe
      # because this arm only matches the fixed option names listed above.
      printf -v "${1#--}" '%s' "$2"
      shift 2
      ;;
    *)
      printf 'Unknown option: %s\n' "$1" >&2
      usage >&2
      exit 1
      ;;
  esac
done
##### Checking the correctness of the input options and download the files
# Each mandatory input is validated on its own so the user is told exactly
# which one is wrong, and nothing below runs with an empty or invalid value.
if [[ ! -f "${asp_url:-}" ]]; then
  printf '\n --asp_url must point to an existing file with the aspera urls.\n' >&2
  exit 1
fi
if [[ ! -d "${outdir:-}" ]]; then
  printf '\n --outdir must point to an existing directory.\n' >&2
  exit 1
fi
if [[ -z "${max_transfer_rate:-}" ]]; then
  printf '\n --max_transfer_rate is mandatory (e.g 300m or 600m).\n' >&2
  exit 1
fi

# Ask the shell directly whether ascp can be run, rather than guessing from
# substrings of PATH.
if ! command -v ascp > /dev/null 2>&1; then
  printf '\n Make sure you export the aspera CLI path: ascp was not found in PATH.\n' >&2
  exit 1
fi

# A key passed with --aspera_openssh always wins; otherwise fall back to the
# default location documented in the help text.
default_openssh="${HOME}/Applications/Aspera Connect.app/Contents/Resources/asperaweb_id_dsa.openssh"
key_file="${aspera_openssh:-${default_openssh}}"
if [[ ! -f "${key_file}" ]]; then
  printf '\n Check whether the path to the asperaweb_id_dsa.openssh is correct: %s\n' "${key_file}" >&2
  exit 1
fi

printf 'Downloading the files in %s \n' "${asp_url}"

# Main command
# The url list is read on file descriptor 3 so that ascp (or anything it
# spawns) cannot swallow the remaining lines in case it reads from stdin.
# `read` keeps the default IFS on purpose: it trims whitespace around each url.
# The `|| [[ -n ... ]]` part handles a last line without a trailing newline.
failed=0
while read -r every_line <&3 || [[ -n "${every_line}" ]]; do
  [[ -z "${every_line}" ]] && continue # skip blank lines
  # The status of every single transfer is checked; the status of the loop
  # itself would only reflect the last command executed in its body.
  if ascp -QT -l "${max_transfer_rate}" -P33001 -i "${key_file}" \
    "era-fasp@${every_line}" "${outdir}"; then
    printf '%s is done\n' "${every_line}"
  else
    printf 'Failed to download %s\n' "${every_line}" >&2
    failed=$((failed + 1))
  fi
done 3< "${asp_url}"

if ((failed > 0)); then
  printf '\n Ooops! There was an error while downloading %d file(s). Please check the ascp command is correct. \n' "${failed}" >&2
  exit 1
fi
printf '\n All files have been downloaded successfully \n'

##### If uncompress is set to true then uncompress them, otherwise let them compressed
# This runs only after every download succeeded.
if [[ "${uncompress:-}" == "true" ]]; then
  printf '\n About to uncompress the files...\n'
  # Glob inside outdir instead of changing directory; quoting keeps paths
  # with spaces intact.
  gz_files=("${outdir%/}"/*.gz)
  # When nothing matches, the pattern stays literal and does not exist.
  if [[ -e "${gz_files[0]}" ]]; then
    if ! gunzip -- "${gz_files[@]}"; then
      printf '\n Ooops! gunzip failed for at least one file in %s.\n' "${outdir}" >&2
      exit 1
    fi
    printf '\n All files have been uncompressed.\n'
  else
    printf '\n No .gz files found in %s, nothing to uncompress.\n' "${outdir}"
  fi
fi
exit 0
##### All done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/009/SRR5442949/SRR5442949.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/000/SRR5442950/SRR5442950.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/001/SRR5442951/SRR5442951.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/002/SRR5442952/SRR5442952.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/003/SRR5442953/SRR5442953.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/004/SRR5442954/SRR5442954.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/005/SRR5442955/SRR5442955.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/006/SRR5442956/SRR5442956.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/007/SRR5442957/SRR5442957.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/008/SRR5442958/SRR5442958.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/009/SRR5442959/SRR5442959.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/000/SRR5442960/SRR5442960.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/001/SRR5442961/SRR5442961.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/002/SRR5442962/SRR5442962.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/003/SRR5442963/SRR5442963.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/004/SRR5442964/SRR5442964.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/005/SRR5442965/SRR5442965.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/006/SRR5442966/SRR5442966.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/007/SRR5442967/SRR5442967.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/008/SRR5442968/SRR5442968.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/009/SRR5442969/SRR5442969.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/000/SRR5442970/SRR5442970.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/001/SRR5442971/SRR5442971.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR544/002/SRR5442972/SRR5442972.fastq.gz
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment