Skip to content

Instantly share code, notes, and snippets.

@hiraksarkar
Forked from apfejes/gist:95405620abb347e1abc87d5708dbc003
Created January 12, 2018 19:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hiraksarkar/ce450f94228052efc2eb9f58e11accf2 to your computer and use it in GitHub Desktop.
Save hiraksarkar/ce450f94228052efc2eb9f58e11accf2 to your computer and use it in GitHub Desktop.
A script for NCBI SRA downloads.
import argparse
# Host and base path (no scheme) that main() passes to prepare_url() as the URL prefix.
default_prefix = "ftp.sra.ebi.ac.uk/vol1/fastq/"
# Same location written in host:path form; not referenced by any code visible in this file.
ascp_prefix = "ftp.sra.ebi.ac.uk:/vol1/fastq/"
# Scheme string that main() puts in front of the URL returned by prepare_url().
default_protocol = "ftp://"
# Wildcard pattern that main() appends after the directory URL.
default_suffix = "*.fastq.gz"
# Command template; ascp_line() fills the two placeholders with (remote path, output directory).
default_ascp_line = "ascp -QT -l 300m -i ~/.ssh/asperaweb_id_dsa.openssh anonftp@ftp.ncbi.nlm.nih.gov:{} {}"
def prepare_url(srr_name, prefix):
    """Return the directory URL (without scheme) for a run accession.

    The result is ``prefix + <first six characters> + "/" [+ <sub-dir> + "/"]
    + srr_name + "/"``.  Whether a sub-directory is inserted depends on how
    many digit characters ``srr_name`` contains:

    * 6 digits: no sub-directory;
    * 7, 8 or 9 digits: the trailing 1, 2 or 3 characters of the name,
      left-padded with ``"0"`` to three characters.

    Args:
        srr_name: Run accession string, e.g. ``"SRR1234567"``.
        prefix: String placed in front of the generated path.

    Returns:
        The assembled URL, or an empty string when the digit count is
        outside the 6-9 range (callers receive ``""`` rather than an error).
    """
    num_digits = sum(ch.isdigit() for ch in srr_name)
    # Number of digits beyond the first six decides the sub-directory width.
    extra = num_digits - 6
    if not 0 <= extra <= 3:
        return ""

    parts = [srr_name[:6]]
    if extra:
        # Zero-pad the trailing characters to a fixed width of three.
        parts.append("0" * (3 - extra) + srr_name[-extra:])
    parts.append(srr_name)
    return prefix + "/".join(parts) + "/"
def prepare_path(srr_name, prefix=None):
    """Return the ``.sra`` file path for a run accession.

    The path has the form
    ``<prefix><first six characters>/<srr_name>/<srr_name>.sra``.

    Args:
        srr_name: Run accession string, e.g. ``"SRR1234567"``.
        prefix: Optional string placed in front of the path.  ``None``
            (the default) means no prefix; previously it was formatted
            into the result as the literal text ``"None"``.

    Returns:
        The assembled path string.
    """
    base = "" if prefix is None else prefix
    return "{}{}/{}/{}.sra".format(base, srr_name[:6], srr_name, srr_name)
def ascp_line(srr_name, out_dir):
    """Return the ascp command line that fetches one run into ``out_dir``.

    The remote path comes from ``prepare_path`` using the fixed
    ``/sra/sra-instant/reads/ByRun/sra/SRR/`` prefix, and is substituted
    together with ``out_dir`` into the ``default_ascp_line`` template.
    """
    remote_path = prepare_path(
        srr_name,
        prefix="/sra/sra-instant/reads/ByRun/sra/SRR/",
    )
    command = default_ascp_line.format(remote_path, out_dir)
    return command
def main():
    """Parse the command line and print one download URL or ascp command per run.

    Accessions come from ``--file`` (one per line) when given, otherwise
    from ``--SRR``.  With ``--ascp`` each accession is printed as an ascp
    command targeting ``--out``; without it, as
    ``default_protocol + prepare_url(...) + default_suffix``.

    Exits through ``parser.error`` when neither ``--SRR`` nor ``--file`` is
    supplied, instead of failing later with a ``TypeError`` on ``None``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--SRR', help='SRR name to be parsed')
    parser.add_argument('--file', help='file containing SRR name to be parsed')
    parser.add_argument('--ascp', action='store_true')
    parser.add_argument('--out', help='output directory')
    args = parser.parse_args()

    if not args.file and not args.SRR:
        parser.error("one of --SRR or --file is required")

    # --file takes precedence over --SRR, as in the original branching.
    if args.file:
        with open(args.file) as srr_file:
            stripped = [line.strip() for line in srr_file]
        # Blank lines would otherwise yield meaningless output lines.
        names = [name for name in stripped if name]
    else:
        names = [args.SRR]

    for name in names:
        if args.ascp:
            print(ascp_line(name, args.out))
        else:
            url = prepare_url(name, default_prefix)
            print("{}{}{}".format(default_protocol, url, default_suffix))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment