Skip to content

Instantly share code, notes, and snippets.

@golobor
Created December 1, 2018 06:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save golobor/c77fce2f1d2f5e9e8ecaadb215343384 to your computer and use it in GitHub Desktop.
Save golobor/c77fce2f1d2f5e9e8ecaadb215343384 to your computer and use it in GitHub Desktop.
import subprocess
import shutil
import argparse
import pathlib
def build_parser():
    """Return the command-line parser for this script.

    The parser accepts one or more positional GSM accessions and an optional
    ``-g/--guess-title`` flag.
    """
    parser = argparse.ArgumentParser(
        description='Download all SRAs from a GSM.')
    parser.add_argument(
        'GSMs', metavar='GSMs', type=str, nargs='+',
        help='GSMs to download, can be more than one')
    parser.add_argument(
        '-g', '--guess-title',
        help='If specified, guess the title of each GSMs and use it to store SRRs',
        action='store_true')
    return parser


def run_pipeline(commands):
    """Run *commands* as a pipeline and return the decoded final stdout.

    Each command is an argument list executed without a shell; the stdout of
    one command is passed as stdin to the next. *commands* must contain at
    least one command.

    Raises:
        subprocess.CalledProcessError: if any stage exits with a non-zero
            status.
    """
    data = None
    for command in commands:
        # Argument lists (no shell=True) keep user-supplied values from being
        # word-split or interpreted by a shell.
        data = subprocess.run(
            command, input=data, stdout=subprocess.PIPE, check=True).stdout
    return data.decode()


def query_sra(gsm, element):
    """Return the ``xtract`` output for *element* of the SRA docsum of *gsm*.

    Equivalent to the pipeline
    ``esearch -db sra -query GSM | efetch -format docsum
    | xtract -pattern DocumentSummary -element ELEMENT``.
    """
    return run_pipeline([
        ['esearch', '-db', 'sra', '-query', gsm],
        ['efetch', '-format', 'docsum'],
        ['xtract', '-pattern', 'DocumentSummary', '-element', element],
    ])


def extract_title(raw_title, gsm):
    """Extract the sample title from a full docsum title.

    The title is the text between the first ``"<gsm>:"`` marker and the next
    ``';'``, with surrounding whitespace removed.

    Returns:
        The extracted title, or ``None`` when the marker is missing or the
        extracted title is empty.
    """
    _, marker, remainder = raw_title.partition(gsm + ':')
    if not marker:
        return None
    title = remainder.split(';')[0].strip()
    return title or None


def parse_run_accessions(output):
    """Split ``xtract`` output into a list of run accessions.

    Splits on any whitespace, so accessions separated by tabs or by newlines
    are all returned; empty output yields an empty list.
    """
    return output.split()


def main(argv=None):
    """Download the FASTQ files of every run belonging to the given GSMs.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        subprocess.CalledProcessError: if any external tool fails.
    """
    args = build_parser().parse_args(argv)
    for gsm in args.GSMs:
        print(f'downloading {gsm} from NCBI GEO...')
        output_folder = f'./{gsm}'
        if args.guess_title:
            raw_title = query_sra(gsm, 'Title')
            title = extract_title(raw_title, gsm)
            if title is None:
                # Keep the GSM-named folder instead of failing with an
                # IndexError or writing into the current directory.
                print(f'Could not extract a title from "{raw_title}", '
                      f'storing SRRs in {output_folder}')
            else:
                print(f'Extract title "{title}" from a full title "{raw_title}"')
                output_folder = f'./{title}'
        pathlib.Path(output_folder).mkdir(parents=True, exist_ok=True)

        srrs = parse_run_accessions(query_sra(gsm, 'Run@acc'))
        print(f'found SRRs: {srrs}')
        if not srrs:
            print(f'no SRRs found for {gsm}, skipping')
            continue

        for srr in srrs:
            print(f'downloading {srr}')
            # The tool's stdout is discarded, as before; passing the folder
            # as its own argument keeps titles with spaces intact.
            subprocess.run(
                ['fastq-dump', '-F', srr, '--split-files', '--gzip',
                 '-O', output_folder],
                stdout=subprocess.DEVNULL, check=True)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment