Skip to content

Instantly share code, notes, and snippets.

@TTTPOB
Last active October 15, 2021 12:49
Show Gist options
  • Save TTTPOB/983ec24cf42a01b7802098ee6f117494 to your computer and use it in GitHub Desktop.
Save TTTPOB/983ec24cf42a01b7802098ee6f117494 to your computer and use it in GitHub Desktop.
Simple script to get SRR accessions from a GSM or SRX accession; useful when you need to download data for many SRR runs.
#!/usr/bin/env python3
import httpx
from bs4 import BeautifulSoup as bs
from xml.etree import ElementTree as ET
import click
import re
# see https://blog.tpob.xyz/2021/10/14/%E6%9B%B4%E5%BF%AB%E8%8E%B7%E5%BE%97srr-accession/
# for more information
def get_srr_from_srx(srx):
    """Return the SRR run accessions listed for one SRA experiment.

    Requests the NCBI SRA "FullXml" report page for ``srx``, takes the text
    of the page's ``#ResultView`` element, parses that text as XML and
    collects the ``accession`` attribute of every matched ``RUN`` element.

    Args:
        srx: Experiment accession (for example ``"SRX123456"``). It is
            interpolated into the request URL as given.

    Returns:
        A list of run accession strings in document order; empty when the
        report contains no matching ``RUN`` element.

    Raises:
        httpx.HTTPStatusError: NCBI answered with a non-success status.
        click.ClickException: The returned page has no ``#ResultView``
            element, so there is no report to parse (presumably an unknown
            or withdrawn accession).
        xml.etree.ElementTree.ParseError: The report text is not
            well-formed XML.
    """
    srx_link = f"https://www.ncbi.nlm.nih.gov/sra/{srx}[accn]?report=FullXml"
    response = httpx.get(srx_link)
    # Surface HTTP failures directly instead of trying to scrape an error page.
    response.raise_for_status()

    srx_page = bs(response.text, "html.parser")
    result_views = srx_page.select("#ResultView")
    if not result_views:
        # Previously this case surfaced as a bare IndexError on `[0]`.
        raise click.ClickException(
            f"No '#ResultView' element on the SRA page for {srx!r}; "
            "cannot extract run accessions."
        )

    srx_xml = ET.fromstring(result_views[0].text)
    # The search path is kept exactly as the original wrote it; only the
    # placeholder-less f-string prefix was dropped.
    return [run.attrib["accession"] for run in srx_xml.findall(".//*RUN")]
@click.group()
def cli():
    # Root click command group. It does no work itself; the subcommands in
    # this file attach to it through the @cli.command() decorator.
    # Documented with comments rather than a docstring on purpose: click
    # would print a docstring as the group's --help text.
    pass
@cli.command()
@click.argument("gsm", type=str)
def gsm2srr(gsm):
    """Print the SRR run accessions behind the GEO sample GSM, one per line."""
    # Flow: fetch the sample's XML record from GEO, read the SRX accession
    # out of every <Relation type="SRA"> target, resolve each SRX to its
    # runs with get_srr_from_srx, then echo all runs newline-separated.
    gsm_url = (
        "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi"
        f"?acc={gsm}&targ=self&form=xml&view=quick"
    )
    response = httpx.get(gsm_url)
    # Surface HTTP failures directly instead of trying to parse an error page.
    response.raise_for_status()
    try:
        gsm_xml = ET.fromstring(response.text)
    except ET.ParseError as err:
        raise click.ClickException(
            f"GEO did not return parseable XML for {gsm!r}: {err}"
        ) from err

    # ElementTree spells qualified tags as "{namespace-uri}Name". Reuse the
    # root's "{...}" prefix for the child lookups, and fall back to no prefix
    # when the root tag carries no namespace at all.
    root_tag = gsm_xml.tag
    if root_tag.startswith("{") and "}" in root_tag:
        namespace = root_tag[: root_tag.index("}") + 1]
    else:
        namespace = ""
    relations = gsm_xml.findall(
        f"{namespace}Sample/{namespace}Relation[@type='SRA']"
    )

    srx_list = []
    for relation in relations:
        target = relation.attrib["target"]
        found = re.search(r"SRX.*$", target)
        if found is None:
            # Previously this crashed with AttributeError on `.group(0)`.
            click.echo(
                f"warning: no SRX accession in SRA relation target {target!r}",
                err=True,
            )
            continue
        srx_list.append(found.group(0))

    if not srx_list:
        # Report the miss (non-zero exit) instead of printing one blank line.
        raise click.ClickException(f"No SRA experiment is linked to {gsm!r}.")

    # Flatten in a single pass; `sum(lists, [])` re-copies the accumulator
    # for every sub-list.
    srr_list = [srr for srx in srx_list for srr in get_srr_from_srx(srx)]
    click.echo("\n".join(srr_list))
@cli.command()
@click.argument("srx", type=str)
def srx2srr(srx):
    # Subcommand: resolve one SRX experiment accession to its SRR runs via
    # get_srr_from_srx and print them one per line.
    # Documented with comments rather than a docstring on purpose: click
    # would print a docstring as this command's --help text.
    runs = get_srr_from_srx(srx)
    click.echo("\n".join(runs))
if __name__ == "__main__":
    # Run the click command group only when executed as a script, so that
    # importing this module has no side effects.
    cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment