Last active
October 15, 2021 12:49
-
-
Save TTTPOB/983ec24cf42a01b7802098ee6f117494 to your computer and use it in GitHub Desktop.
Simple script to get SRR accessions from a GSM or SRX accession; useful when you need to download data for many SRR runs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import httpx | |
from bs4 import BeautifulSoup as bs | |
from xml.etree import ElementTree as ET | |
import click | |
import re | |
# see https://blog.tpob.xyz/2021/10/14/%E6%9B%B4%E5%BF%AB%E8%8E%B7%E5%BE%97srr-accession/ | |
# for more information | |
def get_srr_from_srx(srx):
    """Return the SRR run accessions listed under an SRX experiment.

    Fetches the NCBI SRA "FullXml" report page for ``srx``, parses the text
    of the page's ``#ResultView`` element as XML, and collects the
    ``accession`` attribute of every matched ``RUN`` element.

    Args:
        srx: SRA experiment accession string (interpolated into the URL).

    Returns:
        A list of run accession strings in document order; empty when the
        report contains no matching ``RUN`` element.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
        IndexError: If the page contains no ``#ResultView`` element.
        xml.etree.ElementTree.ParseError: If the element's text is not
            well-formed XML.
        KeyError: If a matched ``RUN`` element has no ``accession`` attribute.
    """
    srx_link = f"https://www.ncbi.nlm.nih.gov/sra/{srx}[accn]?report=FullXml"
    response = httpx.get(srx_link)
    # Fail on an HTTP error here rather than trying to parse an error page.
    response.raise_for_status()

    page = bs(response.text, "html.parser")
    result_view = page.select_one("#ResultView")
    if result_view is None:
        # IndexError is what indexing the empty match list used to raise;
        # the type is kept for compatibility, now with a usable message.
        raise IndexError(
            f"no #ResultView element in the SRA report page for {srx!r}; "
            "check that the accession exists"
        )

    # The element's text content is expected to be the XML report itself.
    report = ET.fromstring(result_view.text)
    return [run.attrib["accession"] for run in report.findall(".//*RUN")]
@click.group()
def cli():
    """Look up SRA run (SRR) accessions from a GEO sample (GSM) or an SRA experiment (SRX) accession."""
@cli.command()
@click.argument("gsm", type=str)
def gsm2srr(gsm):
    """Print the SRR run accessions linked to the GEO sample GSM, one per line."""
    gsm_url = f"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={gsm}&targ=self&form=xml&view=quick"
    response = httpx.get(gsm_url)
    # Fail on an HTTP error here rather than trying to parse an error page.
    response.raise_for_status()
    root = ET.fromstring(response.text)

    # ElementTree spells a namespaced tag as "{uri}local". Reuse the "{uri}"
    # part to qualify the child lookups, and use no prefix at all when the
    # root element carries no namespace.
    root_tag = root.tag
    namespace = root_tag[: root_tag.index("}") + 1] if root_tag.startswith("{") else ""
    relations = root.findall(f"{namespace}Sample/{namespace}Relation[@type='SRA']")

    srx_list = []
    for relation in relations:
        target = relation.attrib["target"]
        # Keep everything from "SRX" to the end of the relation target.
        match = re.search(r"SRX.*$", target)
        if match is None:
            # Report and skip instead of crashing on `None.group(0)`.
            click.echo(
                f"warning: no SRX accession in SRA relation target {target!r}; skipped",
                err=True,
            )
            continue
        srx_list.append(match.group(0))

    # Flatten the per-experiment run lists in a single pass.
    srr_list = [srr for srx in srx_list for srr in get_srr_from_srx(srx)]
    click.echo("\n".join(srr_list))
@cli.command()
@click.argument("srx", type=str)
def srx2srr(srx):
    # Resolve the SRX experiment to its run accessions and print them one
    # per line. Documented with comments only: click would surface a
    # docstring as the command's --help text.
    run_accessions = get_srr_from_srx(srx)
    click.echo("\n".join(run_accessions))
# Dispatch to the click command group only when run as a script, not on import.
if __name__ == "__main__":
    cli()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment