Skip to content

Instantly share code, notes, and snippets.

@cmungall
Created August 13, 2018 23:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cmungall/a2e3bac5f4750602559a893faa3b717e to your computer and use it in GitHub Desktop.
Save cmungall/a2e3bac5f4750602559a893faa3b717e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Command line wrapper to mygene for bulk operations
Type:
    bulkgene.py -h
for instructions.
See any mygene gene page, e.g.
    http://mygene.info/v3/gene/1017
for the list of fields that can be mapped.
Example (from symbols):
bulkgene.py -m ensembl.gene -m HGNC -m Vega -m MIM -m alias -m uniprot.Swiss-Prot ~/tmp/geneset.txt
From uniprot IDs:
bulkgene.py -s uniprot.Swiss-Prot -s uniprot.TrEMBL -m entrezgene -m ensembl.gene -m HGNC -m Vega -m MIM -m alias -m uniprot.Swiss-Prot /tmp/up
"""
import mygene
import csv
import click
import logging
# Configure the root logger at INFO so the query-term count logged by run() is shown.
logging.basicConfig(level=logging.INFO)
@click.command()
@click.option("-t", "--taxon", default="human",
              help="Species passed to the mygene query (default: human).")
@click.option("-s", "--scopes", multiple=True, default=["symbol"],
              help="Field(s) the query terms are matched against; "
                   "may be repeated (default: symbol).")
@click.option("-m", "--fields", multiple=True, default=["ensembl.gene"],
              help="Field(s) to report for each hit, as dotted paths; "
                   "may be repeated (default: ensembl.gene).")
@click.argument("qfile", type=click.Path(exists=True))
def run(taxon, scopes, fields, qfile):
    """Bulk-map the query terms listed in QFILE using mygene.

    QFILE is a tab-separated file; the first column of each row is used as
    a query term. One tab-separated line is printed to standard output per
    result returned by mygene, with one column per requested field, in the
    order the fields were given.
    """
    qterms = load_input(qfile)
    # Lazy %-style arguments: the message is only formatted if it is emitted.
    logging.info('Qterms = %s', len(qterms))

    mg = mygene.MyGeneInfo()
    # returnall=True makes querymany return a dict; the hits are under 'out'.
    payload = mg.querymany(qterms,
                           species=taxon,
                           scopes=scopes,
                           fields=fields,
                           returnall=True)
    rows = payload['out']
    for r in rows:
        vals = [lookup(r, f) for f in fields]
        print("\t".join(ensure_str(v) for v in vals))
def ensure_str(v: object) -> str:
    """Return *v* rendered as a single string suitable for one output cell.

    Strings are returned unchanged, lists are rendered element by element
    (recursively) and joined with ``|``, and any other value is converted
    with ``str()``.
    """
    if isinstance(v, str):
        return v
    if isinstance(v, list):
        return "|".join(ensure_str(e) for e in v)
    return str(v)
def lookup(obj, path):
    """Return the value found in *obj* by following the dotted *path*.

    *path* is split on ``.`` and each token is used as a dictionary key,
    descending one level per token (e.g. ``"ensembl.gene"`` reads
    ``obj["ensembl"]["gene"]``).

    If a list is met while tokens remain, the remaining path is resolved
    against every element and the values found are returned as one flat
    list; this covers a field that holds a list of sub-documents rather
    than a single one.

    Returns ``""`` when the path cannot be followed: a key is missing, or an
    intermediate value is neither a dict nor a list.
    """
    return _lookup_tokens(obj, path.split("."))


def _lookup_tokens(node, toks):
    """Resolve the key tokens *toks*, in order, starting from *node*."""
    for i, tok in enumerate(toks):
        if isinstance(node, list):
            # Fan out: apply the not-yet-consumed tokens to each element and
            # gather whatever is found, dropping the "" misses.
            hits = []
            for elem in node:
                found = _lookup_tokens(elem, toks[i:])
                if isinstance(found, list):
                    hits.extend(found)
                elif found != "":
                    hits.append(found)
            return hits if hits else ""
        # Only dicts can be keyed by a token. Testing `tok in node` on a str
        # would be a substring check and on an int would raise TypeError.
        if not isinstance(node, dict) or tok not in node:
            return ""
        node = node[tok]
    return node
def load_input(qfile):
    """Read the query terms from the tab-separated file *qfile*.

    Returns a list holding the first column of every non-empty row, in file
    order. Any further columns are ignored.
    """
    # newline='' is what the csv module asks for when opening files.
    with open(qfile, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        # csv.reader yields [] for a blank line; skip those rather than
        # failing with IndexError on row[0].
        return [row[0] for row in reader if row]
# Run the click command only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment