Skip to content

Instantly share code, notes, and snippets.

@rwst
Created August 24, 2019 17:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rwst/7e7218533eca5235419db2a878164b07 to your computer and use it in GitHub Desktop.
Save rwst/7e7218533eca5235419db2a878164b07 to your computer and use it in GitHub Desktop.
Creates QuickStatements (QS) commands that associate gene items with disease items, using Wikidata (WD) and UniProt data.
from sys import *
import csv
# Text that introduces an OMIM number inside the "Involvement in disease"
# column read from stdin, and how many characters after it form the number.
MIM_TAG = '[MIM:'
MIM_ID_LENGTH = 6


def entity_id(url):
    """Return the part of *url* after its last '/'.

    A string without any '/' is returned unchanged.
    """
    return url.rpartition('/')[2]


def _group(rows, key_of, value_of):
    """Collect ``value_of(row)`` into a set per ``key_of(row)``."""
    grouped = {}
    for row in rows:
        grouped.setdefault(key_of(row), set()).add(value_of(row))
    return grouped


def _keep_unambiguous(grouped):
    """Map each key that has exactly one value to that value; drop the rest."""
    return {
        key: next(iter(values))
        for key, values in grouped.items()
        if len(values) == 1
    }


def load_associations(lines):
    """Read the tab-separated 'item'/'dis' table.

    *lines* is any iterable of text lines (an open file, a list of strings)
    whose first line is the header.  Both columns are reduced to the last
    path segment of their URL.

    Returns a dict mapping each 'item' id to the set of 'dis' ids seen
    with it.
    """
    rows = csv.DictReader(lines, delimiter='\t')
    return _group(
        rows,
        lambda row: entity_id(row['item']),
        lambda row: entity_id(row['dis']),
    )


def load_omim_index(lines):
    """Read the tab-separated 'item'/'omim' table.

    Returns a dict mapping an OMIM number (the raw 'omim' cell) to the id
    taken from the 'item' URL.  OMIM numbers that occur with more than one
    distinct item are ambiguous and are left out.
    """
    rows = csv.DictReader(lines, delimiter='\t')
    grouped = _group(
        rows,
        lambda row: row['omim'],
        lambda row: entity_id(row['item']),
    )
    return _keep_unambiguous(grouped)


def load_uniprot_index(lines):
    """Read the tab-separated 'uid'/'gid' table.

    Returns a dict mapping a 'uid' cell to the id taken from the 'gid' URL.
    A uid that occurs with more than one distinct gid is ambiguous and is
    left out; exact repeats of the same pair are harmless.
    """
    rows = csv.DictReader(lines, delimiter='\t')
    grouped = _group(
        rows,
        lambda row: row['uid'],
        lambda row: entity_id(row['gid']),
    )
    return _keep_unambiguous(grouped)


def iter_mim_ids(text):
    """Yield the MIM_ID_LENGTH characters that follow each MIM_TAG in *text*.

    Scanning resumes right after each extracted id.  A tag at the very
    start of *text* is reported too (``str.find`` returns -1, not 0, when
    nothing is found).
    """
    tag_at = text.find(MIM_TAG)
    while tag_at >= 0:
        id_start = tag_at + len(MIM_TAG)
        id_end = id_start + MIM_ID_LENGTH
        yield text[id_start:id_end]
        tag_at = text.find(MIM_TAG, id_end)


def iter_statements(rows, mims, unips, genes):
    """Yield one ``gene|P2293|disease`` line per new association.

    *rows* is an iterable of mappings with the keys 'Entry' and
    'Involvement in disease'.  *mims*, *unips* and *genes* are the
    dicts produced by ``load_omim_index``, ``load_uniprot_index`` and
    ``load_associations``.

    For every OMIM number found in a row's disease text, a line is produced
    when the number resolves to a disease id, the row's entry resolves to a
    gene id, and ``genes`` does not already list that gene under that
    disease.  Repeated hits are not de-duplicated.
    """
    for row in rows:
        # The gene lookup depends only on the row, so do it once up front.
        gene = unips.get(row.get('Entry'))
        if gene is None:
            continue
        # DictReader fills cells missing from a short row with None.
        disease_text = row.get('Involvement in disease') or ''
        for mimid in iter_mim_ids(disease_text):
            disease = mims.get(mimid)
            if disease is None:
                continue
            # NOTE(review): `genes` is keyed by the 'item' column but is
            # queried here by disease id and tested for gene membership --
            # confirm which way round gene-diseaseassoc.tab is laid out.
            known = genes.get(disease)
            if known is None or gene not in known:
                yield '{}|P2293|{}'.format(gene, disease)
                # Alternative statement formats left commented out by the
                # original author:
                # print('{}|P2293|{}|S248|Q905695|S813|+2019-08-13T00:00:00Z/11|S352|"{}"'.format(git, list(dis)[0], uid))
                # print('{}|P492|"{}"|S248|Q241953|S813|+2019-08-19T00:00:00Z/11'.format(it, mimid))


def main():
    """Load the three lookup tables, then turn stdin rows into statements."""
    # newline='' is what the csv module asks for when it is given a file.
    with open('gene-diseaseassoc.tab', 'r', newline='') as handle:
        genes = load_associations(handle)
    with open('diseaseid-omim.tab', 'r', newline='') as handle:
        mims = load_omim_index(handle)
    with open('uniprot-geneid.tab', 'r', newline='') as handle:
        unips = load_uniprot_index(handle)

    entries = csv.DictReader(stdin, delimiter='\t')
    for statement in iter_statements(entries, mims, unips, genes):
        print(statement)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment