Skip to content

Instantly share code, notes, and snippets.

@rwst
Created August 21, 2019 06:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rwst/760ee4454d306c4f619053bf5798becd to your computer and use it in GitHub Desktop.
Save rwst/760ee4454d306c4f619053bf5798becd to your computer and use it in GitHub Desktop.
Creates QuickStatements batches for missing claims on Wikidata items about genes and their corresponding OMIM entries.
from sys import *
import csv
def load_genes(rows):
    """Index gene rows by their gene id.

    Args:
        rows: Iterable of mappings with the keys ``geneid``, ``item`` and
            ``itemLabel`` (for example a ``csv.DictReader``).

    Returns:
        A ``(genes, notices)`` pair.  ``genes`` maps each gene id to an
        ``(item_id, label)`` tuple, where ``item_id`` is the text after the
        last ``/`` of the ``item`` value.  ``notices`` holds one message per
        row that repeats an already-seen gene id with a different item; the
        first item seen for an id is the one that is kept.
    """
    genes = {}
    notices = []
    for row in rows:
        gene_id = row.get('geneid')
        # Keep only the trailing path segment of the item value.
        item_id = row.get('item').rsplit('/', 1)[-1]
        known = genes.get(gene_id)
        if known is None:
            genes[gene_id] = (item_id, row.get('itemLabel'))
        elif item_id != known[0]:
            notices.append(
                'duplicate gene id: {} {} {}'.format(gene_id, known, item_id))
    return genes, notices


def omim_lines(rows, genes):
    """Yield the output lines for the OMIM rows, in input order.

    Only rows whose ``mtype`` is ``'gene'`` and whose ``gid`` is non-empty
    are considered.  A row whose ``gid`` is not in ``genes`` yields a
    ``missing gene`` message.  Any other row yields an optional
    ``name mismatch`` message followed by one ``<item>|P492|"<mimid>"``
    statement line (P492 is presumably the Wikidata OMIM ID property --
    confirm before uploading).

    Args:
        rows: Iterable of mappings with the keys ``mtype``, ``gid``,
            ``mimid`` and ``gname``.
        genes: Mapping of gene id to ``(item_id, label)`` as returned by
            ``load_genes``.
    """
    for row in rows:
        if row.get('mtype') != 'gene':
            continue
        gene_id = row.get('gid')
        if not gene_id:
            # Without an id there is nothing to look up.  Skipping before
            # the lookup keeps an empty or missing key in ``genes`` from
            # being matched to every OMIM row that lacks an id.
            continue
        mim_id = row.get('mimid')
        symbol = row.get('gname')
        gene = genes.get(gene_id)
        if gene is None:
            yield 'missing gene: {} Entrez: {} OMIM: {}'.format(
                symbol, gene_id, mim_id)
            continue
        item_id, label = gene
        if symbol != label:
            # A differing label is reported, but the statement is still
            # emitted for the row.
            yield 'name mismatch: {} ({}) {}'.format(symbol, gene_id, label)
        # Variant of the statement carrying a reference, kept from the
        # original script for documentation:
        #   '{}|P492|"{}"|S248|Q241953|S813|+2019-08-19T00:00:00Z/11'
        yield '{}|P492|"{}"'.format(item_id, mim_id)


def main(genes_path='gene-entrezids.tab', mim_path='mim2gene.txt'):
    """Read both tab-separated inputs and print messages and statements.

    Duplicate-id messages from the gene table are printed first, followed by
    the lines produced for the OMIM table.  Everything goes to standard
    output, exactly as the flat script did.

    Args:
        genes_path: Tab-separated file with a header naming at least the
            columns ``geneid``, ``item`` and ``itemLabel``.
        mim_path: Tab-separated file with a header naming at least the
            columns ``mtype``, ``gid``, ``mimid`` and ``gname``.
    """
    # ``with`` closes each file deterministically; ``newline=''`` is what
    # the csv module asks for when it is handed a file object.
    with open(genes_path, 'r', newline='') as handle:
        genes, notices = load_genes(csv.DictReader(handle, delimiter='\t'))
    for message in notices:
        print(message)
    with open(mim_path, 'r', newline='') as handle:
        for line in omim_lines(csv.DictReader(handle, delimiter='\t'), genes):
            print(line)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment