Skip to content

Instantly share code, notes, and snippets.

@pjvandehaar
Last active April 1, 2017 04:55
Show Gist options
  • Save pjvandehaar/7d1882b2120d3a2031c73b6b8fa3c7cd to your computer and use it in GitHub Desktop.
Save pjvandehaar/7d1882b2120d3a2031c73b6b8fa3c7cd to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
def get_blks():
import gzip
# <http://hgdownload.cse.ucsc.edu/goldenPath/hg38/liftOver/hg38ToHg19.over.chain.gz>
with gzip.open('hg38ToHg19.over.chain.gz', 'rt') as f:
for line in f:
line = line.rstrip('\n').split()
if line and line[0] == 'chain':
blk = dict(chr1=line[2], std1=line[4], stt1=int(line[5]), len1=int(line[6]),
chr2=line[7], std2=line[9], stt2=int(line[10]),len2=int(line[11]),
pieces=[])
elif len(line)==3:
blk['pieces'].append(dict(bth=int(line[0]), gap1=int(line[1]), gap2=int(line[2])))
elif len(line)==1:
blk['pieces'].append(dict(bth=int(line[0])))
elif not line:
yield blk
else: raise
yield blk
def wth(dct, dct2):
d = dct.copy()
d.update(dct2)
return d
assert wth(dict(a=1,b=2), dict(b=102,c=103)) == dict(a=1,b=102,c=103)
def justkeys(dct, keys):
return {k:v for k,v in dct.items() if k in keys}
assert justkeys(dict(a=1,b=2,c=3,d=4), 'ac') == dict(a=1,c=3)
def wthout(dct, keys):
return {k:v for k,v in dct.items() if k not in keys}
assert wthout(dict(a=1,b=2,c=3,d=4), 'ac') == dict(b=2,d=4)
import itertools
def take(itr, n):
return list(itertools.islice(itr, 0, n))
def get_alns():
import gzip
for blk in get_blks():
aln = justkeys(blk, 'chr1 chr2 std1 std2 stt1 stt2'.split())
for piece in blk['pieces']:
yield wth(aln, {'end1': aln['stt1'] + piece['bth']})
for i in '12':
try: aln[f'stt{i}'] += piece['bth'] + piece[f'gap{i}']
except KeyError: pass
def lookup(chrom, pos):
for aln in get_alns():
if aln['chr1'] == chrom and aln['stt1'] <= pos < aln['end1']:
yield (aln['chr2'], aln['stt2'] + pos - aln['stt1'])
# took these from dbsnp website
test_str = '''(38) 1:169_549_811 = (37) 1:169_519_049
(38) 1:25_257_119 = (37) 1:25_583_610
(38) 6:396_321 = (37) 6:396_321
(38) 16:3_243_257 = (37) 16:3_293_257
(38) 16:3_243_403 = (37) 16:3_293_403
(38) 16:3_243_257 = (37) 16:3_293_257'''
tests = []
for t in test_str.split('\n'):
t = t.split()
xs = [t[i].split(':') for i in [1,4]]
tests.append([(f'chr{x[0]}', int(x[1])) for x in xs])
for test in tests:
lkup = list(lookup(*test[0]))
print(f'{test[0][0]}:{test[0][1]:,} -> {test[1][0]}:{test[1][1]:,}')
assert len(lkup) == 1, lkup
assert lkup[0] == test[1]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment