Skip to content

Instantly share code, notes, and snippets.

@adamewing
Last active June 11, 2020 14:18
Show Gist options
  • Save adamewing/ffb8fe29fb071ebd507810cd70b62fd2 to your computer and use it in GitHub Desktop.
Save adamewing/ffb8fe29fb071ebd507810cd70b62fd2 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import print_function
import sys
'''
Parses records that look like:
276 28.4 0.0 1.2 chr1 837099 837180 (248119242) + SVA_D Retroposon/SVA 131 211 (1175) 1082
9901 1.8 0.4 25.1 chr1 6304394 6306236 (242650186) C SVA_D Retroposon/SVA (0) 1386 1 7974
6435 8.0 2.4 3.7 chr1 6714508 6716330 (242240092) C SVA_D Retroposon/SVA (1) 1385 1 8739
Expects records in sorted order.
'''
chromosomes = ['chr'+c for c in map(str, list(range(1, 23)) + ['X', 'Y'])]
class Rmsk:
def __init__(self, raw):
raw = raw.strip().split()
self.chrom = raw[4]
self.start = int(raw[5])
self.end = int(raw[6])
self.orient = raw[8]
self.score = [1000.0-float(raw[1])*10]
if self.orient == 'C':
self.orient = '-'
self.fam = raw[9]
def near(self, other):
assert self < other, 'record not in sorted order: %s' % str(other)
return other.start - self.end < 500 and self.chrom == other.chrom and self.orient == other.orient
def mean_score(self):
return int(sum(self.score)/len(self.score))
def __lt__(self, other):
if self.chrom == other.chrom:
return self.start < other.start
return self.chrom < other.chrom
def __str__(self):
return '%s\t%d\t%d\t%s\t%d\t%s' % (self.chrom, self.start, self.end, self.fam, self.mean_score(), self.orient)
if len(sys.argv) == 2:
with open(sys.argv[1], 'r') as raw:
sva = None
for line in raw:
if 'SVA' not in line:
continue
# ignore unplaced & alternate sequence
if line.strip().split()[4] not in chromosomes:
continue
if sva is None:
sva = Rmsk(line)
else:
rec = Rmsk(line)
if sva.near(rec):
sva.end = rec.end
sva.score += rec.score
else:
print(sva)
sva = rec
print(sva)
else:
sys.exit('usage: %s <repeatmasker .out file from UCSC>' % sys.argv[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment