Skip to content

Instantly share code, notes, and snippets.

@seandavi
Created November 4, 2010 13:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save seandavi/662463 to your computer and use it in GitHub Desktop.
Save seandavi/662463 to your computer and use it in GitHub Desktop.
Find functional location of a variant in a transcript database
#!/usr/bin/env python
from operator import itemgetter
class Transcript:
def __init__(self,chromosome,name,exons,cds,strand):
self.chromosome=chromosome
self.name=name
self.strand=strand
self.exons=exons
self.cds=cds
self.introns=self._computeIntrons()
def __str__(self):
return("Transcript <%s:%s %s:%s>" %
(self.name,self.strand,self.chromosome,str(self.exons)))
def _computeIntrons(self):
self.exons.sort()
intronstarts=[x[0] for x in exons[1:]]
intronends =[x[1] for x in exons[0:-1]]
introns=zip(intronstarts,intronends)
return(introns)
def overlaps(self,pos):
types=[]
for exon in self.exons:
if((pos>exon[0]) & (pos<=exon[1])):
if((pos>self.cds[0]) & (pos<=self.cds[1])):
types.append('coding')
else:
if(pos>self.cds[1]):
if(self.strand=='+'):
types.append("3'-UTR")
else:
types.append("5'-UTR")
if(pos<=self.cds[0]):
if(self.strand=='+'):
types.append("5'-UTR")
else:
types.append("3'-UTR")
maxp=max(self.exons,key=itemgetter(1))[1]
minp=min(self.exons,key=itemgetter(0))[0]
if((pos>minp) & (pos<=maxp)):
for exon in self.exons:
if(((pos>exon[0]-2) & (pos<=exon[0])) |
((pos>exon[1]) & (pos<=exon[1]+2))):
types.append('splice-site')
for intron in self.introns:
if((pos>intron[0]) & (pos<=intron[1])):
if((pos>self.cds[0]) & (pos<=self.cds[1])):
types.append('intron')
return(types)
with open("/Users/sdavis/Downloads/CCDS.hg18 (1).bed",'r') as bedfile:
bedfile.next()
for line in bedfile:
sline = line.strip().split("\t")
exonstarts=map(int,sline[9].split(",")[0:-1])
exonends=map(int,sline[10].split(",")[0:-1])
cds = (int(sline[6]),int(sline[7]))
exons = zip(exonstarts,exonends)
t = Transcript(chromosome=sline[2],name=sline[1],exons=exons,
strand=sline[3],cds=cds)
for i in range(10):
overlap=t.overlaps(51063758)
overlap=t.overlaps(51064000)
if(len(overlap)<>0):
print overlap
print t.introns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment