Skip to content

Instantly share code, notes, and snippets.

@JohnLonginotto
Last active November 12, 2016 10:08
Show Gist options
  • Save JohnLonginotto/25445199b48095a8c93143d14d3e7113 to your computer and use it in GitHub Desktop.
Save JohnLonginotto/25445199b48095a8c93143d14d3e7113 to your computer and use it in GitHub Desktop.
class stat:
    """Describe a 'GTF' stat: the names of GTF entries overlapping each read.

    The instance only carries metadata plus two snippets of Python source
    stored as strings:

    * ``before`` builds ``GTF_chromosomes``, a dict mapping chromosome name
      to an ``intervaltree.IntervalTree`` whose interval data is the entry
      name taken from the GTF's 9th column.
    * ``METHOD`` looks up ``CHR`` in that dict, queries the tree over
      ``POS`` to ``POS + len(SEQ)`` and assigns the result to ``GTF``.

    NOTE(review): both snippets are presumably exec'd by the host framework
    (``before`` once, ``METHOD`` per read) -- confirm against the caller.
    They are written starting at column 0 inside the string literals; adjust
    if the framework expects a different base indentation.
    """

    def __init__(self, INFO):
        """Populate the stat's metadata and code snippets.

        INFO is accepted to match the expected constructor signature; it is
        not read here.
        """
        self.DESCRIPTION = ['Demo for harmbrugge', 'ENSMUSG00000051951']
        self.LINKABLE = True
        self.SQL = 'TEXT'
        # The three names the METHOD snippet reads.
        self.dependencies = ['CHR', 'POS', 'SEQ']

        # Loader snippet. The file is opened in text mode ('r') so that
        # split('\t') operates on str under Python 3 as well as Python 2.
        # The chromosome variable is called `chrom` so the snippet does not
        # shadow the builtin `chr` in the namespace it runs in.
        # Lines with fewer than 9 tab-separated columns (e.g. '#' header
        # lines) are skipped.
        self.before = '''
import intervaltree
GTF_chromosomes = {}
with open('/Users/John/Downloads/Mus_musculus.GRCm38.86.gtf','r') as f:
    for line in f:
        line = line.split('\t')
        if len(line) < 9: continue
        chrom = line[0]
        start = int(line[3])
        end = int(line[4]) + 1 # My GTF file has 0-length entries and intervaltree doesn't like that. So I +1. I don't know your formatting.
        name = line[8].split('"')[1] # This extracts the first field in the 9th column for my GTF, which is the name. Yours might be different..?
        if chrom not in GTF_chromosomes: GTF_chromosomes[chrom] = intervaltree.IntervalTree()
        GTF_chromosomes[chrom][start:end] = name
'''

        # Per-read snippet. x[2] is the third element of each item returned
        # by the tree query (the stored name). Names are de-duplicated and
        # sorted so the same set of overlaps always yields the same string.
        # A chromosome missing from GTF_chromosomes gives a fixed label.
        self.METHOD = '''
try: GTF = ','.join(sorted(set( x[2] for x in GTF_chromosomes[CHR][POS:POS+len(SEQ)] )))
except KeyError: GTF = 'Unknown Chromosome'
'''
# Hand the stat to the host framework under the name 'GTF'.
# NOTE(review): addStat is defined outside this file; the meaning of the
# empty-list second argument is not visible here -- confirm against it.
addStat('GTF', [])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment