Created
November 1, 2011 17:15
-
-
Save alexkalderimis/1331226 to your computer and use it in GitHub Desktop.
Find regions with most genes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# From a file containing a set of intervals identified as | |
# regulating gene expression, identify display the 3 regions | |
# with the highest number of genes | |
from intermine.webservice import Service | |
from interminebio import RegionQuery | |
intervals = open("expression_regions.bed").readlines() | |
s = Service("www.flymine.org/query", token = "TOKEN") | |
org = "D. melanogaster" | |
types = ["Gene"] | |
lists = [(i, s.create_list(RegionQuery(s, org, types, [i]))) | |
for i in intervals] | |
top3 = sorted(lists, key=lambda x: x[1].size, reverse = True)[:3] | |
for interval, genes in top3: | |
print "%s has %d genes" % (interval.strip(), genes.size) | |
for gene in genes: | |
loc = gene.chromosomeLocation | |
vals = (gene.symbol if gene.symbol else gene.primaryIdentifier, | |
gene.chromosome.primaryIdentifier, | |
loc.start, loc.end) | |
print "%s: %s:%d..%d" % vals |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment