Skip to content

Instantly share code, notes, and snippets.

@heliy
Created April 18, 2014 09:46
Show Gist options
  • Save heliy/11034806 to your computer and use it in GitHub Desktop.
Save heliy/11034806 to your computer and use it in GitHub Desktop.
GO的is-a关系树 输入为原始obo文件 结果约56万行
import sys
import re
# Keys of the per-term dict filled in by get_terms_namespaces and read by
# build_tree.
# NOTE(review): the name `id` shadows the builtin id() for the whole module.
# Key for the term's GO identifier.
id="id"
# Key for the term's namespace.
ns="namespace"
# Key for the list of parent ids collected from the term's is_a lines.
ia="is-a"
# Key for the list of tree nodes that have been created for the term.
nps="node-ptrs"
def get_terms_namespaces(go_file):
    """Parse the [Term] stanzas of a GO OBO file.

    Args:
        go_file: path of the OBO file to read.

    Returns:
        A ``(terms, namespaces)`` pair. ``terms`` maps each GO id to a dict
        keyed by the module-level constants: ``id`` (the GO id), ``ns`` (the
        namespace), ``ia`` (list of parent ids taken from ``is_a: GO`` lines)
        and ``nps`` (an empty list). ``namespaces`` is a list of the distinct
        namespaces seen.

    Raises:
        KeyError: if a GO term stanza has no ``namespace`` line.
    """
    # Read everything inside `with` so the handle is closed even on error.
    with open(go_file, 'r') as handle:
        stanzas = handle.read().split("[Term]")[1:]
    terms = {}
    # A dict used as a set of namespace names; on Python 3.7+ it also keeps
    # the order in which the namespaces were first seen.
    namespaces = {}
    for info in stanzas:
        term = {}
        is_a = []
        for line in info.splitlines():
            if line.startswith("["):
                # Header of a following non-Term stanza such as [Typedef].
                # Without this stop, the text after the last [Term] would
                # leak its tags (for example its namespace line) into the
                # term being parsed.
                break
            if line.startswith("id: GO"):
                term[id] = line.split()[1]
            elif line.startswith("namespace:"):
                term[ns] = line.split()[1]
            elif line.startswith("is_a: GO"):
                is_a.append(line.split()[1])
        if id not in term:
            # No GO id in this stanza, so there is nothing to index it by.
            continue
        term[ia] = is_a
        term[nps] = []
        terms[term[id]] = term
        namespaces[term[ns]] = 1
    # list(...) gives callers a real list on both Python 2 and Python 3.
    return terms, list(namespaces)
class node(object):
    """One node of the printed tree.

    Attributes set by the constructor:
        parent: the node this one was added to, or None for a root.
        name: the label written for this node.
        childs: list of child nodes, in the order they were added.
    """

    def __init__(self, parent, name):
        self.parent = parent
        self.name = name
        self.childs = []

    def __str__(self, level=0):
        """Return this node and its whole subtree as text.

        Each node takes one line: its name prefixed by ``"[--"`` repeated
        ``level`` times and followed by a newline. Children are written
        right after their parent, one level deeper.

        Args:
            level: depth of this node in the rendering; 0 for the top.
        """
        pieces = [("[--" * level) + self.name + "\n"]
        # Render every child, including an only child: a node with exactly
        # one child must not lose that child's subtree in the output.
        pieces.extend(child.__str__(level + 1) for child in self.childs)
        return "".join(pieces)

    def add_child(self, child_id):
        """Create a node named ``child_id`` under this one and return it."""
        child = node(self, child_id)
        self.childs.append(child)
        return child
def build_tree(terms, namespaces):
    """Assemble the is-a tree for the parsed terms and return its root.

    The root is a node named "ROOT" with one child per entry of
    ``namespaces``. A term without parents is added under the node of its
    namespace; a term with parents is added under every node of every
    parent, so a term can appear in the tree more than once.

    Args:
        terms: dict mapping a term id to its term dict (keys ``id``, ``ns``,
            ``ia``, ``nps``); the ``nps`` list of each term is replaced by
            the list of nodes created for it.
        namespaces: iterable of namespace names.

    Returns:
        The root node of the tree.
    """
    root = node(None, "ROOT")
    # One subtree per namespace, hanging directly below the root.
    namespace_nodes = {name: root.add_child(name) for name in namespaces}

    def place(term):
        """Return the nodes of ``term``, creating them on first request."""
        if term[nps]:
            # Nodes already exist for this term; reuse them.
            return term[nps]
        if not term[ia]:
            # No parents: the term sits right under its namespace node.
            return [namespace_nodes[term[ns]].add_child(term[id])]
        created = []
        for parent_id in term[ia]:
            parent_term = terms[parent_id]
            # Parents are placed first (recursively) so there is something
            # to attach to.
            parent_term[nps] = place(parent_term)
            for anchor in parent_term[nps]:
                present = any(term[id] == kid.name for kid in anchor.childs)
                if not present:
                    created.append(anchor.add_child(term[id]))
        return created

    for record in terms.values():
        record[nps] = place(record)
    return root
if __name__=="__main__":
    # Usage: <script> GO_OBO_FILE
    # Exit with a short message instead of an IndexError traceback when the
    # path argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s GO_OBO_FILE" % sys.argv[0])
    f = sys.argv[1]
    terms, nss = get_terms_namespaces(f)
    tree = build_tree(terms, nss)
    # With a single argument, print(...) gives the same output on Python 2
    # and Python 3, whereas the bare print statement is a SyntaxError on 3.
    print(tree)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment