Skip to content

Instantly share code, notes, and snippets.

@heliy
Created May 8, 2014 11:56
Show Gist options
  • Save heliy/2a0bd0d2a181c85bf520 to your computer and use it in GitHub Desktop.
Save heliy/2a0bd0d2a181c85bf520 to your computer and use it in GitHub Desktop.
最大简约法进化系统发育树 固定模型
# coding:utf-8
"""
构建最大简约法进化系统发育树
使用 sh:
python sysevatree data > eva-tree.txt
"""
import sys
keys=['00','01','10','11']
"""
判断该位点是否有信息
"""
def is_sig(blas):
elems={}
for key in keys:
elems[key]=0
for i in blas:
elems[i]+=1
count=elems.values()
count.sort()
if 2<=count[2] : #有两个2以上的位点
return True
else:
return False
"""
从文件里得到显著的位点
返回{id:[blas,rs],}
"""
def getsnps(f):
snps={}
for line in open(f,'r').read().replace("|","").splitlines():
cons=line.split("\t")
id=cons[0]
rs=cons[3]
blas=cons[6:]
if is_sig(blas):
snps[id]=[blas,rs]
return snps
"""
字典里最小value对应的key
"""
def min_key(dic):
min_value=min(dic.values())
for key in dic.keys():
if dic[key]==min_value:
return key
"""
树节点类
"""
class Node(object):
#初始化
def __init__(self,data,left,right):
self.data=data
self.left=left
self.right=right
# 方便print
def __str__(self,level=0):
s=("[--"*level)+str(self.data)+"\n"
if self.left!=None:
s=s+self.left.__str__(level+1)
if self.right!=None:
s=s+self.right.__str__(level+1)
return s
# 从子节点推断可能的取值和相应的得分
def simi_data(self):
if(isinstance(self.data,str)):
return {self.data:0}
else:
l_simi=self.left.simi_data()
r_simi=self.right.simi_data()
self.data={}
for key in keys:
if key in l_simi.keys() and key in r_simi.keys() : #共有
self.data[key]=l_simi[key]+r_simi[key]
elif key in l_simi.keys(): #left有right没有
self.data[key]=l_simi[key]+min(r_simi.values())+1
elif key in r_simi.keys():
self.data[key]=r_simi[key]+min(l_simi.values())+1
else:
continue
return self.data
# 从父节点推断自己的取值
def eva_data(self,up):
if(isinstance(self.data,str)):
return
else:
if up.keys()[0] in self.data.keys(): # 与父节点相同
key=up.keys()[0]
else: # 最优节点
key=min_key(self.data)
self.data={key:self.data[key]}
self.left.eva_data(self.data)
self.right.eva_data(self.data)
"""
N0
/ \
/ \
/ N1
/ / \
/ / \
/ N2 \
/ / \ \
N3 / N4 N5
/ \ / / \ / \
L0 L1 L2 L3 L4 L5 L6
"""
"""
发生树类
"""
class Tree(object):
def __init__(self,blas):
self.leaves=[Node(data=ls,left=None,right=None) for ls in blas]
node3=Node(data=None,left=self.leaves[0],right=self.leaves[1])
node4=Node(data=None,left=self.leaves[3],right=self.leaves[4])
node5=Node(data=None,left=self.leaves[5],right=self.leaves[6])
node2=Node(data=None,left=self.leaves[2],right=node4)
node1=Node(data=None,left=node2,right=node5)
node0=Node(data=None,left=node3,right=node1)
self.nodes=[node0,node1,node2,node3,node4,node5]
self.root=node0
def __str__(self):
return self.root.__str__()
def tree_print(self):
format="""
%s
/ \\
/ \\
/ %s
/ / \\
/ / \\
/ %s \\
/ / \\ \\
%s / %s %s
/ \\ / / \\ / \\
%s %s %s %s %s %s %s
"""
strs=tuple([str(node.data.keys()[0]) for node in self.nodes]+[str(leaf.data) for leaf in self.leaves])
print format % strs
# 最优情况时的根节点
def __get_best_root(self):
simi_root=self.root.simi_data()
key=min_key(simi_root)
return {key:simi_root[key]}
# 将最优情况推行至全树
def take_eva(self):
best_root=self.__get_best_root()
self.root.eva_data(best_root)
if __name__=="__main__":
f=sys.argv[1]
snps=getsnps(f)
for id in snps.keys():
[blas,rs]=snps[id]
tree=Tree(blas)
tree.take_eva()
print rs
tree.tree_print()
print "========================="
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment