Skip to content

Instantly share code, notes, and snippets.

@mtholder
Created June 7, 2011 20:26
Show Gist options
  • Save mtholder/1013074 to your computer and use it in GitHub Desktop.
Save mtholder/1013074 to your computer and use it in GitHub Desktop.
Uses dendropy to calculate a similarity score for the trees in analyses based on split support
import sys
import dendropy
from copy import copy
threshold = float(sys.argv[1])
filename_list = sys.argv[2:]
SCRIPT_NAME = 'conflict_viz'
def debug(*valist):
msg = ' '.join([str(i) for i in valist])
sys.stderr.write(SCRIPT_NAME + ': ' + msg + '\n')
status = debug
warn = status
def die(*valist):
warn(*valist)
sys.exit(1)
dataset = dendropy.DataSet()
dataset.attach_taxon_set()
analysis_list = []
for n, filename in enumerate(filename_list):
debug('reading', filename)
sys.stdout.write(str(n+1) + ' = ' + filename + '\n')
dataset.read_from_path(filename, schema='NEXUS')
# TODO don't use a DataSet, read each tree list separately sez Jeet
assert (n + 1 == len(dataset.tree_lists))
tree_list = dataset.tree_lists[-1]
tree_w_support = tree_list[0]
tree_w_support.update_splits()
split_list = []
for int_nd in tree_w_support.preorder_node_iter(filter_fn=lambda x: x.is_internal()):
if int_nd is not tree_w_support.seed_node:
support = float(int_nd.label)
if support > threshold:
split_list.append((support, int_nd.edge.split_bitmask))
split_list.sort()
analysis_list.append({
'denominator':sum(i[0] for i in split_list),
'split_list':split_list,
'tree':tree_w_support
})
class Stat:
ELIMINATE, DOWNWEIGHTED, COMPLETE_DOWNWEIGHTED = range(3)
mode = Stat.ELIMINATE
full_leaf_mask = dataset.taxon_sets[0].all_taxa_bitmask()
sys.stdout.write('\t'.join([''] + [str(i + 1) for i in range(len(analysis_list))]) + '\n')
for row_n, row in enumerate(analysis_list):
sys.stdout.write(str(row_n + 1))
denominator = row['denominator']
num = len(row['split_list'])
for col_n, col in enumerate(analysis_list):
if col_n == row_n:
sys.stdout.write('\t' + str(denominator) + '/' + str(num))
else:
row_copy = [[i[0], i[1]] for i in row['split_list']]
col_copy = [[i[0], i[1]] for i in col['split_list']]
row_tree = row['tree']
col_tree = col['tree']
union_leaves_mask = row_tree.seed_node.edge.split_bitmask | col_tree.seed_node.edge.split_bitmask
numerator = 0.0
for rs in row_copy:
row_split = rs[1]
for cs in col_copy:
col_split = cs[1]
if not dendropy.treesplit.is_compatible(row_split, col_split, union_leaves_mask):
# splits in conflict. Lower the "score" for the row_split
if mode == Stat.ELIMINATE:
rs[0] = 0.0
break
elif mode == Stat.DOWNWEIGHTED:
rs[0] *= (1 - cs[0])
break
elif mode == Stat.COMPLETE_DOWNWEIGHTED:
rs[0] *= (1 - cs[0])
numerator += rs[0]
sys.stdout.write('\t' + str(numerator/denominator))
sys.stdout.write('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment