Skip to content

Instantly share code, notes, and snippets.

@krassowski
Created April 4, 2017 13:55
Show Gist options
  • Save krassowski/af52a96365b491bea66256ae44a05636 to your computer and use it in GitHub Desktop.
Save krassowski/af52a96365b491bea66256ae44a05636 to your computer and use it in GitHub Desktop.
BCD implementation
"""
A partial, lazy implementation of BCD (http://genomebiology.biomedcentral.com/articles/10.1186/gb-2007-8-12-r271)
BiBS, Systems Biology workshops 2017
"""
from math import sqrt
from networkx import Graph, number_of_nodes, connected_component_subgraphs
from networkx.algorithms import edge_betweenness_centrality
def test_graph():
g = Graph()
for x in range(6):
g.add_node(x + 1)
g.add_edge(1, 2)
g.add_edge(3, 2)
g.add_edge(3, 1)
g.add_edge(3, 4)
g.add_edge(5, 4)
g.add_edge(6, 4)
return g
def networkx_from_sif(path, mode='r', delimeter=None):
"""Return NetworkX created from given SIF file"""
g = Graph()
with open(path, mode) as f:
for line in f:
try:
node_from, relation, *nodes_to = line.strip().split(delimeter)
g.add_node(node_from)
g.add_nodes_from(nodes_to)
for node_to in nodes_to:
g.add_edge(node_from, node_to, relation=relation)
except ValueError:
node_from = line.strip()
g.add_node(node_from)
return g
class Node:
def __init__(self, v=None, parent=None):
self.value = v
self.children = []
self.parent = parent
self.is_cluster = False
@property
def is_special_parent(self):
return not self.value and any([c.value for c in self.children])
def maximal_commonality(self):
pass
class BCD:
def __init__(self, graph):
self.graph = graph
self.N = number_of_nodes(graph)
self.commonalities = {
edge: self.commonality(edge)
for edge in self.graph.edges()
}
def commonality(self, edge):
a = edge[0]
b = edge[1]
n = self.graph.degree(a)
m = self.graph.degree(b)
k = (
set(self.graph.neighbors(a))
.intersection(set(self.graph.neighbors(b)))
.difference({a, b})
)
k = n + m / self.N
return k + 1 / sqrt(n * m)
def calc(self):
if number_of_nodes(self.graph) == 1:
return Node(self.graph, parent=self.graph)
else:
parent = Node(None, parent=self.graph)
self.remove()
sub_graphs = connected_component_subgraphs(self.graph)
for graph in sub_graphs:
bcd = BCD(graph)
subtree = bcd.calc()
parent.children.append(subtree)
return parent
def remove(self):
edges_betweenness = edge_betweenness_centrality(self.graph)
fractions = {
edge: edges_betweenness[edge] / self.commonalities[edge]
for edge in self.graph.edges()
}
to_be_removed = max(
fractions.keys(),
key=(lambda key: fractions[key])
)
self.graph.remove_edge(*to_be_removed)
def mark_clusters(node):
if node.is_special_parent:
node.is_cluster = True
else:
for child in node.children:
mark_clusters(child)
def merge_clusters(node):
# TO BE DONE
return node
def filter_out_false_positives(graph):
# TO BE DONE
return graph
def cluster_with_bcd(graph):
graph = filter_out_false_positives(graph)
bcd = BCD(graph)
tree = bcd.calc() # tree is the root
mark_clusters(tree)
return tree
# graph = read_gexf('yeast.gexf', relabel=True)
# graph = networkx_from_sif('yeast.gexf')
graph = test_graph()
clustered = cluster_with_bcd(graph)
# from IPython import embed
# embed()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment