Created
August 11, 2012 21:48
-
-
Save avalind/3327494 to your computer and use it in GitHub Desktop.
Simple de Bruijn graph
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Written for simplicity, not efficiency.
class Graph(object):
    """Directed graph stored as an adjacency list.

    ``nodes`` maps every known node to the list of nodes that its
    outgoing edges point to. Nodes must be hashable because they are
    used as dictionary keys.
    """

    def __init__(self):
        # node -> list of successor nodes (one entry per added edge)
        self.nodes = {}

    def add_node(self, n):
        """Register ``n`` with no outgoing edges unless it already exists.

        Returns True when the node was newly added, False otherwise.
        Existing edges of an already-known node are left untouched.
        """
        if n in self.nodes:
            return False
        self.nodes[n] = []
        return True

    def add_edge(self, first, sec):
        """Add a directed edge ``first -> sec``.

        Both endpoints are registered as nodes when missing, so every
        edge target is always present as a key in ``nodes``. Adding the
        same edge twice stores it twice.
        """
        self.nodes.setdefault(first, []).append(sec)
        # The target must be a node as well, even when ``first`` was
        # already known; otherwise it would only appear inside a
        # successor list and never as a key.
        self.nodes.setdefault(sec, [])

    def __str__(self):
        """Return the string form of the adjacency mapping."""
        return str(self.nodes)

    def __repr__(self):
        """Return the same text as ``__str__``."""
        return self.__str__()
def all_outgoing_paths(symbol, alfabet):
    """
    Lazily yield every candidate successor of ``symbol``.

    A successor is ``symbol`` with its first element dropped and one
    element of ``alfabet`` appended; one is produced per alphabet
    entry, in the order ``alfabet`` is iterated.
    """
    for letter in alfabet:
        successor = symbol[1:] + letter
        yield successor
def generate_debruijn_graph(seqs, alfabet=('A', 'T', 'C', 'G')):
    """
    Build a de Bruijn graph from the sequences in ``seqs``.

    Every sequence becomes a node. An edge ``a -> b`` is added when
    ``b`` is also one of the given sequences and equals ``a`` with its
    first character dropped and one character of ``alfabet`` appended.

    seqs    -- iterable of equal-length sequences made up only of
               characters from ``alfabet``; items must be hashable.
    alfabet -- iterable of the allowed characters.

    Returns the populated Graph.
    """
    # Materialize once so one-shot iterables work, and keep a set so
    # the membership test in the inner loop does not rescan the list.
    seqs = list(seqs)
    known = set(seqs)

    g = Graph()
    for seq in seqs:
        g.add_node(seq)
        for outgoing in all_outgoing_paths(seq, alfabet):
            if outgoing in known:
                g.add_edge(seq, outgoing)
    return g
def main():
    """Print the de Bruijn graph over all two-character binary strings."""
    symbols = ['11', '01', '00', '10']
    # A parenthesized single argument is valid both as the Python 2
    # print statement and as the Python 3 print function.
    print(generate_debruijn_graph(symbols, alfabet=['1', '0']))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment