Last active
March 14, 2020 17:23
-
-
Save dang3r/f2434de0a167ecfb0b90f48be7d6df67 to your computer and use it in GitHub Desktop.
Use regular expressions to find a path between two points in a directed acylic graph (DAG)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from string import ascii_lowercase | |
from typing import List, Tuple | |
# Example graphs | |
graph_basic = { | |
"a": ("b", "c", "f"), | |
"b": ("h"), | |
"c": ("j"), | |
"f": ("j"), | |
"h": (), | |
"j": ("x", "y"), | |
"x": (), | |
"z": (), | |
"y": ("z") | |
} | |
# A graph containing an edge from every character in the latin alphabet to its successor. | |
# a -> b, b -> c, ..., y->z | |
graph_alpha = {char: (ascii_lowercase[idx+1]) for idx, char in enumerate(ascii_lowercase[:-1])} | |
graph_alpha["z"] = () | |
def tsort(src: str, graph: dict, seen: set, done: list): | |
if src in seen: | |
return | |
seen.add(src) | |
for children in graph.get(src, []): | |
tsort(children, graph, seen, done) | |
done.append(src) | |
def topological_sort(graph: dict) -> list: | |
"""Return a topological ordering of a graph's vertifces""" | |
seen = set() | |
done = list() | |
# In case the graph has multiple partitions | |
for node in graph: | |
tsort(node, graph, seen, done) | |
return done[::-1] | |
def find_path(graph: dict, src: str, dst: str) -> tuple: | |
""" | |
Use regular expressions to find a path between two nodes in a graph. | |
The graph must be a directed acylic graph (DAG). | |
""" | |
if src == dst: | |
return [src] | |
g_str = " ".join([f'{n}{"".join(graph[n])}' for n in topological_sort(graph)]) | |
# Create a separate regular expression for each potential path length | |
# Use named backreferences and pipe all separate expressions together. | |
rs = [] | |
for i in range(1, len(graph)): | |
s = "(?:" + src | |
for j in range(1, i+1): | |
# Named capture groups require a non-digit first char | |
name = "dan%d%d" % (i, j) | |
s += r"\w*(?P<%s>\w).*\s(?P=%s)" % (name, name) | |
s += r"\w*%s)" % (dst) | |
rs.append(s) | |
m = re.findall("|".join(rs), g_str) | |
if m: | |
# Filter out all empty capture groups | |
nodes = [item for item in m[0] if item != ""] | |
return tuple([src, *nodes, dst]) | |
def test_find_path(): | |
assert find_path(graph_basic, "a", "z") in set([tuple("acjyz"), tuple("afjyz")]) | |
assert find_path(graph_basic, "a", "y") in set([tuple("acjy"), tuple("afjy")]) | |
assert find_path(graph_basic, "a", "h") == tuple("abh") | |
assert find_path(graph_alpha, "a", "j") == tuple("abcdefghij") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment