Skip to content

Instantly share code, notes, and snippets.

@dang3r
Last active March 14, 2020 17:23
Show Gist options
  • Save dang3r/f2434de0a167ecfb0b90f48be7d6df67 to your computer and use it in GitHub Desktop.
Save dang3r/f2434de0a167ecfb0b90f48be7d6df67 to your computer and use it in GitHub Desktop.
Use regular expressions to find a path between two points in a directed acylic graph (DAG)
import re
from string import ascii_lowercase
from typing import List, Tuple
# Example graphs
graph_basic = {
"a": ("b", "c", "f"),
"b": ("h"),
"c": ("j"),
"f": ("j"),
"h": (),
"j": ("x", "y"),
"x": (),
"z": (),
"y": ("z")
}
# A graph containing an edge from every character in the latin alphabet to its successor.
# a -> b, b -> c, ..., y->z
graph_alpha = {char: (ascii_lowercase[idx+1]) for idx, char in enumerate(ascii_lowercase[:-1])}
graph_alpha["z"] = ()
def tsort(src: str, graph: dict, seen: set, done: list):
if src in seen:
return
seen.add(src)
for children in graph.get(src, []):
tsort(children, graph, seen, done)
done.append(src)
def topological_sort(graph: dict) -> list:
"""Return a topological ordering of a graph's vertifces"""
seen = set()
done = list()
# In case the graph has multiple partitions
for node in graph:
tsort(node, graph, seen, done)
return done[::-1]
def find_path(graph: dict, src: str, dst: str) -> tuple:
"""
Use regular expressions to find a path between two nodes in a graph.
The graph must be a directed acylic graph (DAG).
"""
if src == dst:
return [src]
g_str = " ".join([f'{n}{"".join(graph[n])}' for n in topological_sort(graph)])
# Create a separate regular expression for each potential path length
# Use named backreferences and pipe all separate expressions together.
rs = []
for i in range(1, len(graph)):
s = "(?:" + src
for j in range(1, i+1):
# Named capture groups require a non-digit first char
name = "dan%d%d" % (i, j)
s += r"\w*(?P<%s>\w).*\s(?P=%s)" % (name, name)
s += r"\w*%s)" % (dst)
rs.append(s)
m = re.findall("|".join(rs), g_str)
if m:
# Filter out all empty capture groups
nodes = [item for item in m[0] if item != ""]
return tuple([src, *nodes, dst])
def test_find_path():
assert find_path(graph_basic, "a", "z") in set([tuple("acjyz"), tuple("afjyz")])
assert find_path(graph_basic, "a", "y") in set([tuple("acjy"), tuple("afjy")])
assert find_path(graph_basic, "a", "h") == tuple("abh")
assert find_path(graph_alpha, "a", "j") == tuple("abcdefghij")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment