This gist is superseded by https://github.com/EliahKagan/SubtreeSearch.
Last active
August 31, 2021 03:04
-
-
Save EliahKagan/61bce6756440f13cd5e1465538c18107 to your computer and use it in GitHub Desktop.
linear-time subtree search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Linear time subtree search (in Crystal, with GraphViz DOT generation) | |
# | |
# Copyright (c) 2020 Eliah Kagan <degeneracypressure@gmail.com> | |
# | |
# Permission to use, copy, modify, and/or distribute this software for any | |
# purpose with or without fee is hereby granted. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH | |
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY | |
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, | |
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM | |
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
# PERFORMANCE OF THIS SOFTWARE. | |
# To see the visualization, run "crystal subtree-search.cr", and supply the | |
# text written to standard output, which consists of a graph description in DOT | |
# code, to GraphViz. One way to do that is to copy it to your clipboard and | |
# paste it at https://dreampuf.github.io/GraphvizOnline. | |
require "deque" | |
# A mutable node for a binary tree with no parent pointers. | |
class Node(T) | |
property key : T | |
property left : Node(T)? | |
property right : Node(T)? | |
def initialize(@key, @left, @right) | |
end | |
def initialize(@key) | |
@left = @right = nil | |
end | |
end | |
# Compactly expresses an internal node. | |
def tree(key : T, left : Node(T)?, right : Node(T)?) forall T | |
Node(T).new(key, left, right) | |
end | |
# Compactly expresses a leaf node. | |
def tree(key : T) forall T | |
Node(T).new(key) | |
end | |
# Serializes the tree rooted at *root* as DOT code, intepretable by Graphviz. | |
# Inspired by "Visualizing binary trees with Graphviz" by Eli Bendersky: | |
# https://eli.thegreenplace.net/2009/11/23/visualizing-binary-trees-with-graphviz | |
# That approach uses a node's key to identify a vertex. Here, I identify all | |
# vertices by ascending integers and separately label each with its node's key | |
# (for those not representing nil). This permits trees with duplicate keys. | |
def dot(root : Node(T)?, name = "Tree", io = STDOUT, indent = 4) forall T | |
margin = " " * indent | |
io.puts %(digraph "#{name}" {) | |
queue = Deque(NamedTuple(node: Node(T), vertex: Int32)).new | |
order = 0 # The number of vertices encountered so far. | |
emit_vertex = ->(node : Node(T)?) do | |
vertex = order | |
order += 1 | |
if node | |
io.puts %(#{margin}#{vertex} [label="#{node.key}"]) | |
queue.push({node: node, vertex: vertex}) | |
else | |
io.puts %(#{margin}#{vertex} [shape=point]) | |
end | |
vertex | |
end | |
emit_edge = ->(src : Int32, dest : Int32) do | |
io.puts "#{margin}#{src} -> #{dest}" | |
nil | |
end | |
emit_vertex.call(root) | |
until queue.empty? | |
parent = queue.shift | |
emit_edge.call(parent[:vertex], emit_vertex.call(parent[:node].left)) | |
emit_edge.call(parent[:vertex], emit_vertex.call(parent[:node].right)) | |
end | |
io.puts "}" | |
end | |
# Finds all the subtrees in *corpus* that match *pattern*. | |
# Runtime is linear in the sum of the sizes of the two trees. | |
def search(corpus : Node(T)?, pattern : Node(T)) forall T | |
codes = {} of Tuple(T, Int32, Int32) => Int32 # {key, code, code} => code | |
encode = Proc(Node(T)?, Int32).new { raise "Bug: encode not reassigned" } | |
encode = ->(node : Node(T)?) do | |
return 0 unless node | |
triple = {node.key, encode.call(node.left), encode.call(node.right)} | |
codes[triple] ||= codes.size + 1 # +1 because nil => 0 implicitly. | |
end | |
pattern_code = encode.call(pattern) | |
matches = [] of Node(T) | |
search = Proc(Node(T)?, Int32?).new { raise "Bug: search not reassigned" } | |
search = ->(node : Node(T)?) do | |
return 0 unless node | |
left_code = search.call(node.left) | |
right_code = search.call(node.right) | |
return nil unless left_code && right_code | |
code = codes[{node.key, left_code, right_code}]? | |
return nil unless code | |
matches << node if code == pattern_code | |
code | |
end | |
search.call(corpus) | |
matches | |
end | |
corpus = tree("dog", tree("cat", tree("mule"), | |
tree("horse")), | |
tree("snake", tree("lizard", nil, | |
tree("iguana")), | |
tree("fox", tree("cat", tree("mule"), | |
tree("horse")), | |
tree("human")))) | |
pattern = tree("cat", tree("mule"), tree("horse")) | |
matches = search(corpus, pattern) | |
matches.each_with_index do |match, index| | |
dot(match, "Match #{index}") | |
puts | |
end | |
matches[0].right.as(Node(String)).left = tree("donkey") | |
dot(corpus, "corpus (after modification)") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Linear time subtree search (in C#, for LINQPad) | |
// | |
// Copyright (c) 2020 Eliah Kagan <degeneracypressure@gmail.com> | |
// | |
// Permission to use, copy, modify, and/or distribute this software for any | |
// purpose with or without fee is hereby granted. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | |
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | |
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | |
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
// Paste this code into LINQPad (https://www.linqpad.net/). | |
/// <summary> | |
/// A mutable node for a binary tree with no parent pointers. | |
/// </summary> | |
internal sealed class Node<T> { | |
public Node(T key, Node<T>? left, Node<T>? right) | |
=> (Key, Left, Right) = (key, left, right); | |
public Node(T key) : this(key, null, null) { } | |
public T Key { get; set; } | |
public Node<T>? Left { get; set; } | |
public Node<T>? Right { get; set; } | |
} | |
internal static class NodeExtensions { | |
/// <summary> | |
/// Finds all subtrees in <c>self</c> that match <c>other</c>. | |
/// </summary> | |
/// <remarks> | |
/// Runtime is linear in the sum of the sizes of the two trees. | |
/// </remarks> | |
internal static List<Node<T>> FindAll<T>(this Node<T>? self, Node<T> other) | |
{ | |
var codes = new Dictionary<(T, int, int), int>(); // (key, code, code) -> code | |
int Encode(Node<T>? node) | |
{ | |
if (node == null) return 0; | |
var triple = (node.Key, Encode(node.Left), Encode(node.Right)); | |
if (!codes.TryGetValue(triple, out var code)) { | |
code = codes.Count + 1; // +1 because null -> 0 implicitly | |
codes.Add(triple, code); | |
} | |
return code; | |
} | |
var patternCode = Encode(other); | |
var matches = new List<Node<T>>(); | |
int? Search(Node<T>? node) | |
{ | |
if (node == null) return 0; | |
var leftCode = Search(node.Left); | |
var rightCode = Search(node.Right); | |
if (leftCode == null || rightCode == null) return null; | |
var triple = (node.Key, (int)leftCode, (int)rightCode); | |
if (!codes.TryGetValue(triple, out var code)) return null; | |
if (code == patternCode) matches.Add(node); | |
return code; | |
} | |
Search(self); | |
return matches; | |
} | |
} | |
internal static class UnitTest { | |
/// <summary> | |
/// Helper function for readably expressing an internal node. | |
/// </summary> | |
private static Node<T> Tree<T>(T key, Node<T>? left, Node<T>? right) | |
=> new Node<T>(key, left, right); | |
/// <summary> | |
/// Helper function for readably expressing a leaf node. | |
/// </summary> | |
private static Node<T> Tree<T>(T key) => new Node<T>(key); | |
private static void Main() | |
{ | |
var tree = Tree("dog", Tree("cat", Tree("mule"), | |
Tree("horse")), | |
Tree("snake", Tree("lizard", null, | |
Tree("iguana")), | |
Tree("fox", Tree("cat", Tree("mule"), | |
Tree("horse")), | |
Tree("human")))); | |
var pattern = Tree("cat", Tree("mule"), Tree("horse")); | |
var matches = tree.FindAll(pattern); | |
matches.Dump(nameof(matches)); | |
matches[0].Right!.Left = Tree("donkey"); | |
tree.Dump($"{nameof(tree)} (after modification)", depth: 10); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment