Skip to content

Instantly share code, notes, and snippets.

@EliahKagan
Last active August 31, 2021 03:04
Show Gist options
  • Save EliahKagan/61bce6756440f13cd5e1465538c18107 to your computer and use it in GitHub Desktop.
Save EliahKagan/61bce6756440f13cd5e1465538c18107 to your computer and use it in GitHub Desktop.
linear-time subtree search
# Linear time subtree search (in Crystal, with GraphViz DOT generation)
#
# Copyright (c) 2020 Eliah Kagan <degeneracypressure@gmail.com>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
# To see the visualization, run "crystal subtree-search.cr", and supply the
# text written to standard output, which consists of a graph description in DOT
# code, to GraphViz. One way to do that is to copy it to your clipboard and
# paste it at https://dreampuf.github.io/GraphvizOnline.
require "deque"
# A mutable node for a binary tree with no parent pointers.
class Node(T)
property key : T
property left : Node(T)?
property right : Node(T)?
def initialize(@key, @left, @right)
end
def initialize(@key)
@left = @right = nil
end
end
# Compactly expresses an internal node.
def tree(key : T, left : Node(T)?, right : Node(T)?) forall T
Node(T).new(key, left, right)
end
# Compactly expresses a leaf node.
def tree(key : T) forall T
Node(T).new(key)
end
# Serializes the tree rooted at *root* as DOT code, intepretable by Graphviz.
# Inspired by "Visualizing binary trees with Graphviz" by Eli Bendersky:
# https://eli.thegreenplace.net/2009/11/23/visualizing-binary-trees-with-graphviz
# That approach uses a node's key to identify a vertex. Here, I identify all
# vertices by ascending integers and separately label each with its node's key
# (for those not representing nil). This permits trees with duplicate keys.
def dot(root : Node(T)?, name = "Tree", io = STDOUT, indent = 4) forall T
margin = " " * indent
io.puts %(digraph "#{name}" {)
queue = Deque(NamedTuple(node: Node(T), vertex: Int32)).new
order = 0 # The number of vertices encountered so far.
emit_vertex = ->(node : Node(T)?) do
vertex = order
order += 1
if node
io.puts %(#{margin}#{vertex} [label="#{node.key}"])
queue.push({node: node, vertex: vertex})
else
io.puts %(#{margin}#{vertex} [shape=point])
end
vertex
end
emit_edge = ->(src : Int32, dest : Int32) do
io.puts "#{margin}#{src} -> #{dest}"
nil
end
emit_vertex.call(root)
until queue.empty?
parent = queue.shift
emit_edge.call(parent[:vertex], emit_vertex.call(parent[:node].left))
emit_edge.call(parent[:vertex], emit_vertex.call(parent[:node].right))
end
io.puts "}"
end
# Finds all the subtrees in *corpus* that match *pattern*.
# Runtime is linear in the sum of the sizes of the two trees.
def search(corpus : Node(T)?, pattern : Node(T)) forall T
codes = {} of Tuple(T, Int32, Int32) => Int32 # {key, code, code} => code
encode = Proc(Node(T)?, Int32).new { raise "Bug: encode not reassigned" }
encode = ->(node : Node(T)?) do
return 0 unless node
triple = {node.key, encode.call(node.left), encode.call(node.right)}
codes[triple] ||= codes.size + 1 # +1 because nil => 0 implicitly.
end
pattern_code = encode.call(pattern)
matches = [] of Node(T)
search = Proc(Node(T)?, Int32?).new { raise "Bug: search not reassigned" }
search = ->(node : Node(T)?) do
return 0 unless node
left_code = search.call(node.left)
right_code = search.call(node.right)
return nil unless left_code && right_code
code = codes[{node.key, left_code, right_code}]?
return nil unless code
matches << node if code == pattern_code
code
end
search.call(corpus)
matches
end
corpus = tree("dog", tree("cat", tree("mule"),
tree("horse")),
tree("snake", tree("lizard", nil,
tree("iguana")),
tree("fox", tree("cat", tree("mule"),
tree("horse")),
tree("human"))))
pattern = tree("cat", tree("mule"), tree("horse"))
matches = search(corpus, pattern)
matches.each_with_index do |match, index|
dot(match, "Match #{index}")
puts
end
matches[0].right.as(Node(String)).left = tree("donkey")
dot(corpus, "corpus (after modification)")
// Linear time subtree search (in C#, for LINQPad)
//
// Copyright (c) 2020 Eliah Kagan <degeneracypressure@gmail.com>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// Paste this code into LINQPad (https://www.linqpad.net/).
/// <summary>
/// A mutable node for a binary tree with no parent pointers.
/// </summary>
internal sealed class Node<T> {
public Node(T key, Node<T>? left, Node<T>? right)
=> (Key, Left, Right) = (key, left, right);
public Node(T key) : this(key, null, null) { }
public T Key { get; set; }
public Node<T>? Left { get; set; }
public Node<T>? Right { get; set; }
}
internal static class NodeExtensions {
/// <summary>
/// Finds all subtrees in <c>self</c> that match <c>other</c>.
/// </summary>
/// <remarks>
/// Runtime is linear in the sum of the sizes of the two trees.
/// </remarks>
internal static List<Node<T>> FindAll<T>(this Node<T>? self, Node<T> other)
{
var codes = new Dictionary<(T, int, int), int>(); // (key, code, code) -> code
int Encode(Node<T>? node)
{
if (node == null) return 0;
var triple = (node.Key, Encode(node.Left), Encode(node.Right));
if (!codes.TryGetValue(triple, out var code)) {
code = codes.Count + 1; // +1 because null -> 0 implicitly
codes.Add(triple, code);
}
return code;
}
var patternCode = Encode(other);
var matches = new List<Node<T>>();
int? Search(Node<T>? node)
{
if (node == null) return 0;
var leftCode = Search(node.Left);
var rightCode = Search(node.Right);
if (leftCode == null || rightCode == null) return null;
var triple = (node.Key, (int)leftCode, (int)rightCode);
if (!codes.TryGetValue(triple, out var code)) return null;
if (code == patternCode) matches.Add(node);
return code;
}
Search(self);
return matches;
}
}
internal static class UnitTest {
/// <summary>
/// Helper function for readably expressing an internal node.
/// </summary>
private static Node<T> Tree<T>(T key, Node<T>? left, Node<T>? right)
=> new Node<T>(key, left, right);
/// <summary>
/// Helper function for readably expressing a leaf node.
/// </summary>
private static Node<T> Tree<T>(T key) => new Node<T>(key);
private static void Main()
{
var tree = Tree("dog", Tree("cat", Tree("mule"),
Tree("horse")),
Tree("snake", Tree("lizard", null,
Tree("iguana")),
Tree("fox", Tree("cat", Tree("mule"),
Tree("horse")),
Tree("human"))));
var pattern = Tree("cat", Tree("mule"), Tree("horse"));
var matches = tree.FindAll(pattern);
matches.Dump(nameof(matches));
matches[0].Right!.Left = Tree("donkey");
tree.Dump($"{nameof(tree)} (after modification)", depth: 10);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment