Skip to content

Instantly share code, notes, and snippets.

@benmkw
Created December 29, 2019 09:33
Show Gist options
  • Save benmkw/4c3d04ec87dd7668e4af972d5ae6971b to your computer and use it in GitHub Desktop.
Save benmkw/4c3d04ec87dd7668e4af972d5ae6971b to your computer and use it in GitHub Desktop.
trying to build a trie at comptime for efficient string matching
// https://gcc.godbolt.org/z/n5eApv
// https://gcc.godbolt.org/z/Lximvt shorter, remove inline -> issue
// https://gcc.godbolt.org/z/dLznz9 wip more inline
// ok https://gcc.godbolt.org/z/3mv-Z6
const std = @import("std");
const warn = std.debug.warn;
const math = std.math;
const VALUE = enum {
AA,
ABCe,
BDPE,
CC,
CD,
X,
ERROR,
};
fn p(v: var) void {
warn("{}\n", .{v});
}
const TrieNode = struct {
// only ascii
childs: [128]?*TrieNode = [_]?*TrieNode{null} ** 128,
value: ?union(enum) {
char: u8,
end: VALUE,
} = null,
};
const TrieNodePool = struct {
nodes: []TrieNode,
curr_index: usize,
max_size: usize,
fn allocate(self: *TrieNodePool) *TrieNode {
std.debug.assert(self.curr_index < self.max_size);
self.curr_index += 1;
return &self.nodes[self.curr_index - 1];
}
};
fn insert_word_into_trie(root: *TrieNode, word: []const u8, trie_node_pool: *TrieNodePool) void {
var curr_node = root;
for (word) |character| {
for (curr_node.childs) |*child| {
if (child.* == null) {
// searched and found no existing node with value
child.* = trie_node_pool.allocate();
// SIGBUS (Misaligned address error) if more comptime enabled
p("until here");
child.*.?.value = .{ .char = character };
p("but not until here");
curr_node = child.*.?;
break;
} else if (child.*.?.value) |some_value| {
switch (some_value) {
.char => |c| {
if (c == character) {
// character/ substring already present
curr_node = child.*.?;
break;
}
},
.end => {
// continue
},
}
} else {
std.debug.assert(child.*.?.value.?.char != character);
}
}
}
curr_node.childs[0] = trie_node_pool.allocate();
curr_node.childs[0].?.value = .{ .end = std.meta.stringToEnum(VALUE, word).? };
}
fn create_trie_from(inputs: [][]const u8, trie_node_pool: *TrieNodePool) TrieNode {
var root = TrieNode{};
for (inputs) |word| {
insert_word_into_trie(&root, word, trie_node_pool);
}
return root;
}
const PrintOffset = struct {
value: usize = 0,
};
fn print_trie(trieNode: TrieNode, offset: PrintOffset) void {
for (trieNode.childs) |child| {
if (child) |some_child| {
var i: usize = 0;
while (i < offset.value) : (i += 1) {
warn("~", .{});
}
if (some_child.value == null) {
warn("{c}\n", .{some_child.value});
break;
}
switch (some_child.value.?) {
.char => |c| warn("{c}\n", .{c}),
.end => |e| warn("END: {}\n", .{e}),
}
print_trie(some_child.*, .{ .value = offset.value + 1 });
} else if (child == null) {
// p("null");
break;
}
}
}
fn match_with(str: []const u8, root: TrieNode) VALUE {
var curr_node = root;
for (str) |char| {
for (root.childs) |child| {
if (child) |some_child| {
switch (some_child.value.?) {
.char => |c| {
if (some_child.value.?.char == char) {
if (str.len != 1) {
// need to match the rest
return match_with(str[1..str.len], some_child.*);
} else {
// done
// warn("{c}\n", .{some_child.value});
return some_child.childs[0].?.value.?.end;
}
}
},
.end => |e| {
return VALUE.ERROR;
},
}
} else {
return VALUE.ERROR;
}
}
}
return VALUE.ERROR; // TODO use !VALUE as return value
}
pub export fn main() void {
comptime var input = [_][]const u8{ "CD", "AA", "BDPE", "ABCe", "CC", "X" };
// comptime var input = [_][]const u8{ "CD", "AA" };
// comptime var input = [_][]const u8{"aCDefag"};
warn("{} with {} elements\n", .{ input, input.len });
const size: usize = 70;
// making this var comptime crashes
comptime var nodes = [_]TrieNode{TrieNode{}} ** size;
std.debug.assert(nodes.len == size);
var trie_node_pool = TrieNodePool{ .nodes = &nodes, .max_size = size, .curr_index = 0 };
std.debug.assert(trie_node_pool.nodes.len == size);
const trie = create_trie_from(&input, &trie_node_pool);
print_trie(trie, .{});
p(match_with("CD", trie));
p(match_with("AA", trie));
p(match_with("BDPE", trie));
p(match_with("ABCe", trie));
p(match_with("CC", trie));
p(match_with("X", trie));
p(match_with("Xe", trie));
p("\nDONE");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment