Skip to content

Instantly share code, notes, and snippets.

@sug0
Last active December 10, 2022 00:59
Show Gist options
  • Save sug0/1bc632eb19ebdeb2d596c1617fba4138 to your computer and use it in GitHub Desktop.
Save sug0/1bc632eb19ebdeb2d596c1617fba4138 to your computer and use it in GitHub Desktop.
Rust code generator to match against a set of N pre-defined strings, in logarithmic time
use std::collections::{BTreeMap, BTreeSet};
use std::env;
use std::io::{self, BufRead};
#[derive(Debug, Default)]
struct Node {
terminal: bool,
children: BTreeMap<char, Node>,
}
#[derive(Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
struct CompressedNode {
prefix: String,
terminal: bool,
children: BTreeSet<CompressedNode>,
}
#[derive(Clone)]
struct Vertex<T> {
id: usize,
label: T,
}
fn main() {
let mut node = Node::default();
let stdin = io::stdin();
for line in stdin.lock().lines().flat_map(|line| line) {
for word in line.split_whitespace() {
node.insert(word);
}
}
if let Ok(true) = env::var("COMPRESS").map(|s| s == "1") {
if let Ok(true) = env::var("EMIT").map(|s| s == "1") {
node.compress().emit();
} else {
node.compress().dot();
}
} else {
node.dot();
}
}
impl Node {
fn insert<T>(&mut self, s: T)
where
T: AsRef<str>,
{
self.insert_(s.as_ref().chars())
}
fn insert_<I>(&mut self, mut chars: I)
where
I: Iterator<Item = char>,
{
if let Some(ch) = chars.next() {
let node = self.children.entry(ch).or_insert_with(Node::default);
node.insert_(chars);
} else {
self.terminal = true;
}
}
fn dot(&self) {
let mut gen = 0;
println!("digraph {{");
for (ch, child_node) in self.children.iter() {
let id = next_id(&mut gen);
let from = Vertex { id, label: *ch };
println!(
"{} [label=\"{}\",color={}]",
from.id,
from.label,
self.color()
);
child_node.dot_(&mut gen, from);
}
println!("}}");
}
fn dot_(&self, gen: &mut usize, from: Vertex<char>) {
for (to, child_node) in self.children.iter() {
let id = next_id(gen);
let to = Vertex { id, label: *to };
println!(
"{} [label=\"{}\",color={}]",
to.id,
to.label,
child_node.color()
);
println!("{} -> {};", from.id, to.id);
child_node.dot_(gen, to);
}
}
fn color(&self) -> &'static str {
if self.terminal {
"red"
} else {
"gray"
}
}
fn compress(&self) -> CompressedNode {
self.compress_(String::new())
}
fn compress_(&self, mut buf: String) -> CompressedNode {
if self.terminal || self.children.len() > 1 {
let children = self
.children
.iter()
.map(|(ch, child_node)| child_node.compress_((*ch).into()))
.collect();
return CompressedNode {
terminal: self.terminal,
prefix: buf,
children,
};
}
if let Some((ch, child_node)) = self.children.iter().next() {
buf.push(*ch);
child_node.compress_(buf)
} else {
CompressedNode {
terminal: true,
prefix: buf,
children: BTreeSet::new(),
}
}
}
}
impl CompressedNode {
fn dot(&self) {
let mut gen = 0;
println!("digraph {{");
let from = Vertex {
id: next_id(&mut gen),
label: self.prefix.as_str(),
};
println!(
"{} [label=\"{}\",color={}]",
from.id,
from.label,
self.color()
);
for child_node in self.children.iter() {
child_node.dot_(&mut gen, from.clone());
}
println!("}}");
}
fn dot_<'a>(&'a self, gen: &mut usize, from: Vertex<&'a str>) {
let to = Vertex {
id: next_id(gen),
label: self.prefix.as_str(),
};
println!("{} [label=\"{}\",color={}]", to.id, to.label, self.color());
println!("{} -> {};", from.id, to.id);
for child_node in self.children.iter() {
child_node.dot_(gen, to.clone());
}
}
fn color(&self) -> &'static str {
if self.terminal {
"red"
} else {
"gray"
}
}
fn emit(&self) {
println!("fn match_str(s: &str) -> bool {{");
if self.prefix != "" {
println!("if s.starts_with(\"{}\") {{", self.prefix);
println!("let s = &s[{}..];", self.prefix.len());
if self.terminal {
println!("if s.len() == 0 {{");
println!("return true;");
println!("}}");
}
}
for child_node in self.children.iter() {
println!("if s.starts_with(\"{}\") {{", child_node.prefix);
child_node.emit_();
println!("return false;");
println!("}}");
}
if self.prefix != "" {
println!("}}");
}
println!("false");
println!("}}");
}
fn emit_(&self) {
println!("let s = &s[{}..];", self.prefix.len());
if self.terminal {
println!("if s.len() == 0 {{");
println!("return true;");
println!("}}");
}
for child_node in self.children.iter() {
println!("if s.starts_with(\"{}\") {{", child_node.prefix);
child_node.emit_();
println!("return false;");
println!("}}");
}
}
}
fn next_id(g: &mut usize) -> usize {
let id = *g;
*g += 1;
id
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment