Last active
December 10, 2022 00:59
-
-
Save sug0/1bc632eb19ebdeb2d596c1617fba4138 to your computer and use it in GitHub Desktop.
Rust code generator to match against a set of N pre-defined strings, in logarithmic time
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::{BTreeMap, BTreeSet}; | |
use std::env; | |
use std::io::{self, BufRead}; | |
#[derive(Debug, Default)] | |
struct Node { | |
terminal: bool, | |
children: BTreeMap<char, Node>, | |
} | |
#[derive(Debug, Default, Eq, PartialEq, Ord, PartialOrd)] | |
struct CompressedNode { | |
prefix: String, | |
terminal: bool, | |
children: BTreeSet<CompressedNode>, | |
} | |
#[derive(Clone)] | |
struct Vertex<T> { | |
id: usize, | |
label: T, | |
} | |
fn main() { | |
let mut node = Node::default(); | |
let stdin = io::stdin(); | |
for line in stdin.lock().lines().flat_map(|line| line) { | |
for word in line.split_whitespace() { | |
node.insert(word); | |
} | |
} | |
if let Ok(true) = env::var("COMPRESS").map(|s| s == "1") { | |
if let Ok(true) = env::var("EMIT").map(|s| s == "1") { | |
node.compress().emit(); | |
} else { | |
node.compress().dot(); | |
} | |
} else { | |
node.dot(); | |
} | |
} | |
impl Node { | |
fn insert<T>(&mut self, s: T) | |
where | |
T: AsRef<str>, | |
{ | |
self.insert_(s.as_ref().chars()) | |
} | |
fn insert_<I>(&mut self, mut chars: I) | |
where | |
I: Iterator<Item = char>, | |
{ | |
if let Some(ch) = chars.next() { | |
let node = self.children.entry(ch).or_insert_with(Node::default); | |
node.insert_(chars); | |
} else { | |
self.terminal = true; | |
} | |
} | |
fn dot(&self) { | |
let mut gen = 0; | |
println!("digraph {{"); | |
for (ch, child_node) in self.children.iter() { | |
let id = next_id(&mut gen); | |
let from = Vertex { id, label: *ch }; | |
println!( | |
"{} [label=\"{}\",color={}]", | |
from.id, | |
from.label, | |
self.color() | |
); | |
child_node.dot_(&mut gen, from); | |
} | |
println!("}}"); | |
} | |
fn dot_(&self, gen: &mut usize, from: Vertex<char>) { | |
for (to, child_node) in self.children.iter() { | |
let id = next_id(gen); | |
let to = Vertex { id, label: *to }; | |
println!( | |
"{} [label=\"{}\",color={}]", | |
to.id, | |
to.label, | |
child_node.color() | |
); | |
println!("{} -> {};", from.id, to.id); | |
child_node.dot_(gen, to); | |
} | |
} | |
fn color(&self) -> &'static str { | |
if self.terminal { | |
"red" | |
} else { | |
"gray" | |
} | |
} | |
fn compress(&self) -> CompressedNode { | |
self.compress_(String::new()) | |
} | |
fn compress_(&self, mut buf: String) -> CompressedNode { | |
if self.terminal || self.children.len() > 1 { | |
let children = self | |
.children | |
.iter() | |
.map(|(ch, child_node)| child_node.compress_((*ch).into())) | |
.collect(); | |
return CompressedNode { | |
terminal: self.terminal, | |
prefix: buf, | |
children, | |
}; | |
} | |
if let Some((ch, child_node)) = self.children.iter().next() { | |
buf.push(*ch); | |
child_node.compress_(buf) | |
} else { | |
CompressedNode { | |
terminal: true, | |
prefix: buf, | |
children: BTreeSet::new(), | |
} | |
} | |
} | |
} | |
impl CompressedNode { | |
fn dot(&self) { | |
let mut gen = 0; | |
println!("digraph {{"); | |
let from = Vertex { | |
id: next_id(&mut gen), | |
label: self.prefix.as_str(), | |
}; | |
println!( | |
"{} [label=\"{}\",color={}]", | |
from.id, | |
from.label, | |
self.color() | |
); | |
for child_node in self.children.iter() { | |
child_node.dot_(&mut gen, from.clone()); | |
} | |
println!("}}"); | |
} | |
fn dot_<'a>(&'a self, gen: &mut usize, from: Vertex<&'a str>) { | |
let to = Vertex { | |
id: next_id(gen), | |
label: self.prefix.as_str(), | |
}; | |
println!("{} [label=\"{}\",color={}]", to.id, to.label, self.color()); | |
println!("{} -> {};", from.id, to.id); | |
for child_node in self.children.iter() { | |
child_node.dot_(gen, to.clone()); | |
} | |
} | |
fn color(&self) -> &'static str { | |
if self.terminal { | |
"red" | |
} else { | |
"gray" | |
} | |
} | |
fn emit(&self) { | |
println!("fn match_str(s: &str) -> bool {{"); | |
if self.prefix != "" { | |
println!("if s.starts_with(\"{}\") {{", self.prefix); | |
println!("let s = &s[{}..];", self.prefix.len()); | |
if self.terminal { | |
println!("if s.len() == 0 {{"); | |
println!("return true;"); | |
println!("}}"); | |
} | |
} | |
for child_node in self.children.iter() { | |
println!("if s.starts_with(\"{}\") {{", child_node.prefix); | |
child_node.emit_(); | |
println!("return false;"); | |
println!("}}"); | |
} | |
if self.prefix != "" { | |
println!("}}"); | |
} | |
println!("false"); | |
println!("}}"); | |
} | |
fn emit_(&self) { | |
println!("let s = &s[{}..];", self.prefix.len()); | |
if self.terminal { | |
println!("if s.len() == 0 {{"); | |
println!("return true;"); | |
println!("}}"); | |
} | |
for child_node in self.children.iter() { | |
println!("if s.starts_with(\"{}\") {{", child_node.prefix); | |
child_node.emit_(); | |
println!("return false;"); | |
println!("}}"); | |
} | |
} | |
} | |
fn next_id(g: &mut usize) -> usize { | |
let id = *g; | |
*g += 1; | |
id | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment