Last active
June 17, 2024 12:13
-
-
Save pczarn/c51dd53b5e194467a79ff5592856d14c to your computer and use it in GitHub Desktop.
mini-earley
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/target | |
callgrind.out.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file is automatically @generated by Cargo. | |
# It is not intended for manual editing. | |
version = 3 | |
[[package]] | |
name = "tiny-earley" | |
version = "0.1.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "tiny-earley" | |
version = "0.1.0" | |
edition = "2021" | |
[lib] | |
path = "lib.rs" | |
[benches] | |
path = "bench_parser.rs" | |
[dependencies] | |
[profile.bench] | |
opt-level = 3 # Use slightly better optimizations. | |
[features] | |
default = ["debug", "rel_compact_forest"] | |
debug = [] | |
simple = [] | |
extra_inline = [] | |
vec2d = [] | |
simplest_forest = [] | |
simple_forest = [] | |
compact_forest = [] | |
linked_forest = [] | |
forest_size = [] | |
rel_compact_forest = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Forest | |
use std::{collections::LinkedList, iter}; | |
use super::*; | |
#[derive(Clone)] | |
pub struct Forest { | |
graph: Vec<u8>, | |
eval: Vec<Option<usize>>, | |
} | |
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] | |
pub struct NodeHandle(usize); | |
#[derive(Clone)] | |
enum Node { | |
Product { | |
action: u32, | |
left_factor: NodeHandle, | |
right_factor: Option<NodeHandle>, | |
}, | |
Leaf { | |
terminal: Symbol, | |
values: u32, | |
}, | |
} | |
static NODE_SIZES: &'static [(u8, u8, u8, bool)] = &[ | |
(8, 8, 0, false), | |
(16, 16, 0, false), | |
(8, 8, 0, true), | |
(8, 8, 8, true), | |
(8, 16, 8, true), | |
(16, 16, 8, true), | |
(8, 16, 16, true), | |
(8, 16, 0, true), | |
(8, 24, 24, true), | |
(16, 24, 24, true), | |
(16, 32, 32, true), | |
(16, 32, 0, true), | |
(32, 64, 64, true), | |
(32, 64, 0, true), | |
(32, 64, 16, true), | |
]; | |
const NULL_ACTION: u32 = 0; | |
impl Forest { | |
fn push_node(&mut self, node: Node) { | |
let size = |mut x: u64| { | |
let mut result = 0; | |
while x != 0 { | |
x = x >> 8; | |
result += 8; | |
} | |
result | |
}; | |
let tup = match node { | |
Node::Product { action, left_factor, right_factor } => { | |
(action as u32, left_factor.0 as u64, right_factor.map_or(0, |f| f.0 as u64), true) | |
} | |
Node::Leaf { terminal, values } => { | |
(terminal.0 as u32, values as u64, 0, false) | |
} | |
}; | |
let s = (size(tup.0 as u64), size(tup.1), size(tup.2)); | |
let (idx, size) = NODE_SIZES.iter().enumerate().find(|(_i, sizes)| s.0 <= sizes.0 && s.1 <= sizes.1 && s.2 <= sizes.2 && tup.3 == sizes.3).unwrap(); | |
// if idx.is_none() { | |
// panic!("wrong size for {:?} {:?}", tup, size(tup.1)); | |
// } | |
// let idx = idx.unwrap(); | |
// let size = NODE_SIZES[idx]; | |
let mut result = [0u8; 24]; | |
result[0 .. size.0 as usize / 8].copy_from_slice(&u32::to_le_bytes(tup.0)[0 .. size.0 as usize / 8]); | |
result[size.0 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.1)[0 .. size.1 as usize / 8]); | |
result[size.0 as usize / 8 + size.1 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.2)[0 .. size.2 as usize / 8]); | |
self.graph.push(idx as u8); | |
self.graph.extend(result[0 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].into_iter().cloned()); | |
} | |
} | |
impl Forest { | |
pub fn memory_use(&self) -> usize { | |
self.graph.len() | |
} | |
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self { | |
Self { | |
graph: vec![0], | |
eval: grammar.rules.iter().map(|rule| rule.id).collect(), | |
} | |
} | |
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
self.push_node(Node::Leaf { terminal, values }); | |
handle | |
} | |
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
let eval = self.eval[item.dot].map(|id| id as u32); | |
self.push_node(Node::Product { | |
action: eval.unwrap_or(NULL_ACTION), | |
left_factor: item.left_node, | |
right_factor: item.right_node, | |
}); | |
handle | |
} | |
pub fn evaluator<T: Eval>(&mut self, eval: T) -> Evaluator<T> { | |
Evaluator { | |
forest: self, | |
eval, | |
} | |
} | |
fn get(&self, handle: NodeHandle) -> Node { | |
let slice = &self.graph[handle.0 ..]; | |
let size = NODE_SIZES[slice[0] as usize]; | |
let all = &slice[1 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8 + 1]; | |
let (first, second) = all.split_at(size.0 as usize / 8); | |
let (second, third) = second.split_at(size.1 as usize / 8); | |
let mut a = [0; 4]; | |
a[0 .. first.len()].copy_from_slice(first); | |
let mut b = [0; 8]; | |
b[0 .. second.len()].copy_from_slice(second); | |
let mut c = [0; 8]; | |
c[0 .. third.len()].copy_from_slice(third); | |
if size.3 { | |
Node::Product { | |
action: u32::from_le_bytes(a), | |
left_factor: NodeHandle(u64::from_le_bytes(b) as usize), | |
right_factor: if size.2 == 0 { None } else { Some(NodeHandle(u64::from_le_bytes(c) as usize)) }, | |
} | |
} else { | |
Node::Leaf { | |
terminal: Symbol(u32::from_le_bytes(a)), | |
values: u32::from_le_bytes([b[0], b[1], b[2], b[3]]), | |
} | |
} | |
} | |
} | |
pub trait Eval { | |
type Elem; | |
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem; | |
fn product(&mut self, action: u32, list: &mut LinkedList<Self::Elem>) -> Self::Elem; | |
} | |
pub struct Evaluator<'a, T> { | |
eval: T, | |
forest: &'a mut Forest, | |
} | |
impl<'a, T: Eval> Evaluator<'a, T> { | |
pub fn evaluate(&mut self, finished_node: NodeHandle) -> T::Elem { | |
self.evaluate_rec(finished_node).into_iter().next().unwrap() | |
} | |
fn evaluate_rec(&mut self, handle: NodeHandle) -> LinkedList<T::Elem> { | |
match self.forest.get(handle) { | |
Node::Product { | |
left_factor, | |
right_factor, | |
action, | |
} => { | |
let mut evald = self.evaluate_rec(left_factor); | |
if let Some(factor) = right_factor { | |
evald.append(&mut self.evaluate_rec(factor)); | |
} | |
if action != NULL_ACTION { | |
let mut list = LinkedList::new(); | |
list.push_back(self.eval.product(action as u32, &mut evald)); | |
list | |
} else { | |
evald | |
} | |
} | |
Node::Leaf { terminal, values } => { | |
let mut list = LinkedList::new(); | |
list.push_back(self.eval.leaf(terminal, values)); | |
list | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(stmt_expr_attributes)] | |
#![feature(test)] | |
extern crate test; | |
#[cfg_attr(all(not(feature = "simple_forest"), not(feature = "compact_forest"), not(feature = "linked_forest"), not(feature = "rel_compact_forest")), path = "simplest_forest.rs")] | |
#[cfg_attr(feature = "simple_forest", path = "simple_forest.rs")] | |
#[cfg_attr(feature = "compact_forest", path = "compact_forest.rs")] | |
#[cfg_attr(feature = "linked_forest", path = "linked_forest.rs")] | |
#[cfg_attr(feature = "rel_compact_forest", path = "rel_compact_forest.rs")] | |
mod forest; | |
use test::Bencher; | |
use std::{collections::BinaryHeap, ops::Index}; | |
#[cfg(feature = "debug")] | |
use std::collections::{BTreeMap, BTreeSet}; | |
use self::forest::*; | |
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] | |
pub struct Symbol(u32); | |
#[derive(Clone)] | |
pub struct Grammar<const S: usize> { | |
rules: Vec<Rule>, | |
start_symbol: Symbol, | |
symbol_names: [&'static str; S], | |
gen_symbols: u32, | |
} | |
#[derive(Clone, Copy, Debug)] | |
struct Rule { | |
lhs: Symbol, | |
rhs0: Symbol, | |
rhs1: Option<Symbol>, | |
id: Option<usize>, | |
} | |
#[derive(Clone)] | |
struct Tables<const S: usize> { | |
prediction_matrix: [[bool; S]; S], | |
start_symbol: Symbol, | |
rules: Vec<Rule>, | |
rules_by_rhs0: Vec<Rule>, | |
completions: Vec<Vec<PredictionTransition>>, | |
symbol_names: [&'static str; S], | |
#[cfg(feature = "simple")] | |
gen_completions: Vec<PredictionTransition>, | |
} | |
#[derive(Copy, Clone, Debug, Default)] | |
struct PredictionTransition { | |
symbol: Symbol, | |
top: Symbol, | |
dot: usize, | |
is_unary: bool, | |
} | |
// Recognizer | |
#[derive(Clone)] | |
pub struct Recognizer<const S: usize> { | |
tables: Tables<S>, | |
earley_chart: EarleyChart<S>, | |
complete: BinaryHeap<CompletedItem>, | |
pub forest: Forest, | |
pub finished_node: Option<NodeHandle>, | |
} | |
#[cfg(feature = "vec2d")] | |
#[derive(Clone)] | |
struct EarleyChart<const S: usize> { | |
predicted: Vec<[bool; S]>, | |
indices: Vec<usize>, | |
items: Vec<Item>, | |
} | |
#[cfg(not(feature = "vec2d"))] | |
#[derive(Clone)] | |
struct EarleyChart<const S: usize> { | |
sets: Vec<EarleySet<S>>, | |
} | |
#[cfg(feature = "vec2d")] | |
impl<const S: usize> EarleyChart<S> { | |
fn next_set(&mut self, predicted: Option<[bool; S]>) { | |
self.predicted.push(predicted.unwrap_or([false; S])); | |
self.indices.push(self.items.len()); | |
} | |
fn new() -> Self { | |
EarleyChart { | |
predicted: vec![], | |
indices: vec![], | |
items: vec![], | |
} | |
} | |
fn len(&self) -> usize { | |
self.indices.len() | |
} | |
fn predicted(&self, index: usize) -> &[bool] { | |
&self.predicted[index][..] | |
} | |
fn medial(&mut self) -> &mut Vec<Item> { | |
&mut self.items | |
} | |
fn next_medial(&self) -> &[Item] { | |
&self.items[self.indices[self.indices.len() - 1] .. ] | |
} | |
fn last(&self) -> EarleySetRef { | |
let items = &self.items[self.indices[self.indices.len() - 1] ..]; | |
let predicted = &self.predicted.last().unwrap()[..]; | |
EarleySetRef { | |
items, | |
predicted, | |
} | |
} | |
fn next_to_last(&self) -> EarleySetRef { | |
let items = &self.items[self.indices[self.indices.len() - 2] .. self.indices[self.indices.len() - 1]]; | |
let predicted = &self.predicted[self.predicted.len() - 2][..]; | |
EarleySetRef { | |
items, | |
predicted, | |
} | |
} | |
fn last_mut(&mut self) -> EarleySetMut { | |
EarleySetMut { | |
items: &mut self.items[self.indices[self.indices.len() - 1] ..], | |
predicted: &mut self.predicted.last_mut().unwrap()[..] | |
} | |
} | |
} | |
#[cfg(not(feature = "vec2d"))] | |
impl<const S: usize> EarleyChart<S> { | |
fn next_set(&mut self, predicted: Option<[bool; S]>) { | |
self.sets.push(EarleySet { predicted: predicted.unwrap_or([false; S]), medial: vec![] }); | |
} | |
fn new() -> Self { | |
EarleyChart { | |
sets: vec![], | |
} | |
} | |
fn len(&self) -> usize { | |
self.sets.len() | |
} | |
fn push(&mut self, set: EarleySet<S>) { | |
self.sets.push(set); | |
} | |
fn predicted(&self, index: usize) -> &[bool] { | |
&self.sets[index].predicted[..] | |
} | |
fn last(&self) -> EarleySetRef { | |
let last = self.sets.last().unwrap(); | |
EarleySetRef { | |
items: &last.medial[..], | |
predicted: &last.predicted[..], | |
} | |
} | |
fn next_to_last(&self) -> EarleySetRef { | |
let last = &self.sets[self.sets.len() - 2]; | |
EarleySetRef { | |
items: &last.medial[..], | |
predicted: &last.predicted[..], | |
} | |
} | |
fn medial(&mut self) -> &mut Vec<Item> { | |
&mut self.sets.last_mut().unwrap().medial | |
} | |
fn next_medial(&self) -> &[Item] { | |
&self.sets.last().unwrap().medial[..] | |
} | |
fn last_mut(&mut self) -> EarleySetMut { | |
let last = self.sets.last_mut().unwrap(); | |
EarleySetMut { | |
items: &mut last.medial[..], | |
predicted: &mut last.predicted[..] | |
} | |
} | |
} | |
struct EarleySetMut<'a> { | |
items: &'a mut [Item], | |
predicted: &'a mut [bool], | |
} | |
struct EarleySetRef<'a> { | |
items: &'a [Item], | |
predicted: &'a [bool], | |
} | |
#[cfg(feature = "vec2d")] | |
impl<const S: usize> Index<usize> for EarleyChart<S> { | |
type Output = [Item]; | |
fn index(&self, index: usize) -> &Self::Output { | |
&self.items[self.indices[index] .. self.indices[index + 1]] | |
} | |
} | |
#[cfg(not(feature = "vec2d"))] | |
impl<const S: usize> Index<usize> for EarleyChart<S> { | |
type Output = [Item]; | |
fn index(&self, index: usize) -> &Self::Output { | |
&self.sets[index].medial[..] | |
} | |
} | |
#[derive(Clone)] | |
struct EarleySet<const S: usize> { | |
predicted: [bool; S], | |
medial: Vec<Item>, | |
} | |
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone)] | |
struct Item { | |
postdot: Symbol, | |
dot: usize, | |
origin: usize, | |
node: NodeHandle, | |
} | |
#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq)] | |
struct CompletedItem { | |
origin: usize, | |
dot: usize, | |
left_node: NodeHandle, | |
right_node: Option<NodeHandle>, | |
} | |
trait UnionWith { | |
fn union_with(&mut self, other: &[bool]); | |
} | |
impl<const S: usize> UnionWith for [bool; S] { | |
fn union_with(&mut self, other: &[bool]) { | |
for (dst, &src) in self.iter_mut().zip(other.iter()) { | |
*dst |= src; | |
} | |
} | |
} | |
impl<'a> UnionWith for &'a mut [bool] { | |
fn union_with(&mut self, other: &[bool]) { | |
for (dst, &src) in self.iter_mut().zip(other.iter()) { | |
*dst |= src; | |
} | |
} | |
} | |
impl Symbol { | |
fn usize(self) -> usize { | |
self.0 as usize | |
} | |
} | |
impl<const S: usize> EarleySet<S> { | |
fn new() -> Self { | |
EarleySet { | |
predicted: [false; S], | |
medial: vec![], | |
} | |
} | |
} | |
impl<const S: usize> Grammar<S> { | |
pub fn new(symbol_names: [&'static str; S], start_symbol: usize) -> Self { | |
Self { | |
rules: vec![], | |
start_symbol: Symbol(start_symbol as u32), | |
symbol_names, | |
gen_symbols: 0, | |
} | |
} | |
pub fn symbols(&self) -> [Symbol; S] { | |
let mut result = [Symbol(0); S]; | |
for (i, elem) in result.iter_mut().enumerate() { | |
*elem = Symbol(i as u32); | |
} | |
result | |
} | |
pub fn rule<const N: usize>(&mut self, lhs: Symbol, rhs: [Symbol; N], id: usize) { | |
let mut cur_rhs0 = rhs[0]; | |
for i in 1..N - 1 { | |
let gensym = Symbol(self.gen_symbols + S as u32); | |
self.gen_symbols += 1; | |
self.rules.push(Rule { | |
lhs: gensym, | |
rhs0: cur_rhs0, | |
rhs1: Some(rhs[i]), | |
id: None, | |
}); | |
cur_rhs0 = gensym; | |
} | |
self.rules.push(Rule { | |
lhs, | |
rhs0: cur_rhs0, | |
rhs1: if N == 1 { None } else { Some(rhs[N - 1]) }, | |
id: Some(id), | |
}); | |
} | |
fn sort_rules(&mut self) { | |
self.rules.sort_by(|a, b| a.lhs.cmp(&b.lhs)); | |
} | |
} | |
// Implementation for the recognizer. | |
// | |
// The recognizer has a chart of earley sets (Vec<EarleySet>) as well as the last set (next_set). | |
// | |
// A typical loop that utilizes the recognizer: | |
// | |
// - for character in string { | |
// 1. recognizer.begin_earleme(); | |
// 2. recognizer.scan(token_to_symbol(character), values()); | |
// 2a. complete | |
// 3. recognizer.end_earleme(); | |
// 3a. self.complete_all_sums_entirely(); | |
// 3b. self.sort_medial_items(); | |
// 3c. self.prediction_pass(); | |
// - } | |
// | |
impl<const S: usize> Recognizer<S> { | |
pub fn new(grammar: &Grammar<S>) -> Self { | |
let mut result = Self { | |
tables: Tables::new(grammar), | |
earley_chart: EarleyChart::new(), | |
forest: Forest::new(grammar), | |
// complete: BinaryHeap::new_by_key(Box::new(|completed_item| (completed_item.origin, completed_item.dot))), | |
complete: BinaryHeap::with_capacity(64), | |
finished_node: None, | |
}; | |
result.initialize(); | |
result | |
} | |
fn initialize(&mut self) { | |
self.earley_chart.next_set(Some(self.tables.prediction_matrix[self.tables.start_symbol.usize()])); | |
self.earley_chart.next_set(None); | |
} | |
pub fn scan(&mut self, terminal: Symbol, values: u32) { | |
let earleme = self.earley_chart.len() - 2; | |
let node = self.forest.leaf(terminal, earleme + 1, values); | |
self.complete(earleme, terminal, node); | |
} | |
pub fn end_earleme(&mut self) -> bool { | |
if self.is_exhausted() { | |
false | |
} else { | |
// Completion pass, which saves successful parses. | |
self.finished_node = None; | |
self.complete_all_sums_entirely(); | |
// Do the rest. | |
self.sort_medial_items(); | |
self.prediction_pass(); | |
#[cfg(not(feature = "vec2d"))] | |
self.earley_chart.sets | |
.push(EarleySet::new()); | |
#[cfg(feature = "vec2d")] | |
self.earley_chart.next_set(None); | |
true | |
} | |
} | |
#[cfg_attr(feature = "extra_inline", inline)] | |
fn is_exhausted(&self) -> bool { | |
self.earley_chart.next_medial().len() == 0 && self.complete.is_empty() | |
} | |
fn complete_all_sums_entirely(&mut self) { | |
while let Some(&ei) = self.complete.peek() { | |
let lhs_sym = self.tables.get_lhs(ei.dot); | |
let mut result_node = None; | |
while let Some(&ei2) = self.complete.peek() { | |
if ei.origin == ei2.origin && lhs_sym == self.tables.get_lhs(ei2.dot) { | |
result_node = Some(self.forest.push_summand(ei2)); | |
self.complete.pop(); | |
} else { | |
break; | |
} | |
} | |
if ei.origin == 0 && lhs_sym == self.tables.start_symbol { | |
self.finished_node = Some(result_node.unwrap()); | |
} | |
self.complete(ei.origin, lhs_sym, result_node.unwrap()); | |
} | |
} | |
/// Sorts medial items with deduplication. | |
fn sort_medial_items(&mut self) { | |
// Build index by postdot | |
// These medial positions themselves are sorted by postdot symbol. | |
self.earley_chart.last_mut().items.sort_unstable(); | |
} | |
#[cfg_attr(feature = "extra_inline", inline)] | |
fn prediction_pass(&mut self) { | |
// Iterate through medial items in the current set. | |
let mut last = self.earley_chart.last_mut(); | |
let iter = last.items.iter(); | |
// For each medial item in the current set, predict its postdot symbol. | |
for ei in iter { | |
if let Some(postdot) = self | |
.tables | |
.get_rhs1(ei.dot) | |
.filter(|postdot| !last.predicted[postdot.usize()]) | |
{ | |
// Prediction happens here. We would prefer to call `self.predict`, but we can't, | |
// because `self.medial` is borrowed by `iter`. | |
let source = &self.tables.prediction_matrix[postdot.usize()][..]; | |
last.predicted.union_with(source); | |
} | |
} | |
} | |
fn complete(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) { | |
if symbol.usize() >= S { | |
self.complete_binary_predictions(earleme, symbol, node); | |
} else if self.earley_chart.predicted(earleme)[symbol.usize()] { | |
self.complete_medial_items(earleme, symbol, node); | |
self.complete_predictions(earleme, symbol, node); | |
} | |
} | |
#[cfg_attr(feature = "extra_inline", inline)] | |
fn complete_medial_items(&mut self, earleme: usize, symbol: Symbol, right_node: NodeHandle) { | |
let inner_start = { | |
// we use binary search to narrow down the range of items. | |
let set_idx = self.earley_chart[earleme] | |
.binary_search_by(|ei| (self.tables.get_rhs1(ei.dot), 1).cmp(&(Some(symbol), 0))); | |
match set_idx { | |
Ok(idx) | Err(idx) => idx, | |
} | |
}; | |
let rhs1_eq = |ei: &&Item| self.tables.get_rhs1(ei.dot) == Some(symbol); | |
for item in self.earley_chart[earleme][inner_start..] | |
.iter() | |
.take_while(rhs1_eq) | |
{ | |
self.complete.push(CompletedItem { | |
dot: item.dot, | |
origin: item.origin, | |
left_node: item.node, | |
right_node: Some(right_node), | |
}); | |
} | |
} | |
#[cfg_attr(feature = "extra_inline", inline)] | |
fn complete_predictions(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) { | |
// println!("{:?}", slice); | |
for trans in &self.tables.completions[symbol.usize()] { | |
if self.earley_chart.predicted(earleme)[trans.top.usize()] { | |
if trans.is_unary { | |
self.complete.push(CompletedItem { | |
origin: earleme, | |
dot: trans.dot, | |
left_node: node, | |
right_node: None, | |
}); | |
} else { | |
self.earley_chart.medial().push(Item { | |
origin: earleme, | |
dot: trans.dot, | |
node: node, | |
postdot: self.tables.get_rhs1(trans.dot).unwrap(), | |
}); | |
} | |
} | |
} | |
} | |
#[cfg(feature = "simple")] | |
#[cfg_attr(feature = "extra_inline", inline)] | |
fn complete_binary_predictions(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) { | |
// println!("{:?}", slice); | |
let trans = self.tables.gen_completions[symbol.usize() - S]; | |
if self.earley_chart.predicted(earleme)[trans.top.usize()] { | |
self.earley_chart.medial().push(Item { | |
origin: earleme, | |
dot: trans.dot, | |
node: node, | |
postdot: self.tables.get_rhs1(trans.dot).unwrap(), | |
}); | |
} | |
} | |
#[cfg(not(feature = "simple"))] | |
#[cfg_attr(feature = "extra_inline", inline)] | |
fn complete_binary_predictions(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) { | |
// println!("{:?}", slice); | |
for trans in &self.tables.completions[symbol.usize()] { | |
if self.earley_chart.predicted(earleme)[trans.top.usize()] { | |
let postdot = self.tables.get_rhs1(trans.dot).unwrap(); | |
self.earley_chart.medial().push(Item { | |
origin: earleme, | |
dot: trans.dot, | |
node: node, | |
postdot, | |
}); | |
} | |
} | |
} | |
#[cfg(feature = "debug")] | |
fn log_last_earley_set(&self) { | |
let dots = self.dots_for_log(self.earley_chart.last()); | |
for (rule_id, dots) in dots { | |
print!( | |
"{} ::= ", | |
self.tables.symbol_names[self.tables.get_lhs(rule_id).usize()] | |
); | |
if let Some(origins) = dots.get(&0) { | |
print!("{:?}", origins); | |
} | |
print!( | |
" {} ", | |
self.tables.symbol_names[self.tables.rules[rule_id].rhs0.usize()] | |
); | |
if let Some(origins) = dots.get(&1) { | |
print!("{:?}", origins); | |
} | |
if let Some(rhs1) = self.tables.get_rhs1(rule_id) { | |
print!(" {} ", self.tables.symbol_names[rhs1.usize()]); | |
} | |
println!(); | |
} | |
println!(); | |
} | |
#[cfg(feature = "debug")] | |
fn log_earley_set_diff(&self) { | |
use std::collections::{BTreeMap, BTreeSet}; | |
let dots_last_by_id = self.dots_for_log(self.earley_chart.next_to_last()); | |
let mut dots_next_by_id = self.dots_for_log(self.earley_chart.last()); | |
let mut rule_ids: BTreeSet<usize> = BTreeSet::new(); | |
rule_ids.extend(dots_last_by_id.keys()); | |
rule_ids.extend(dots_next_by_id.keys()); | |
for item in self.complete.iter() { | |
let position = if self.tables.get_rhs1(item.dot).is_some() { | |
2 | |
} else { | |
1 | |
}; | |
dots_next_by_id | |
.entry(item.dot) | |
.or_insert(BTreeMap::new()) | |
.entry(position) | |
.or_insert(BTreeSet::new()) | |
.insert(item.origin); | |
} | |
let mut empty_diff = true; | |
for rule_id in rule_ids { | |
let dots_last = dots_last_by_id.get(&rule_id); | |
let dots_next = dots_next_by_id.get(&rule_id); | |
if dots_last == dots_next { | |
continue; | |
} | |
empty_diff = false; | |
print!( | |
"from {} ::= ", | |
self.tables.symbol_names[self.tables.get_top_lhs(rule_id).usize()] | |
); | |
if let Some(origins) = dots_last.and_then(|d| d.get(&0)) { | |
print!("{:?}", origins); | |
} | |
print!( | |
" {} ", | |
self.tables.symbol_names[self.tables.get_top_rhs0(rule_id).usize()] | |
); | |
if let Some(origins) = dots_last.and_then(|d| d.get(&1)) { | |
print!("{:?}", origins); | |
} | |
if let Some(rhs1) = self.tables.get_rhs1(rule_id) { | |
print!(" {} ", self.tables.symbol_names[rhs1.usize()]); | |
} | |
println!(); | |
print!( | |
"to {} ::= ", | |
self.tables.symbol_names[self.tables.get_top_lhs(rule_id).usize()] | |
); | |
if let Some(origins) = dots_next.and_then(|d| d.get(&0)) { | |
print!("{:?}", origins); | |
} | |
print!( | |
" {} ", | |
self.tables.symbol_names[self.tables.get_top_rhs0(rule_id).usize()] | |
); | |
if let Some(origins) = dots_next.and_then(|d| d.get(&1)) { | |
print!("{:?}", origins); | |
} | |
if let Some(rhs1) = self.tables.get_rhs1(rule_id) { | |
print!(" {} ", self.tables.symbol_names[rhs1.usize()]); | |
} | |
if let Some(origins) = dots_next.and_then(|d| d.get(&2)) { | |
print!("{:?}", origins); | |
} | |
println!(); | |
} | |
if empty_diff { | |
println!("no diff"); | |
println!(); | |
} else { | |
println!(); | |
} | |
} | |
#[cfg(feature = "debug")] | |
fn dots_for_log(&self, es: EarleySetRef) -> BTreeMap<usize, BTreeMap<usize, BTreeSet<usize>>> { | |
let mut dots = BTreeMap::new(); | |
for (i, rule) in self.tables.rules.iter().enumerate() { | |
if es.predicted[self.tables.get_top_lhs(i).usize()] { | |
dots.entry(i) | |
.or_insert(BTreeMap::new()) | |
.entry(0) | |
.or_insert(BTreeSet::new()) | |
.insert(self.earley_chart.len() - 1); | |
} | |
} | |
for item in es.items { | |
dots.entry(item.dot) | |
.or_insert(BTreeMap::new()) | |
.entry(1) | |
.or_insert(BTreeSet::new()) | |
.insert(item.origin); | |
} | |
dots | |
} | |
} | |
impl<const S: usize> Tables<S> { | |
fn new(grammar: &Grammar<S>) -> Self { | |
let mut result = Self { | |
prediction_matrix: [[false; S]; S], | |
start_symbol: grammar.start_symbol, | |
rules: vec![], | |
rules_by_rhs0: vec![], | |
completions: vec![], | |
symbol_names: grammar.symbol_names, | |
#[cfg(feature = "simple")] | |
gen_completions: vec![Default::default(); grammar.gen_symbols as usize], | |
}; | |
result.populate(grammar); | |
result | |
} | |
fn populate(&mut self, grammar: &Grammar<S>) { | |
self.populate_rules(grammar); | |
self.populate_prediction_matrix(grammar); | |
self.populate_completions(grammar); | |
} | |
fn populate_prediction_matrix(&mut self, grammar: &Grammar<S>) { | |
for rule in &grammar.rules { | |
if rule.rhs0.usize() < S { | |
let mut top = rule.lhs; | |
while top.usize() >= S { | |
// appears on only one rhs0 | |
let idx = self.rules_by_rhs0 | |
.binary_search_by_key(&top, |elem| elem.rhs0) | |
.expect("lhs not found"); | |
top = self.rules_by_rhs0[idx].lhs; | |
} | |
self.prediction_matrix[top.usize()][rule.rhs0.usize()] = true; | |
} | |
} | |
self.reflexive_closure(); | |
self.transitive_closure(); | |
} | |
fn reflexive_closure(&mut self) { | |
for i in 0..S { | |
self.prediction_matrix[i][i] = true; | |
} | |
} | |
fn transitive_closure(&mut self) { | |
for pos in 0..S { | |
let (rows0, rows1) = self.prediction_matrix.split_at_mut(pos); | |
let (rows1, rows2) = rows1.split_at_mut(1); | |
for dst_row in rows0.iter_mut().chain(rows2.iter_mut()) { | |
if dst_row[pos] { | |
dst_row.union_with(&rows1[0]); | |
} | |
} | |
} | |
} | |
fn populate_rules(&mut self, grammar: &Grammar<S>) { | |
self.rules = grammar.rules.clone(); | |
self.rules_by_rhs0 = self.rules.clone(); | |
self.rules_by_rhs0.sort_by_key(|rule| rule.rhs0); | |
} | |
fn populate_completions(&mut self, grammar: &Grammar<S>) { | |
self.completions.resize(S + grammar.gen_symbols as usize, vec![]); | |
for (i, rule) in grammar.rules.iter().enumerate() { | |
let rhs0 = rule.rhs0.usize(); | |
let mut top = rule.lhs; | |
while top.usize() >= S { | |
// appears on only one rhs0 | |
let idx = self.rules_by_rhs0 | |
.binary_search_by_key(&top, |elem| elem.rhs0) | |
.expect("lhs not found"); | |
top = self.rules_by_rhs0[idx].lhs; | |
} | |
let transition = PredictionTransition { | |
symbol: rule.lhs, | |
top, | |
dot: i, | |
is_unary: rule.rhs1.is_none(), | |
}; | |
if rhs0 >= S && cfg!(feature = "simple") { | |
#[cfg(feature = "simple")] { | |
self.gen_completions[rhs0 - S] = transition; | |
} | |
} else { | |
self.completions[rhs0].push(transition); | |
} | |
} | |
} | |
fn get_rhs1(&self, n: usize) -> Option<Symbol> { | |
self.rules.get(n).and_then(|rule| rule.rhs1) | |
} | |
fn get_lhs(&self, n: usize) -> Symbol { | |
self.rules[n].lhs | |
} | |
#[cfg(feature = "debug")] | |
fn get_top_lhs(&self, dot: usize) -> Symbol { | |
let mut top = self.rules[dot].lhs; | |
while top.usize() >= S { | |
// appears on only one rhs0 | |
let idx = self.rules_by_rhs0 | |
.binary_search_by_key(&top, |elem| elem.rhs0) | |
.expect("lhs not found"); | |
top = self.rules_by_rhs0[idx].lhs; | |
} | |
top | |
} | |
#[cfg(feature = "debug")] | |
fn get_top_rhs0(&self, dot: usize) -> Symbol { | |
let mut top = self.rules[dot].rhs0; | |
while top.usize() >= S { | |
// appears on only one rhs0 | |
let idx = self.rules_by_rhs0 | |
.binary_search_by_key(&top, |elem| elem.rhs0) | |
.expect("lhs not found"); | |
top = self.rules_by_rhs0[idx].lhs; | |
} | |
top | |
} | |
} | |
#[derive(Clone, Debug)] | |
pub enum Value { | |
Digits(String), | |
Float(f64), | |
None, | |
} | |
#[derive(Clone)] | |
pub struct CalcRecognizer { | |
grammar: Grammar<13>, | |
recognizer: Recognizer<13>, | |
} | |
pub fn calc_recognizer() -> CalcRecognizer { | |
let mut grammar = Grammar::new( | |
[ | |
"sum", "factor", "op_mul", "op_div", "lparen", "rparen", "expr_sym", "op_minus", | |
"op_plus", "number", "whole", "digit", "dot", | |
], | |
0, | |
); | |
let [sum, factor, op_mul, op_div, lparen, rparen, expr_sym, op_minus, op_plus, number, whole, digit, dot] = | |
grammar.symbols(); | |
// sum ::= sum [+-] factor | |
// sum ::= factor | |
// factor ::= factor [*/] expr | |
// factor ::= expr | |
// expr ::= '(' sum ')' | '-' expr | number | |
// number ::= whole | whole '.' whole | |
// whole ::= whole [0-9] | [0-9] | |
grammar.rule(sum, [sum, op_plus, factor], 0); | |
grammar.rule(sum, [sum, op_minus, factor], 1); | |
grammar.rule(sum, [factor], 2); | |
grammar.rule(factor, [factor, op_mul, expr_sym], 3); | |
grammar.rule(factor, [factor, op_div, expr_sym], 4); | |
grammar.rule(factor, [expr_sym], 5); | |
grammar.rule(expr_sym, [lparen, sum, rparen], 6); | |
grammar.rule(expr_sym, [op_minus, expr_sym], 7); | |
grammar.rule(expr_sym, [number], 8); | |
grammar.rule(number, [whole], 9); | |
grammar.rule(number, [whole, dot, whole], 10); | |
grammar.rule(whole, [whole, digit], 11); | |
grammar.rule(whole, [digit], 12); | |
grammar.sort_rules(); | |
let recognizer = Recognizer::new(&grammar); | |
CalcRecognizer { | |
recognizer, | |
grammar, | |
} | |
} | |
#[cfg(any(feature = "linked_forest", feature = "compact_forest", feature = "rel_compact_forest"))] | |
struct E { | |
symbols: [Symbol; 13], | |
} | |
#[cfg(any(feature = "linked_forest", feature = "compact_forest", feature = "rel_compact_forest"))] | |
impl self::forest::Eval for E { | |
type Elem = Value; | |
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem { | |
let [sum, factor, op_mul, op_div, lparen, rparen, _expr_sym, op_minus, op_plus, _number, _whole, digit, dot] = | |
self.symbols; | |
if terminal == digit { | |
Value::Digits((values as u8 as char).to_string()) | |
} else { | |
Value::None | |
} | |
} | |
fn product(&mut self, action: u32, args: &mut std::collections::LinkedList<Self::Elem>) -> Self::Elem { | |
let [sum, factor, op_mul, op_div, lparen, rparen, _expr_sym, op_minus, op_plus, _number, _whole, digit, dot] = | |
self.symbols; | |
match ( | |
action, | |
args.pop_front().unwrap_or(Value::None), | |
args.pop_front().unwrap_or(Value::None), | |
args.pop_front().unwrap_or(Value::None), | |
) { | |
(0, Value::Float(left), _, Value::Float(right)) => Value::Float(left + right), | |
(1, Value::Float(left), _, Value::Float(right)) => Value::Float(left - right), | |
(2, val, Value::None, Value::None) => val, | |
(3, Value::Float(left), _, Value::Float(right)) => Value::Float(left * right), | |
(4, Value::Float(left), _, Value::Float(right)) => Value::Float(left / right), | |
(5, val, Value::None, Value::None) => val, | |
(6, _, val, _) => val, | |
(7, _, Value::Float(num), Value::None) => Value::Float(-num), | |
(8, Value::Digits(digits), Value::None, Value::None) => { | |
Value::Float(digits.parse::<f64>().unwrap()) | |
} | |
(9, val @ Value::Digits(..), _, _) => val, | |
(10, Value::Digits(before_dot), _, Value::Digits(after_dot)) => { | |
let mut digits = before_dot; | |
digits.push('.'); | |
digits.push_str(&after_dot[..]); | |
Value::Digits(digits) | |
} | |
(11, Value::Digits(mut num), Value::Digits(digit), _) => { | |
num.push_str(&digit[..]); | |
Value::Digits(num) | |
} | |
(12, val @ Value::Digits(..), _, _) => val, | |
args => panic!("unknown rule id {:?} or args {:?}", action, args), | |
} | |
} | |
} | |
impl CalcRecognizer { | |
pub fn parse(&mut self, expr: &str) -> f64 { | |
let [sum, factor, op_mul, op_div, lparen, rparen, _expr_sym, op_minus, op_plus, _number, _whole, digit, dot] = | |
self.grammar.symbols(); | |
let symbols = self.grammar.symbols(); | |
for (i, ch) in expr.chars().enumerate() { | |
let terminal = match ch { | |
'-' => op_minus, | |
'.' => dot, | |
'0'..='9' => digit, | |
'(' => lparen, | |
')' => rparen, | |
'*' => op_mul, | |
'/' => op_div, | |
'+' => op_plus, | |
' ' => continue, | |
other => panic!("invalid character {}", other), | |
}; | |
self.recognizer.scan(terminal, ch as u32); | |
let success = self.recognizer.end_earleme(); | |
#[cfg(feature = "debug")] | |
if !success { | |
self.recognizer.log_earley_set_diff(); | |
} | |
assert!(success, "parse failed at character {}", i); | |
} | |
let finished_node = self.recognizer.finished_node.expect("parse failed"); | |
#[cfg(any(feature = "compact_forest", feature = "linked_forest", feature = "rel_compact_forest"))] | |
let result = self.recognizer.forest.evaluator(E { symbols }).evaluate(finished_node); | |
#[cfg(feature = "simple_forest")] | |
let mut evaluator = Evaluator::new( | |
#[cfg(all(not(feature = "simple_forest"), not(feature = "compact_forest"), not(feature = "linked_forest")))] | |
|rule_id, result: &[Value], args: &[usize]| { | |
// println!("{:?}", (rule_id, result, args)); | |
match ( | |
rule_id, | |
args.get(0).cloned().and_then(|a| result.get(a)).cloned().unwrap_or(Value::None), | |
args.get(1).cloned().and_then(|a| result.get(a)).cloned().unwrap_or(Value::None), | |
args.get(2).cloned().and_then(|a| result.get(a)).cloned().unwrap_or(Value::None), | |
) { | |
(0, Value::Float(left), _, Value::Float(right)) => Value::Float(left + right), | |
(1, Value::Float(left), _, Value::Float(right)) => Value::Float(left - right), | |
(2, val, Value::None, Value::None) => val, | |
(3, Value::Float(left), _, Value::Float(right)) => Value::Float(left * right), | |
(4, Value::Float(left), _, Value::Float(right)) => Value::Float(left / right), | |
(5, val, Value::None, Value::None) => val, | |
(6, _, val, _) => val, | |
(7, _, Value::Float(num), Value::None) => Value::Float(-num), | |
(8, Value::Digits(digits), Value::None, Value::None) => { | |
Value::Float(digits.parse::<f64>().unwrap()) | |
} | |
(9, val @ Value::Digits(..), _, _) => val, | |
(10, Value::Digits(before_dot), _, Value::Digits(after_dot)) => { | |
let mut digits = before_dot; | |
digits.push('.'); | |
digits.push_str(&after_dot[..]); | |
Value::Digits(digits) | |
} | |
(11, Value::Digits(mut num), Value::Digits(digit), _) => { | |
num.push_str(&digit[..]); | |
Value::Digits(num) | |
} | |
(12, val @ Value::Digits(..), _, _) => val, | |
args => panic!("unknown rule id {:?} or args {:?}", rule_id, args), | |
}}, | |
#[cfg(feature = "simple_forest")] | |
|rule_id, args: &[Value]| { | |
// println!("{:?}", (rule_id, result, args)); | |
match ( | |
rule_id, | |
args.get(0).cloned().unwrap_or(Value::None), | |
args.get(1).cloned().unwrap_or(Value::None), | |
args.get(2).cloned().unwrap_or(Value::None), | |
) { | |
(0, Value::Float(left), _, Value::Float(right)) => Value::Float(left + right), | |
(1, Value::Float(left), _, Value::Float(right)) => Value::Float(left - right), | |
(2, val, Value::None, Value::None) => val, | |
(3, Value::Float(left), _, Value::Float(right)) => Value::Float(left * right), | |
(4, Value::Float(left), _, Value::Float(right)) => Value::Float(left / right), | |
(5, val, Value::None, Value::None) => val, | |
(6, _, val, _) => val, | |
(7, _, Value::Float(num), Value::None) => Value::Float(-num), | |
(8, Value::Digits(digits), Value::None, Value::None) => { | |
Value::Float(digits.parse::<f64>().unwrap()) | |
} | |
(9, val @ Value::Digits(..), _, _) => val, | |
(10, Value::Digits(before_dot), _, Value::Digits(after_dot)) => { | |
let mut digits = before_dot; | |
digits.push('.'); | |
digits.push_str(&after_dot[..]); | |
Value::Digits(digits) | |
} | |
(11, Value::Digits(mut num), Value::Digits(digit), _) => { | |
num.push_str(&digit[..]); | |
Value::Digits(num) | |
} | |
(12, val @ Value::Digits(..), _, _) => val, | |
args => panic!("unknown rule id {:?} or args {:?}", rule_id, args), | |
}}, | |
#[cfg(feature = "simple_forest")] | |
|terminal, values| { | |
if terminal == digit { | |
Value::Digits((values as u8 as char).to_string()) | |
} else { | |
Value::None | |
} | |
} | |
); | |
#[cfg(feature = "forest_size")] | |
panic!("{}", self.recognizer.forest.memory_use()); | |
#[cfg(feature = "simple_forest")] | |
let result = evaluator.evaluate(&mut self.recognizer.forest, finished_node); | |
if let Value::Float(num) = result { | |
num | |
} else { | |
panic!("evaluation failed") | |
} | |
} | |
} | |
pub fn calc(expr: &str) -> f64 { | |
let mut recognizer = calc_recognizer(); | |
recognizer.parse(expr) | |
} | |
#[test] | |
fn test_parse() { | |
assert_eq!(calc("1.0 + 2.0"), 3.0); | |
} | |
#[bench] | |
fn bench_parser(bench: &mut Bencher) { | |
let recognizer = calc_recognizer(); | |
bench.bytes = 9; | |
bench.iter(|| { | |
let mut parser = recognizer.clone(); | |
parser.parse("1.0 + 2.0") | |
}); | |
} | |
#[bench] | |
fn bench_parser2(bench: &mut Bencher) { | |
let recognizer = calc_recognizer(); | |
bench.bytes = 76; | |
bench.iter(|| { | |
let mut parser = recognizer.clone(); | |
parser.parse("1.0 + 2.0 * 3.0 + 1.0 + 2.0 * 3.0 + 1.0 + 2.0 * 3.0 / 1.0 + 2.0 * 3.01234234") | |
}); | |
} | |
#[bench] | |
fn bench_parser3(bench: &mut Bencher) { | |
let recognizer = calc_recognizer(); | |
bench.bytes = 68 + 92 + 74; | |
bench.iter(|| { | |
let mut parser = recognizer.clone(); | |
parser.parse("1.0 + (2.0 * 3.0 + (1.0 + 2.0 * 3.0) + 1.0) + 2.0 * 3.0 / 1.0 + 2.0 \ | |
* 3.01234234 + (2.0 * 3.0 + (1.0 + 2.0 * 3.0) + 1.0) + 2.0 * 3.0 / 1.0 + 2.0 * 3.01234234 + \ | |
(2.0 * 3.0 + (1.0 + 2.0 * 3.0) + 1.0) + 2.0 * 3.0 / 1.0 + 2.0 * 3.01234234") | |
}); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Forest | |
use std::collections::LinkedList; | |
use super::*; | |
#[derive(Clone)] | |
pub struct Forest { | |
graph: Vec<Node>, | |
eval: Vec<Option<usize>>, | |
} | |
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] | |
pub struct NodeHandle(usize); | |
#[derive(Clone)] | |
enum Node { | |
Product { | |
action: u32, | |
left_factor: NodeHandle, | |
right_factor: Option<NodeHandle>, | |
}, | |
Leaf { | |
terminal: Symbol, | |
values: u32, | |
}, | |
} | |
const NULL_ACTION: u32 = !0; | |
impl Forest { | |
pub fn memory_use(&self) -> usize { | |
self.graph.len() * std::mem::size_of::<Node>() | |
} | |
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self { | |
Self { | |
graph: vec![], | |
eval: grammar.rules.iter().map(|rule| rule.id).collect(), | |
} | |
} | |
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
self.graph.push(Node::Leaf { terminal, values }); | |
handle | |
} | |
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
let eval = self.eval[item.dot].map(|id| id as u32); | |
self.graph.push(Node::Product { | |
action: eval.unwrap_or(NULL_ACTION), | |
left_factor: item.left_node, | |
right_factor: item.right_node, | |
}); | |
handle | |
} | |
pub fn evaluator<T: Eval>(&mut self, eval: T) -> Evaluator<T> { | |
Evaluator { | |
forest: self, | |
eval, | |
} | |
} | |
} | |
pub trait Eval { | |
type Elem; | |
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem; | |
fn product(&mut self, action: u32, list: &mut LinkedList<Self::Elem>) -> Self::Elem; | |
} | |
pub struct Evaluator<'a, T> { | |
eval: T, | |
forest: &'a mut Forest, | |
} | |
impl<'a, T: Eval> Evaluator<'a, T> { | |
pub fn evaluate(&mut self, finished_node: NodeHandle) -> T::Elem { | |
self.evaluate_rec(finished_node).into_iter().next().unwrap() | |
} | |
fn evaluate_rec(&mut self, handle: NodeHandle) -> LinkedList<T::Elem> { | |
match self.forest.graph[handle.0] { | |
Node::Product { | |
left_factor, | |
right_factor, | |
action, | |
} => { | |
let mut evald = self.evaluate_rec(left_factor); | |
if let Some(factor) = right_factor { | |
evald.append(&mut self.evaluate_rec(factor)); | |
} | |
if action != NULL_ACTION { | |
let mut list = LinkedList::new(); | |
list.push_back(self.eval.product(action as u32, &mut evald)); | |
list | |
} else { | |
evald | |
} | |
} | |
Node::Leaf { terminal, values } => { | |
let mut list = LinkedList::new(); | |
list.push_back(self.eval.leaf(terminal, values)); | |
list | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Forest | |
use std::{collections::LinkedList, iter}; | |
use super::*; | |
#[derive(Clone)] | |
pub struct Forest { | |
graph: Vec<u8>, | |
eval: Vec<Option<usize>>, | |
} | |
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] | |
pub struct NodeHandle(usize, usize); | |
impl NodeHandle { | |
fn get(self) -> usize { | |
self.1 - self.0 | |
} | |
} | |
#[derive(Clone)] | |
enum Node { | |
Product { | |
action: u32, | |
left_factor: NodeHandle, | |
right_factor: Option<NodeHandle>, | |
}, | |
Leaf { | |
terminal: Symbol, | |
values: u32, | |
}, | |
} | |
static NODE_SIZES: &'static [(u8, u8, u8, bool)] = &[ | |
(8, 8, 0, false), | |
(16, 16, 0, false), | |
(8, 8, 0, true), | |
(8, 8, 8, true), | |
(8, 16, 8, true), | |
(16, 16, 8, true), | |
(8, 16, 16, true), | |
(8, 16, 0, true), | |
(8, 24, 24, true), | |
(16, 24, 24, true), | |
(16, 32, 32, true), | |
(16, 32, 0, true), | |
(32, 64, 64, true), | |
(32, 64, 0, true), | |
(32, 64, 16, true), | |
]; | |
const NULL_ACTION: u32 = 0; | |
impl Forest { | |
fn push_node(&mut self, node: Node) { | |
let size = |mut x: u64| { | |
let mut result = 0; | |
while x != 0 { | |
x = x >> 8; | |
result += 8; | |
} | |
result | |
}; | |
let tup = match node { | |
Node::Product { action, left_factor, right_factor } => { | |
(action as u32, (self.graph.len() - left_factor.get()) as u64, right_factor.map_or(0, |f| (self.graph.len() - f.get()) as u64), true) | |
} | |
Node::Leaf { terminal, values } => { | |
(terminal.0 as u32, values as u64, 0, false) | |
} | |
}; | |
let s = (size(tup.0 as u64), size(tup.1), size(tup.2)); | |
let (idx, size) = NODE_SIZES.iter().enumerate().find(|(_i, sizes)| s.0 <= sizes.0 && s.1 <= sizes.1 && s.2 <= sizes.2 && tup.3 == sizes.3).unwrap(); | |
// if idx.is_none() { | |
// panic!("wrong size for {:?} {:?}", tup, size(tup.1)); | |
// } | |
// let idx = idx.unwrap(); | |
// let size = NODE_SIZES[idx]; | |
let mut result = [0u8; 24]; | |
result[0 .. size.0 as usize / 8].copy_from_slice(&u32::to_le_bytes(tup.0)[0 .. size.0 as usize / 8]); | |
result[size.0 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.1)[0 .. size.1 as usize / 8]); | |
result[size.0 as usize / 8 + size.1 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.2)[0 .. size.2 as usize / 8]); | |
self.graph.push(idx as u8); | |
self.graph.extend(result[0 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].into_iter().cloned()); | |
} | |
} | |
impl Forest { | |
pub fn memory_use(&self) -> usize { | |
self.graph.len() | |
} | |
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self { | |
Self { | |
graph: vec![0], | |
eval: grammar.rules.iter().map(|rule| rule.id).collect(), | |
} | |
} | |
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle { | |
let handle = NodeHandle(0, self.graph.len()); | |
self.push_node(Node::Leaf { terminal, values }); | |
handle | |
} | |
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle { | |
let handle = NodeHandle(0, self.graph.len()); | |
let eval = self.eval[item.dot].map(|id| id as u32); | |
self.push_node(Node::Product { | |
action: eval.unwrap_or(NULL_ACTION), | |
left_factor: item.left_node, | |
right_factor: item.right_node, | |
}); | |
handle | |
} | |
pub fn evaluator<T: Eval>(&mut self, eval: T) -> Evaluator<T> { | |
Evaluator { | |
forest: self, | |
eval, | |
} | |
} | |
fn get(&self, handle: NodeHandle) -> Node { | |
let slice = &self.graph[handle.get() ..]; | |
let size = NODE_SIZES[slice[0] as usize]; | |
let all = &slice[1 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8 + 1]; | |
let (first, second) = all.split_at(size.0 as usize / 8); | |
let (second, third) = second.split_at(size.1 as usize / 8); | |
let mut a = [0; 4]; | |
a[0 .. first.len()].copy_from_slice(first); | |
let mut b = [0; 8]; | |
b[0 .. second.len()].copy_from_slice(second); | |
let mut c = [0; 8]; | |
c[0 .. third.len()].copy_from_slice(third); | |
if size.3 { | |
Node::Product { | |
action: u32::from_le_bytes(a), | |
left_factor: NodeHandle(u64::from_le_bytes(b) as usize, handle.get()), | |
right_factor: if size.2 == 0 { None } else { Some(NodeHandle(u64::from_le_bytes(c) as usize, handle.get())) }, | |
} | |
} else { | |
Node::Leaf { | |
terminal: Symbol(u32::from_le_bytes(a)), | |
values: u32::from_le_bytes([b[0], b[1], b[2], b[3]]), | |
} | |
} | |
} | |
} | |
pub trait Eval { | |
type Elem; | |
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem; | |
fn product(&mut self, action: u32, list: &mut LinkedList<Self::Elem>) -> Self::Elem; | |
} | |
pub struct Evaluator<'a, T> { | |
eval: T, | |
forest: &'a mut Forest, | |
} | |
impl<'a, T: Eval> Evaluator<'a, T> { | |
pub fn evaluate(&mut self, finished_node: NodeHandle) -> T::Elem { | |
self.evaluate_rec(finished_node).into_iter().next().unwrap() | |
} | |
fn evaluate_rec(&mut self, handle: NodeHandle) -> LinkedList<T::Elem> { | |
match self.forest.get(handle) { | |
Node::Product { | |
left_factor, | |
right_factor, | |
action, | |
} => { | |
let mut evald = self.evaluate_rec(left_factor); | |
if let Some(factor) = right_factor { | |
evald.append(&mut self.evaluate_rec(factor)); | |
} | |
if action != NULL_ACTION { | |
let mut list = LinkedList::new(); | |
list.push_back(self.eval.product(action as u32, &mut evald)); | |
list | |
} else { | |
evald | |
} | |
} | |
Node::Leaf { terminal, values } => { | |
let mut list = LinkedList::new(); | |
list.push_back(self.eval.leaf(terminal, values)); | |
list | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Forest | |
use super::*; | |
#[derive(Clone)] | |
pub struct Forest { | |
graph: Vec<Node>, | |
eval: Vec<Option<usize>>, | |
} | |
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] | |
pub struct NodeHandle(usize); | |
#[derive(Clone)] | |
enum Node { | |
Product { | |
action: u32, | |
left_factor: NodeHandle, | |
right_factor: Option<NodeHandle>, | |
}, | |
Leaf { | |
terminal: Symbol, | |
values: u32, | |
}, | |
} | |
const NULL_ACTION: u32 = !0; | |
impl Forest { | |
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self { | |
Self { | |
graph: vec![], | |
eval: grammar.rules.iter().map(|rule| rule.id).collect(), | |
} | |
} | |
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
self.graph.push(Node::Leaf { terminal, values }); | |
handle | |
} | |
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
let eval = self.eval[item.dot].map(|id| id as u32); | |
self.graph.push(Node::Product { | |
action: eval.unwrap_or(NULL_ACTION), | |
left_factor: item.left_node, | |
right_factor: item.right_node, | |
}); | |
handle | |
} | |
} | |
pub struct Evaluator<F, G> { | |
eval_product: F, | |
eval_leaf: G, | |
} | |
struct Rec<T>(T, Option<Box<Rec<T>>>); | |
impl<T, F, G> Evaluator<F, G> | |
where | |
F: Fn(u32, &[T]) -> T + Copy, | |
G: Fn(Symbol, u32) -> T + Copy, | |
T: Clone + ::std::fmt::Debug, | |
{ | |
pub fn new(eval_product: F, eval_leaf: G) -> Self { | |
Self { | |
eval_product, | |
eval_leaf, | |
} | |
} | |
pub fn evaluate(&mut self, forest: &mut Forest, finished_node: NodeHandle) -> T { | |
self.evaluate_rec(forest, finished_node)[0].clone() | |
} | |
fn evaluate_rec(&mut self, forest: &mut Forest, handle: NodeHandle) -> Vec<T> { | |
match forest.graph[handle.0] { | |
Node::Product { | |
left_factor, | |
right_factor, | |
action, | |
} => { | |
let mut result = self.evaluate_rec(forest, left_factor); | |
if let Some(factor) = right_factor { | |
result.extend(self.evaluate_rec(forest, factor)); | |
} | |
if action != NULL_ACTION { | |
vec![(self.eval_product)(action as u32, &result)] | |
} else { | |
result | |
} | |
} | |
Node::Leaf { terminal, values } => { | |
vec![(self.eval_leaf)(terminal, values)] | |
} | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Forest | |
use std::iter::Product; | |
use super::*; | |
#[derive(Clone)] | |
pub struct Forest { | |
graph: Vec<Node>, | |
eval: Vec<Option<usize>>, | |
} | |
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)] | |
pub struct NodeHandle(usize); | |
#[derive(Clone)] | |
enum Node { | |
Product { | |
action: u32, | |
left_factor: NodeHandle, | |
right_factor: Option<NodeHandle>, | |
}, | |
Leaf { | |
terminal: Symbol, | |
values: u32, | |
}, | |
Evaluated { | |
index: usize, | |
} | |
} | |
const NULL_ACTION: u32 = !0; | |
impl Forest { | |
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self { | |
Self { | |
graph: vec![], | |
eval: grammar.rules.iter().map(|rule| rule.id).collect(), | |
} | |
} | |
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
self.graph.push(Node::Leaf { terminal, values }); | |
handle | |
} | |
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle { | |
let handle = NodeHandle(self.graph.len()); | |
let eval = self.eval[item.dot].map(|id| id as u32); | |
self.graph.push(Node::Product { | |
action: eval.unwrap_or(NULL_ACTION), | |
left_factor: item.left_node, | |
right_factor: item.right_node, | |
}); | |
handle | |
} | |
} | |
pub struct Evaluator<F, G> { | |
eval_product: F, | |
eval_leaf: G, | |
stack: Vec<NodeHandle>, | |
} | |
impl<T, F, G> Evaluator<F, G> | |
where | |
F: Fn(u32, &[T], &[usize]) -> T + Copy, | |
G: Fn(Symbol, u32) -> T + Copy, | |
T: Clone + ::std::fmt::Debug, | |
{ | |
pub fn new(eval_product: F, eval_leaf: G) -> Self { | |
Self { | |
eval_product, | |
eval_leaf, | |
stack: vec![], | |
} | |
} | |
pub fn evaluate(&mut self, forest: &mut Forest, finished_node: NodeHandle) -> T { | |
let mut liveness = ::std::iter::repeat(false).take(forest.graph.len()).collect::<Vec<_>>(); | |
let mut work = vec![finished_node]; | |
let mut result = vec![]; | |
let mut refs = vec![]; | |
while let Some(node) = work.pop() { | |
if !liveness[node.0] { | |
match forest.graph[node.0] { | |
Node::Product { left_factor, right_factor, action } => { | |
liveness[node.0] = action != NULL_ACTION; | |
if let Some(right) = right_factor { | |
work.push(right); | |
} | |
work.push(left_factor); | |
} | |
Node::Leaf { .. } => { | |
liveness[node.0] = true; | |
} | |
Node::Evaluated { .. } => unreachable!() | |
} | |
} | |
} | |
for (i, alive) in (0 .. forest.graph.len()).zip(liveness.iter().cloned()) { | |
if alive { | |
self.evaluate_node(forest, NodeHandle(i), &mut result, &mut refs); | |
} | |
} | |
match forest.graph[finished_node.0] { | |
Node::Evaluated { index } => { | |
result[index].clone() | |
} | |
_ => unreachable!() | |
} | |
} | |
fn evaluate_node<'a>(&mut self, forest: &mut Forest, node: NodeHandle, result: &'a mut Vec<T>, refs: &'a mut Vec<usize>) { | |
#[derive(Eq, PartialEq)] | |
enum Action { | |
Leaf(Symbol, u32), | |
Product(u32), | |
} | |
self.stack.push(node); | |
let mut first_action = Action::Product(NULL_ACTION); | |
while let Some(factor) = self.stack.pop() { | |
match &forest.graph[factor.0] { | |
&Node::Product { action, left_factor, right_factor } => { | |
if action != NULL_ACTION { | |
first_action = Action::Product(action); | |
} | |
if let Some(rfactor) = right_factor { | |
self.stack.push(rfactor); | |
} | |
self.stack.push(left_factor); | |
} | |
&Node::Evaluated { index } => { | |
refs.push(index); | |
} | |
&Node::Leaf { terminal, values } => { | |
first_action = Action::Leaf(terminal, values) | |
} | |
} | |
} | |
let finished = match first_action { | |
Action::Leaf(terminal, values) => { | |
(self.eval_leaf)(terminal, values) | |
} | |
Action::Product(action) => { | |
(self.eval_product)(action, &result[..], &refs[..]) | |
} | |
}; | |
refs.clear(); | |
self.stack.clear(); | |
forest.graph[node.0] = Node::Evaluated { index: result.len() }; | |
result.push(finished); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
We now do lots of conditional compilation. This gist contains a full Cargo crate.