Skip to content

Instantly share code, notes, and snippets.

@pczarn
Last active June 17, 2024 12:13
Show Gist options
  • Save pczarn/c51dd53b5e194467a79ff5592856d14c to your computer and use it in GitHub Desktop.
Save pczarn/c51dd53b5e194467a79ff5592856d14c to your computer and use it in GitHub Desktop.
mini-earley
/target
callgrind.out.*
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "tiny-earley"
version = "0.1.0"
[package]
name = "tiny-earley"
version = "0.1.0"
edition = "2021"
[lib]
path = "lib.rs"
[benches]
path = "bench_parser.rs"
[dependencies]
[profile.bench]
opt-level = 3 # Use slightly better optimizations.
[features]
default = ["debug", "rel_compact_forest"]
debug = []
simple = []
extra_inline = []
vec2d = []
simplest_forest = []
simple_forest = []
compact_forest = []
linked_forest = []
forest_size = []
rel_compact_forest = []
// Forest
use std::{collections::LinkedList, iter};
use super::*;
#[derive(Clone)]
pub struct Forest {
graph: Vec<u8>,
eval: Vec<Option<usize>>,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NodeHandle(usize);
#[derive(Clone)]
enum Node {
Product {
action: u32,
left_factor: NodeHandle,
right_factor: Option<NodeHandle>,
},
Leaf {
terminal: Symbol,
values: u32,
},
}
static NODE_SIZES: &'static [(u8, u8, u8, bool)] = &[
(8, 8, 0, false),
(16, 16, 0, false),
(8, 8, 0, true),
(8, 8, 8, true),
(8, 16, 8, true),
(16, 16, 8, true),
(8, 16, 16, true),
(8, 16, 0, true),
(8, 24, 24, true),
(16, 24, 24, true),
(16, 32, 32, true),
(16, 32, 0, true),
(32, 64, 64, true),
(32, 64, 0, true),
(32, 64, 16, true),
];
const NULL_ACTION: u32 = 0;
impl Forest {
fn push_node(&mut self, node: Node) {
let size = |mut x: u64| {
let mut result = 0;
while x != 0 {
x = x >> 8;
result += 8;
}
result
};
let tup = match node {
Node::Product { action, left_factor, right_factor } => {
(action as u32, left_factor.0 as u64, right_factor.map_or(0, |f| f.0 as u64), true)
}
Node::Leaf { terminal, values } => {
(terminal.0 as u32, values as u64, 0, false)
}
};
let s = (size(tup.0 as u64), size(tup.1), size(tup.2));
let (idx, size) = NODE_SIZES.iter().enumerate().find(|(_i, sizes)| s.0 <= sizes.0 && s.1 <= sizes.1 && s.2 <= sizes.2 && tup.3 == sizes.3).unwrap();
// if idx.is_none() {
// panic!("wrong size for {:?} {:?}", tup, size(tup.1));
// }
// let idx = idx.unwrap();
// let size = NODE_SIZES[idx];
let mut result = [0u8; 24];
result[0 .. size.0 as usize / 8].copy_from_slice(&u32::to_le_bytes(tup.0)[0 .. size.0 as usize / 8]);
result[size.0 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.1)[0 .. size.1 as usize / 8]);
result[size.0 as usize / 8 + size.1 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.2)[0 .. size.2 as usize / 8]);
self.graph.push(idx as u8);
self.graph.extend(result[0 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].into_iter().cloned());
}
}
impl Forest {
pub fn memory_use(&self) -> usize {
self.graph.len()
}
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self {
Self {
graph: vec![0],
eval: grammar.rules.iter().map(|rule| rule.id).collect(),
}
}
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
self.push_node(Node::Leaf { terminal, values });
handle
}
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
let eval = self.eval[item.dot].map(|id| id as u32);
self.push_node(Node::Product {
action: eval.unwrap_or(NULL_ACTION),
left_factor: item.left_node,
right_factor: item.right_node,
});
handle
}
pub fn evaluator<T: Eval>(&mut self, eval: T) -> Evaluator<T> {
Evaluator {
forest: self,
eval,
}
}
fn get(&self, handle: NodeHandle) -> Node {
let slice = &self.graph[handle.0 ..];
let size = NODE_SIZES[slice[0] as usize];
let all = &slice[1 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8 + 1];
let (first, second) = all.split_at(size.0 as usize / 8);
let (second, third) = second.split_at(size.1 as usize / 8);
let mut a = [0; 4];
a[0 .. first.len()].copy_from_slice(first);
let mut b = [0; 8];
b[0 .. second.len()].copy_from_slice(second);
let mut c = [0; 8];
c[0 .. third.len()].copy_from_slice(third);
if size.3 {
Node::Product {
action: u32::from_le_bytes(a),
left_factor: NodeHandle(u64::from_le_bytes(b) as usize),
right_factor: if size.2 == 0 { None } else { Some(NodeHandle(u64::from_le_bytes(c) as usize)) },
}
} else {
Node::Leaf {
terminal: Symbol(u32::from_le_bytes(a)),
values: u32::from_le_bytes([b[0], b[1], b[2], b[3]]),
}
}
}
}
pub trait Eval {
type Elem;
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem;
fn product(&mut self, action: u32, list: &mut LinkedList<Self::Elem>) -> Self::Elem;
}
pub struct Evaluator<'a, T> {
eval: T,
forest: &'a mut Forest,
}
impl<'a, T: Eval> Evaluator<'a, T> {
pub fn evaluate(&mut self, finished_node: NodeHandle) -> T::Elem {
self.evaluate_rec(finished_node).into_iter().next().unwrap()
}
fn evaluate_rec(&mut self, handle: NodeHandle) -> LinkedList<T::Elem> {
match self.forest.get(handle) {
Node::Product {
left_factor,
right_factor,
action,
} => {
let mut evald = self.evaluate_rec(left_factor);
if let Some(factor) = right_factor {
evald.append(&mut self.evaluate_rec(factor));
}
if action != NULL_ACTION {
let mut list = LinkedList::new();
list.push_back(self.eval.product(action as u32, &mut evald));
list
} else {
evald
}
}
Node::Leaf { terminal, values } => {
let mut list = LinkedList::new();
list.push_back(self.eval.leaf(terminal, values));
list
}
}
}
}
#![feature(stmt_expr_attributes)]
#![feature(test)]
extern crate test;
#[cfg_attr(all(not(feature = "simple_forest"), not(feature = "compact_forest"), not(feature = "linked_forest"), not(feature = "rel_compact_forest")), path = "simplest_forest.rs")]
#[cfg_attr(feature = "simple_forest", path = "simple_forest.rs")]
#[cfg_attr(feature = "compact_forest", path = "compact_forest.rs")]
#[cfg_attr(feature = "linked_forest", path = "linked_forest.rs")]
#[cfg_attr(feature = "rel_compact_forest", path = "rel_compact_forest.rs")]
mod forest;
use test::Bencher;
use std::{collections::BinaryHeap, ops::Index};
#[cfg(feature = "debug")]
use std::collections::{BTreeMap, BTreeSet};
use self::forest::*;
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
pub struct Symbol(u32);
#[derive(Clone)]
pub struct Grammar<const S: usize> {
rules: Vec<Rule>,
start_symbol: Symbol,
symbol_names: [&'static str; S],
gen_symbols: u32,
}
#[derive(Clone, Copy, Debug)]
struct Rule {
lhs: Symbol,
rhs0: Symbol,
rhs1: Option<Symbol>,
id: Option<usize>,
}
#[derive(Clone)]
struct Tables<const S: usize> {
prediction_matrix: [[bool; S]; S],
start_symbol: Symbol,
rules: Vec<Rule>,
rules_by_rhs0: Vec<Rule>,
completions: Vec<Vec<PredictionTransition>>,
symbol_names: [&'static str; S],
#[cfg(feature = "simple")]
gen_completions: Vec<PredictionTransition>,
}
#[derive(Copy, Clone, Debug, Default)]
struct PredictionTransition {
symbol: Symbol,
top: Symbol,
dot: usize,
is_unary: bool,
}
// Recognizer
#[derive(Clone)]
pub struct Recognizer<const S: usize> {
tables: Tables<S>,
earley_chart: EarleyChart<S>,
complete: BinaryHeap<CompletedItem>,
pub forest: Forest,
pub finished_node: Option<NodeHandle>,
}
#[cfg(feature = "vec2d")]
#[derive(Clone)]
struct EarleyChart<const S: usize> {
predicted: Vec<[bool; S]>,
indices: Vec<usize>,
items: Vec<Item>,
}
#[cfg(not(feature = "vec2d"))]
#[derive(Clone)]
struct EarleyChart<const S: usize> {
sets: Vec<EarleySet<S>>,
}
#[cfg(feature = "vec2d")]
impl<const S: usize> EarleyChart<S> {
fn next_set(&mut self, predicted: Option<[bool; S]>) {
self.predicted.push(predicted.unwrap_or([false; S]));
self.indices.push(self.items.len());
}
fn new() -> Self {
EarleyChart {
predicted: vec![],
indices: vec![],
items: vec![],
}
}
fn len(&self) -> usize {
self.indices.len()
}
fn predicted(&self, index: usize) -> &[bool] {
&self.predicted[index][..]
}
fn medial(&mut self) -> &mut Vec<Item> {
&mut self.items
}
fn next_medial(&self) -> &[Item] {
&self.items[self.indices[self.indices.len() - 1] .. ]
}
fn last(&self) -> EarleySetRef {
let items = &self.items[self.indices[self.indices.len() - 1] ..];
let predicted = &self.predicted.last().unwrap()[..];
EarleySetRef {
items,
predicted,
}
}
fn next_to_last(&self) -> EarleySetRef {
let items = &self.items[self.indices[self.indices.len() - 2] .. self.indices[self.indices.len() - 1]];
let predicted = &self.predicted[self.predicted.len() - 2][..];
EarleySetRef {
items,
predicted,
}
}
fn last_mut(&mut self) -> EarleySetMut {
EarleySetMut {
items: &mut self.items[self.indices[self.indices.len() - 1] ..],
predicted: &mut self.predicted.last_mut().unwrap()[..]
}
}
}
#[cfg(not(feature = "vec2d"))]
impl<const S: usize> EarleyChart<S> {
fn next_set(&mut self, predicted: Option<[bool; S]>) {
self.sets.push(EarleySet { predicted: predicted.unwrap_or([false; S]), medial: vec![] });
}
fn new() -> Self {
EarleyChart {
sets: vec![],
}
}
fn len(&self) -> usize {
self.sets.len()
}
fn push(&mut self, set: EarleySet<S>) {
self.sets.push(set);
}
fn predicted(&self, index: usize) -> &[bool] {
&self.sets[index].predicted[..]
}
fn last(&self) -> EarleySetRef {
let last = self.sets.last().unwrap();
EarleySetRef {
items: &last.medial[..],
predicted: &last.predicted[..],
}
}
fn next_to_last(&self) -> EarleySetRef {
let last = &self.sets[self.sets.len() - 2];
EarleySetRef {
items: &last.medial[..],
predicted: &last.predicted[..],
}
}
fn medial(&mut self) -> &mut Vec<Item> {
&mut self.sets.last_mut().unwrap().medial
}
fn next_medial(&self) -> &[Item] {
&self.sets.last().unwrap().medial[..]
}
fn last_mut(&mut self) -> EarleySetMut {
let last = self.sets.last_mut().unwrap();
EarleySetMut {
items: &mut last.medial[..],
predicted: &mut last.predicted[..]
}
}
}
struct EarleySetMut<'a> {
items: &'a mut [Item],
predicted: &'a mut [bool],
}
struct EarleySetRef<'a> {
items: &'a [Item],
predicted: &'a [bool],
}
#[cfg(feature = "vec2d")]
impl<const S: usize> Index<usize> for EarleyChart<S> {
type Output = [Item];
fn index(&self, index: usize) -> &Self::Output {
&self.items[self.indices[index] .. self.indices[index + 1]]
}
}
#[cfg(not(feature = "vec2d"))]
impl<const S: usize> Index<usize> for EarleyChart<S> {
type Output = [Item];
fn index(&self, index: usize) -> &Self::Output {
&self.sets[index].medial[..]
}
}
#[derive(Clone)]
struct EarleySet<const S: usize> {
predicted: [bool; S],
medial: Vec<Item>,
}
#[derive(Ord, PartialOrd, Eq, PartialEq, Clone)]
struct Item {
postdot: Symbol,
dot: usize,
origin: usize,
node: NodeHandle,
}
#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq)]
struct CompletedItem {
origin: usize,
dot: usize,
left_node: NodeHandle,
right_node: Option<NodeHandle>,
}
trait UnionWith {
fn union_with(&mut self, other: &[bool]);
}
impl<const S: usize> UnionWith for [bool; S] {
fn union_with(&mut self, other: &[bool]) {
for (dst, &src) in self.iter_mut().zip(other.iter()) {
*dst |= src;
}
}
}
impl<'a> UnionWith for &'a mut [bool] {
fn union_with(&mut self, other: &[bool]) {
for (dst, &src) in self.iter_mut().zip(other.iter()) {
*dst |= src;
}
}
}
impl Symbol {
fn usize(self) -> usize {
self.0 as usize
}
}
impl<const S: usize> EarleySet<S> {
fn new() -> Self {
EarleySet {
predicted: [false; S],
medial: vec![],
}
}
}
impl<const S: usize> Grammar<S> {
pub fn new(symbol_names: [&'static str; S], start_symbol: usize) -> Self {
Self {
rules: vec![],
start_symbol: Symbol(start_symbol as u32),
symbol_names,
gen_symbols: 0,
}
}
pub fn symbols(&self) -> [Symbol; S] {
let mut result = [Symbol(0); S];
for (i, elem) in result.iter_mut().enumerate() {
*elem = Symbol(i as u32);
}
result
}
pub fn rule<const N: usize>(&mut self, lhs: Symbol, rhs: [Symbol; N], id: usize) {
let mut cur_rhs0 = rhs[0];
for i in 1..N - 1 {
let gensym = Symbol(self.gen_symbols + S as u32);
self.gen_symbols += 1;
self.rules.push(Rule {
lhs: gensym,
rhs0: cur_rhs0,
rhs1: Some(rhs[i]),
id: None,
});
cur_rhs0 = gensym;
}
self.rules.push(Rule {
lhs,
rhs0: cur_rhs0,
rhs1: if N == 1 { None } else { Some(rhs[N - 1]) },
id: Some(id),
});
}
fn sort_rules(&mut self) {
self.rules.sort_by(|a, b| a.lhs.cmp(&b.lhs));
}
}
// Implementation for the recognizer.
//
// The recognizer has a chart of earley sets (Vec<EarleySet>) as well as the last set (next_set).
//
// A typical loop that utilizes the recognizer:
//
// - for character in string {
// 1. recognizer.begin_earleme();
// 2. recognizer.scan(token_to_symbol(character), values());
// 2a. complete
// 3. recognizer.end_earleme();
// 3a. self.complete_all_sums_entirely();
// 3b. self.sort_medial_items();
// 3c. self.prediction_pass();
// - }
//
impl<const S: usize> Recognizer<S> {
pub fn new(grammar: &Grammar<S>) -> Self {
let mut result = Self {
tables: Tables::new(grammar),
earley_chart: EarleyChart::new(),
forest: Forest::new(grammar),
// complete: BinaryHeap::new_by_key(Box::new(|completed_item| (completed_item.origin, completed_item.dot))),
complete: BinaryHeap::with_capacity(64),
finished_node: None,
};
result.initialize();
result
}
fn initialize(&mut self) {
self.earley_chart.next_set(Some(self.tables.prediction_matrix[self.tables.start_symbol.usize()]));
self.earley_chart.next_set(None);
}
pub fn scan(&mut self, terminal: Symbol, values: u32) {
let earleme = self.earley_chart.len() - 2;
let node = self.forest.leaf(terminal, earleme + 1, values);
self.complete(earleme, terminal, node);
}
pub fn end_earleme(&mut self) -> bool {
if self.is_exhausted() {
false
} else {
// Completion pass, which saves successful parses.
self.finished_node = None;
self.complete_all_sums_entirely();
// Do the rest.
self.sort_medial_items();
self.prediction_pass();
#[cfg(not(feature = "vec2d"))]
self.earley_chart.sets
.push(EarleySet::new());
#[cfg(feature = "vec2d")]
self.earley_chart.next_set(None);
true
}
}
#[cfg_attr(feature = "extra_inline", inline)]
fn is_exhausted(&self) -> bool {
self.earley_chart.next_medial().len() == 0 && self.complete.is_empty()
}
fn complete_all_sums_entirely(&mut self) {
while let Some(&ei) = self.complete.peek() {
let lhs_sym = self.tables.get_lhs(ei.dot);
let mut result_node = None;
while let Some(&ei2) = self.complete.peek() {
if ei.origin == ei2.origin && lhs_sym == self.tables.get_lhs(ei2.dot) {
result_node = Some(self.forest.push_summand(ei2));
self.complete.pop();
} else {
break;
}
}
if ei.origin == 0 && lhs_sym == self.tables.start_symbol {
self.finished_node = Some(result_node.unwrap());
}
self.complete(ei.origin, lhs_sym, result_node.unwrap());
}
}
/// Sorts medial items with deduplication.
fn sort_medial_items(&mut self) {
// Build index by postdot
// These medial positions themselves are sorted by postdot symbol.
self.earley_chart.last_mut().items.sort_unstable();
}
#[cfg_attr(feature = "extra_inline", inline)]
fn prediction_pass(&mut self) {
// Iterate through medial items in the current set.
let mut last = self.earley_chart.last_mut();
let iter = last.items.iter();
// For each medial item in the current set, predict its postdot symbol.
for ei in iter {
if let Some(postdot) = self
.tables
.get_rhs1(ei.dot)
.filter(|postdot| !last.predicted[postdot.usize()])
{
// Prediction happens here. We would prefer to call `self.predict`, but we can't,
// because `self.medial` is borrowed by `iter`.
let source = &self.tables.prediction_matrix[postdot.usize()][..];
last.predicted.union_with(source);
}
}
}
fn complete(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) {
if symbol.usize() >= S {
self.complete_binary_predictions(earleme, symbol, node);
} else if self.earley_chart.predicted(earleme)[symbol.usize()] {
self.complete_medial_items(earleme, symbol, node);
self.complete_predictions(earleme, symbol, node);
}
}
#[cfg_attr(feature = "extra_inline", inline)]
fn complete_medial_items(&mut self, earleme: usize, symbol: Symbol, right_node: NodeHandle) {
let inner_start = {
// we use binary search to narrow down the range of items.
let set_idx = self.earley_chart[earleme]
.binary_search_by(|ei| (self.tables.get_rhs1(ei.dot), 1).cmp(&(Some(symbol), 0)));
match set_idx {
Ok(idx) | Err(idx) => idx,
}
};
let rhs1_eq = |ei: &&Item| self.tables.get_rhs1(ei.dot) == Some(symbol);
for item in self.earley_chart[earleme][inner_start..]
.iter()
.take_while(rhs1_eq)
{
self.complete.push(CompletedItem {
dot: item.dot,
origin: item.origin,
left_node: item.node,
right_node: Some(right_node),
});
}
}
#[cfg_attr(feature = "extra_inline", inline)]
fn complete_predictions(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) {
// println!("{:?}", slice);
for trans in &self.tables.completions[symbol.usize()] {
if self.earley_chart.predicted(earleme)[trans.top.usize()] {
if trans.is_unary {
self.complete.push(CompletedItem {
origin: earleme,
dot: trans.dot,
left_node: node,
right_node: None,
});
} else {
self.earley_chart.medial().push(Item {
origin: earleme,
dot: trans.dot,
node: node,
postdot: self.tables.get_rhs1(trans.dot).unwrap(),
});
}
}
}
}
#[cfg(feature = "simple")]
#[cfg_attr(feature = "extra_inline", inline)]
fn complete_binary_predictions(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) {
// println!("{:?}", slice);
let trans = self.tables.gen_completions[symbol.usize() - S];
if self.earley_chart.predicted(earleme)[trans.top.usize()] {
self.earley_chart.medial().push(Item {
origin: earleme,
dot: trans.dot,
node: node,
postdot: self.tables.get_rhs1(trans.dot).unwrap(),
});
}
}
#[cfg(not(feature = "simple"))]
#[cfg_attr(feature = "extra_inline", inline)]
fn complete_binary_predictions(&mut self, earleme: usize, symbol: Symbol, node: NodeHandle) {
// println!("{:?}", slice);
for trans in &self.tables.completions[symbol.usize()] {
if self.earley_chart.predicted(earleme)[trans.top.usize()] {
let postdot = self.tables.get_rhs1(trans.dot).unwrap();
self.earley_chart.medial().push(Item {
origin: earleme,
dot: trans.dot,
node: node,
postdot,
});
}
}
}
#[cfg(feature = "debug")]
fn log_last_earley_set(&self) {
let dots = self.dots_for_log(self.earley_chart.last());
for (rule_id, dots) in dots {
print!(
"{} ::= ",
self.tables.symbol_names[self.tables.get_lhs(rule_id).usize()]
);
if let Some(origins) = dots.get(&0) {
print!("{:?}", origins);
}
print!(
" {} ",
self.tables.symbol_names[self.tables.rules[rule_id].rhs0.usize()]
);
if let Some(origins) = dots.get(&1) {
print!("{:?}", origins);
}
if let Some(rhs1) = self.tables.get_rhs1(rule_id) {
print!(" {} ", self.tables.symbol_names[rhs1.usize()]);
}
println!();
}
println!();
}
#[cfg(feature = "debug")]
fn log_earley_set_diff(&self) {
use std::collections::{BTreeMap, BTreeSet};
let dots_last_by_id = self.dots_for_log(self.earley_chart.next_to_last());
let mut dots_next_by_id = self.dots_for_log(self.earley_chart.last());
let mut rule_ids: BTreeSet<usize> = BTreeSet::new();
rule_ids.extend(dots_last_by_id.keys());
rule_ids.extend(dots_next_by_id.keys());
for item in self.complete.iter() {
let position = if self.tables.get_rhs1(item.dot).is_some() {
2
} else {
1
};
dots_next_by_id
.entry(item.dot)
.or_insert(BTreeMap::new())
.entry(position)
.or_insert(BTreeSet::new())
.insert(item.origin);
}
let mut empty_diff = true;
for rule_id in rule_ids {
let dots_last = dots_last_by_id.get(&rule_id);
let dots_next = dots_next_by_id.get(&rule_id);
if dots_last == dots_next {
continue;
}
empty_diff = false;
print!(
"from {} ::= ",
self.tables.symbol_names[self.tables.get_top_lhs(rule_id).usize()]
);
if let Some(origins) = dots_last.and_then(|d| d.get(&0)) {
print!("{:?}", origins);
}
print!(
" {} ",
self.tables.symbol_names[self.tables.get_top_rhs0(rule_id).usize()]
);
if let Some(origins) = dots_last.and_then(|d| d.get(&1)) {
print!("{:?}", origins);
}
if let Some(rhs1) = self.tables.get_rhs1(rule_id) {
print!(" {} ", self.tables.symbol_names[rhs1.usize()]);
}
println!();
print!(
"to {} ::= ",
self.tables.symbol_names[self.tables.get_top_lhs(rule_id).usize()]
);
if let Some(origins) = dots_next.and_then(|d| d.get(&0)) {
print!("{:?}", origins);
}
print!(
" {} ",
self.tables.symbol_names[self.tables.get_top_rhs0(rule_id).usize()]
);
if let Some(origins) = dots_next.and_then(|d| d.get(&1)) {
print!("{:?}", origins);
}
if let Some(rhs1) = self.tables.get_rhs1(rule_id) {
print!(" {} ", self.tables.symbol_names[rhs1.usize()]);
}
if let Some(origins) = dots_next.and_then(|d| d.get(&2)) {
print!("{:?}", origins);
}
println!();
}
if empty_diff {
println!("no diff");
println!();
} else {
println!();
}
}
#[cfg(feature = "debug")]
fn dots_for_log(&self, es: EarleySetRef) -> BTreeMap<usize, BTreeMap<usize, BTreeSet<usize>>> {
let mut dots = BTreeMap::new();
for (i, rule) in self.tables.rules.iter().enumerate() {
if es.predicted[self.tables.get_top_lhs(i).usize()] {
dots.entry(i)
.or_insert(BTreeMap::new())
.entry(0)
.or_insert(BTreeSet::new())
.insert(self.earley_chart.len() - 1);
}
}
for item in es.items {
dots.entry(item.dot)
.or_insert(BTreeMap::new())
.entry(1)
.or_insert(BTreeSet::new())
.insert(item.origin);
}
dots
}
}
impl<const S: usize> Tables<S> {
fn new(grammar: &Grammar<S>) -> Self {
let mut result = Self {
prediction_matrix: [[false; S]; S],
start_symbol: grammar.start_symbol,
rules: vec![],
rules_by_rhs0: vec![],
completions: vec![],
symbol_names: grammar.symbol_names,
#[cfg(feature = "simple")]
gen_completions: vec![Default::default(); grammar.gen_symbols as usize],
};
result.populate(grammar);
result
}
fn populate(&mut self, grammar: &Grammar<S>) {
self.populate_rules(grammar);
self.populate_prediction_matrix(grammar);
self.populate_completions(grammar);
}
fn populate_prediction_matrix(&mut self, grammar: &Grammar<S>) {
for rule in &grammar.rules {
if rule.rhs0.usize() < S {
let mut top = rule.lhs;
while top.usize() >= S {
// appears on only one rhs0
let idx = self.rules_by_rhs0
.binary_search_by_key(&top, |elem| elem.rhs0)
.expect("lhs not found");
top = self.rules_by_rhs0[idx].lhs;
}
self.prediction_matrix[top.usize()][rule.rhs0.usize()] = true;
}
}
self.reflexive_closure();
self.transitive_closure();
}
fn reflexive_closure(&mut self) {
for i in 0..S {
self.prediction_matrix[i][i] = true;
}
}
fn transitive_closure(&mut self) {
for pos in 0..S {
let (rows0, rows1) = self.prediction_matrix.split_at_mut(pos);
let (rows1, rows2) = rows1.split_at_mut(1);
for dst_row in rows0.iter_mut().chain(rows2.iter_mut()) {
if dst_row[pos] {
dst_row.union_with(&rows1[0]);
}
}
}
}
fn populate_rules(&mut self, grammar: &Grammar<S>) {
self.rules = grammar.rules.clone();
self.rules_by_rhs0 = self.rules.clone();
self.rules_by_rhs0.sort_by_key(|rule| rule.rhs0);
}
fn populate_completions(&mut self, grammar: &Grammar<S>) {
self.completions.resize(S + grammar.gen_symbols as usize, vec![]);
for (i, rule) in grammar.rules.iter().enumerate() {
let rhs0 = rule.rhs0.usize();
let mut top = rule.lhs;
while top.usize() >= S {
// appears on only one rhs0
let idx = self.rules_by_rhs0
.binary_search_by_key(&top, |elem| elem.rhs0)
.expect("lhs not found");
top = self.rules_by_rhs0[idx].lhs;
}
let transition = PredictionTransition {
symbol: rule.lhs,
top,
dot: i,
is_unary: rule.rhs1.is_none(),
};
if rhs0 >= S && cfg!(feature = "simple") {
#[cfg(feature = "simple")] {
self.gen_completions[rhs0 - S] = transition;
}
} else {
self.completions[rhs0].push(transition);
}
}
}
fn get_rhs1(&self, n: usize) -> Option<Symbol> {
self.rules.get(n).and_then(|rule| rule.rhs1)
}
fn get_lhs(&self, n: usize) -> Symbol {
self.rules[n].lhs
}
#[cfg(feature = "debug")]
fn get_top_lhs(&self, dot: usize) -> Symbol {
let mut top = self.rules[dot].lhs;
while top.usize() >= S {
// appears on only one rhs0
let idx = self.rules_by_rhs0
.binary_search_by_key(&top, |elem| elem.rhs0)
.expect("lhs not found");
top = self.rules_by_rhs0[idx].lhs;
}
top
}
#[cfg(feature = "debug")]
fn get_top_rhs0(&self, dot: usize) -> Symbol {
let mut top = self.rules[dot].rhs0;
while top.usize() >= S {
// appears on only one rhs0
let idx = self.rules_by_rhs0
.binary_search_by_key(&top, |elem| elem.rhs0)
.expect("lhs not found");
top = self.rules_by_rhs0[idx].lhs;
}
top
}
}
#[derive(Clone, Debug)]
pub enum Value {
Digits(String),
Float(f64),
None,
}
#[derive(Clone)]
pub struct CalcRecognizer {
grammar: Grammar<13>,
recognizer: Recognizer<13>,
}
pub fn calc_recognizer() -> CalcRecognizer {
let mut grammar = Grammar::new(
[
"sum", "factor", "op_mul", "op_div", "lparen", "rparen", "expr_sym", "op_minus",
"op_plus", "number", "whole", "digit", "dot",
],
0,
);
let [sum, factor, op_mul, op_div, lparen, rparen, expr_sym, op_minus, op_plus, number, whole, digit, dot] =
grammar.symbols();
// sum ::= sum [+-] factor
// sum ::= factor
// factor ::= factor [*/] expr
// factor ::= expr
// expr ::= '(' sum ')' | '-' expr | number
// number ::= whole | whole '.' whole
// whole ::= whole [0-9] | [0-9]
grammar.rule(sum, [sum, op_plus, factor], 0);
grammar.rule(sum, [sum, op_minus, factor], 1);
grammar.rule(sum, [factor], 2);
grammar.rule(factor, [factor, op_mul, expr_sym], 3);
grammar.rule(factor, [factor, op_div, expr_sym], 4);
grammar.rule(factor, [expr_sym], 5);
grammar.rule(expr_sym, [lparen, sum, rparen], 6);
grammar.rule(expr_sym, [op_minus, expr_sym], 7);
grammar.rule(expr_sym, [number], 8);
grammar.rule(number, [whole], 9);
grammar.rule(number, [whole, dot, whole], 10);
grammar.rule(whole, [whole, digit], 11);
grammar.rule(whole, [digit], 12);
grammar.sort_rules();
let recognizer = Recognizer::new(&grammar);
CalcRecognizer {
recognizer,
grammar,
}
}
#[cfg(any(feature = "linked_forest", feature = "compact_forest", feature = "rel_compact_forest"))]
struct E {
symbols: [Symbol; 13],
}
#[cfg(any(feature = "linked_forest", feature = "compact_forest", feature = "rel_compact_forest"))]
impl self::forest::Eval for E {
type Elem = Value;
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem {
let [sum, factor, op_mul, op_div, lparen, rparen, _expr_sym, op_minus, op_plus, _number, _whole, digit, dot] =
self.symbols;
if terminal == digit {
Value::Digits((values as u8 as char).to_string())
} else {
Value::None
}
}
fn product(&mut self, action: u32, args: &mut std::collections::LinkedList<Self::Elem>) -> Self::Elem {
let [sum, factor, op_mul, op_div, lparen, rparen, _expr_sym, op_minus, op_plus, _number, _whole, digit, dot] =
self.symbols;
match (
action,
args.pop_front().unwrap_or(Value::None),
args.pop_front().unwrap_or(Value::None),
args.pop_front().unwrap_or(Value::None),
) {
(0, Value::Float(left), _, Value::Float(right)) => Value::Float(left + right),
(1, Value::Float(left), _, Value::Float(right)) => Value::Float(left - right),
(2, val, Value::None, Value::None) => val,
(3, Value::Float(left), _, Value::Float(right)) => Value::Float(left * right),
(4, Value::Float(left), _, Value::Float(right)) => Value::Float(left / right),
(5, val, Value::None, Value::None) => val,
(6, _, val, _) => val,
(7, _, Value::Float(num), Value::None) => Value::Float(-num),
(8, Value::Digits(digits), Value::None, Value::None) => {
Value::Float(digits.parse::<f64>().unwrap())
}
(9, val @ Value::Digits(..), _, _) => val,
(10, Value::Digits(before_dot), _, Value::Digits(after_dot)) => {
let mut digits = before_dot;
digits.push('.');
digits.push_str(&after_dot[..]);
Value::Digits(digits)
}
(11, Value::Digits(mut num), Value::Digits(digit), _) => {
num.push_str(&digit[..]);
Value::Digits(num)
}
(12, val @ Value::Digits(..), _, _) => val,
args => panic!("unknown rule id {:?} or args {:?}", action, args),
}
}
}
impl CalcRecognizer {
pub fn parse(&mut self, expr: &str) -> f64 {
let [sum, factor, op_mul, op_div, lparen, rparen, _expr_sym, op_minus, op_plus, _number, _whole, digit, dot] =
self.grammar.symbols();
let symbols = self.grammar.symbols();
for (i, ch) in expr.chars().enumerate() {
let terminal = match ch {
'-' => op_minus,
'.' => dot,
'0'..='9' => digit,
'(' => lparen,
')' => rparen,
'*' => op_mul,
'/' => op_div,
'+' => op_plus,
' ' => continue,
other => panic!("invalid character {}", other),
};
self.recognizer.scan(terminal, ch as u32);
let success = self.recognizer.end_earleme();
#[cfg(feature = "debug")]
if !success {
self.recognizer.log_earley_set_diff();
}
assert!(success, "parse failed at character {}", i);
}
let finished_node = self.recognizer.finished_node.expect("parse failed");
#[cfg(any(feature = "compact_forest", feature = "linked_forest", feature = "rel_compact_forest"))]
let result = self.recognizer.forest.evaluator(E { symbols }).evaluate(finished_node);
#[cfg(feature = "simple_forest")]
let mut evaluator = Evaluator::new(
#[cfg(all(not(feature = "simple_forest"), not(feature = "compact_forest"), not(feature = "linked_forest")))]
|rule_id, result: &[Value], args: &[usize]| {
// println!("{:?}", (rule_id, result, args));
match (
rule_id,
args.get(0).cloned().and_then(|a| result.get(a)).cloned().unwrap_or(Value::None),
args.get(1).cloned().and_then(|a| result.get(a)).cloned().unwrap_or(Value::None),
args.get(2).cloned().and_then(|a| result.get(a)).cloned().unwrap_or(Value::None),
) {
(0, Value::Float(left), _, Value::Float(right)) => Value::Float(left + right),
(1, Value::Float(left), _, Value::Float(right)) => Value::Float(left - right),
(2, val, Value::None, Value::None) => val,
(3, Value::Float(left), _, Value::Float(right)) => Value::Float(left * right),
(4, Value::Float(left), _, Value::Float(right)) => Value::Float(left / right),
(5, val, Value::None, Value::None) => val,
(6, _, val, _) => val,
(7, _, Value::Float(num), Value::None) => Value::Float(-num),
(8, Value::Digits(digits), Value::None, Value::None) => {
Value::Float(digits.parse::<f64>().unwrap())
}
(9, val @ Value::Digits(..), _, _) => val,
(10, Value::Digits(before_dot), _, Value::Digits(after_dot)) => {
let mut digits = before_dot;
digits.push('.');
digits.push_str(&after_dot[..]);
Value::Digits(digits)
}
(11, Value::Digits(mut num), Value::Digits(digit), _) => {
num.push_str(&digit[..]);
Value::Digits(num)
}
(12, val @ Value::Digits(..), _, _) => val,
args => panic!("unknown rule id {:?} or args {:?}", rule_id, args),
}},
#[cfg(feature = "simple_forest")]
|rule_id, args: &[Value]| {
// println!("{:?}", (rule_id, result, args));
match (
rule_id,
args.get(0).cloned().unwrap_or(Value::None),
args.get(1).cloned().unwrap_or(Value::None),
args.get(2).cloned().unwrap_or(Value::None),
) {
(0, Value::Float(left), _, Value::Float(right)) => Value::Float(left + right),
(1, Value::Float(left), _, Value::Float(right)) => Value::Float(left - right),
(2, val, Value::None, Value::None) => val,
(3, Value::Float(left), _, Value::Float(right)) => Value::Float(left * right),
(4, Value::Float(left), _, Value::Float(right)) => Value::Float(left / right),
(5, val, Value::None, Value::None) => val,
(6, _, val, _) => val,
(7, _, Value::Float(num), Value::None) => Value::Float(-num),
(8, Value::Digits(digits), Value::None, Value::None) => {
Value::Float(digits.parse::<f64>().unwrap())
}
(9, val @ Value::Digits(..), _, _) => val,
(10, Value::Digits(before_dot), _, Value::Digits(after_dot)) => {
let mut digits = before_dot;
digits.push('.');
digits.push_str(&after_dot[..]);
Value::Digits(digits)
}
(11, Value::Digits(mut num), Value::Digits(digit), _) => {
num.push_str(&digit[..]);
Value::Digits(num)
}
(12, val @ Value::Digits(..), _, _) => val,
args => panic!("unknown rule id {:?} or args {:?}", rule_id, args),
}},
#[cfg(feature = "simple_forest")]
|terminal, values| {
if terminal == digit {
Value::Digits((values as u8 as char).to_string())
} else {
Value::None
}
}
);
#[cfg(feature = "forest_size")]
panic!("{}", self.recognizer.forest.memory_use());
#[cfg(feature = "simple_forest")]
let result = evaluator.evaluate(&mut self.recognizer.forest, finished_node);
if let Value::Float(num) = result {
num
} else {
panic!("evaluation failed")
}
}
}
pub fn calc(expr: &str) -> f64 {
let mut recognizer = calc_recognizer();
recognizer.parse(expr)
}
#[test]
fn test_parse() {
assert_eq!(calc("1.0 + 2.0"), 3.0);
}
#[bench]
fn bench_parser(bench: &mut Bencher) {
let recognizer = calc_recognizer();
bench.bytes = 9;
bench.iter(|| {
let mut parser = recognizer.clone();
parser.parse("1.0 + 2.0")
});
}
#[bench]
fn bench_parser2(bench: &mut Bencher) {
let recognizer = calc_recognizer();
bench.bytes = 76;
bench.iter(|| {
let mut parser = recognizer.clone();
parser.parse("1.0 + 2.0 * 3.0 + 1.0 + 2.0 * 3.0 + 1.0 + 2.0 * 3.0 / 1.0 + 2.0 * 3.01234234")
});
}
#[bench]
fn bench_parser3(bench: &mut Bencher) {
let recognizer = calc_recognizer();
bench.bytes = 68 + 92 + 74;
bench.iter(|| {
let mut parser = recognizer.clone();
parser.parse("1.0 + (2.0 * 3.0 + (1.0 + 2.0 * 3.0) + 1.0) + 2.0 * 3.0 / 1.0 + 2.0 \
* 3.01234234 + (2.0 * 3.0 + (1.0 + 2.0 * 3.0) + 1.0) + 2.0 * 3.0 / 1.0 + 2.0 * 3.01234234 + \
(2.0 * 3.0 + (1.0 + 2.0 * 3.0) + 1.0) + 2.0 * 3.0 / 1.0 + 2.0 * 3.01234234")
});
}
// Forest
use std::collections::LinkedList;
use super::*;
#[derive(Clone)]
pub struct Forest {
graph: Vec<Node>,
eval: Vec<Option<usize>>,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NodeHandle(usize);
#[derive(Clone)]
enum Node {
Product {
action: u32,
left_factor: NodeHandle,
right_factor: Option<NodeHandle>,
},
Leaf {
terminal: Symbol,
values: u32,
},
}
const NULL_ACTION: u32 = !0;
impl Forest {
pub fn memory_use(&self) -> usize {
self.graph.len() * std::mem::size_of::<Node>()
}
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self {
Self {
graph: vec![],
eval: grammar.rules.iter().map(|rule| rule.id).collect(),
}
}
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
self.graph.push(Node::Leaf { terminal, values });
handle
}
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
let eval = self.eval[item.dot].map(|id| id as u32);
self.graph.push(Node::Product {
action: eval.unwrap_or(NULL_ACTION),
left_factor: item.left_node,
right_factor: item.right_node,
});
handle
}
pub fn evaluator<T: Eval>(&mut self, eval: T) -> Evaluator<T> {
Evaluator {
forest: self,
eval,
}
}
}
pub trait Eval {
type Elem;
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem;
fn product(&mut self, action: u32, list: &mut LinkedList<Self::Elem>) -> Self::Elem;
}
pub struct Evaluator<'a, T> {
eval: T,
forest: &'a mut Forest,
}
impl<'a, T: Eval> Evaluator<'a, T> {
pub fn evaluate(&mut self, finished_node: NodeHandle) -> T::Elem {
self.evaluate_rec(finished_node).into_iter().next().unwrap()
}
fn evaluate_rec(&mut self, handle: NodeHandle) -> LinkedList<T::Elem> {
match self.forest.graph[handle.0] {
Node::Product {
left_factor,
right_factor,
action,
} => {
let mut evald = self.evaluate_rec(left_factor);
if let Some(factor) = right_factor {
evald.append(&mut self.evaluate_rec(factor));
}
if action != NULL_ACTION {
let mut list = LinkedList::new();
list.push_back(self.eval.product(action as u32, &mut evald));
list
} else {
evald
}
}
Node::Leaf { terminal, values } => {
let mut list = LinkedList::new();
list.push_back(self.eval.leaf(terminal, values));
list
}
}
}
}
// Forest
use std::{collections::LinkedList, iter};
use super::*;
#[derive(Clone)]
pub struct Forest {
graph: Vec<u8>,
eval: Vec<Option<usize>>,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NodeHandle(usize, usize);
impl NodeHandle {
fn get(self) -> usize {
self.1 - self.0
}
}
#[derive(Clone)]
enum Node {
Product {
action: u32,
left_factor: NodeHandle,
right_factor: Option<NodeHandle>,
},
Leaf {
terminal: Symbol,
values: u32,
},
}
static NODE_SIZES: &'static [(u8, u8, u8, bool)] = &[
(8, 8, 0, false),
(16, 16, 0, false),
(8, 8, 0, true),
(8, 8, 8, true),
(8, 16, 8, true),
(16, 16, 8, true),
(8, 16, 16, true),
(8, 16, 0, true),
(8, 24, 24, true),
(16, 24, 24, true),
(16, 32, 32, true),
(16, 32, 0, true),
(32, 64, 64, true),
(32, 64, 0, true),
(32, 64, 16, true),
];
const NULL_ACTION: u32 = 0;
impl Forest {
fn push_node(&mut self, node: Node) {
let size = |mut x: u64| {
let mut result = 0;
while x != 0 {
x = x >> 8;
result += 8;
}
result
};
let tup = match node {
Node::Product { action, left_factor, right_factor } => {
(action as u32, (self.graph.len() - left_factor.get()) as u64, right_factor.map_or(0, |f| (self.graph.len() - f.get()) as u64), true)
}
Node::Leaf { terminal, values } => {
(terminal.0 as u32, values as u64, 0, false)
}
};
let s = (size(tup.0 as u64), size(tup.1), size(tup.2));
let (idx, size) = NODE_SIZES.iter().enumerate().find(|(_i, sizes)| s.0 <= sizes.0 && s.1 <= sizes.1 && s.2 <= sizes.2 && tup.3 == sizes.3).unwrap();
// if idx.is_none() {
// panic!("wrong size for {:?} {:?}", tup, size(tup.1));
// }
// let idx = idx.unwrap();
// let size = NODE_SIZES[idx];
let mut result = [0u8; 24];
result[0 .. size.0 as usize / 8].copy_from_slice(&u32::to_le_bytes(tup.0)[0 .. size.0 as usize / 8]);
result[size.0 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.1)[0 .. size.1 as usize / 8]);
result[size.0 as usize / 8 + size.1 as usize / 8 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].copy_from_slice(&u64::to_le_bytes(tup.2)[0 .. size.2 as usize / 8]);
self.graph.push(idx as u8);
self.graph.extend(result[0 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8].into_iter().cloned());
}
}
impl Forest {
pub fn memory_use(&self) -> usize {
self.graph.len()
}
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self {
Self {
graph: vec![0],
eval: grammar.rules.iter().map(|rule| rule.id).collect(),
}
}
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle {
let handle = NodeHandle(0, self.graph.len());
self.push_node(Node::Leaf { terminal, values });
handle
}
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle {
let handle = NodeHandle(0, self.graph.len());
let eval = self.eval[item.dot].map(|id| id as u32);
self.push_node(Node::Product {
action: eval.unwrap_or(NULL_ACTION),
left_factor: item.left_node,
right_factor: item.right_node,
});
handle
}
pub fn evaluator<T: Eval>(&mut self, eval: T) -> Evaluator<T> {
Evaluator {
forest: self,
eval,
}
}
fn get(&self, handle: NodeHandle) -> Node {
let slice = &self.graph[handle.get() ..];
let size = NODE_SIZES[slice[0] as usize];
let all = &slice[1 .. size.0 as usize / 8 + size.1 as usize / 8 + size.2 as usize / 8 + 1];
let (first, second) = all.split_at(size.0 as usize / 8);
let (second, third) = second.split_at(size.1 as usize / 8);
let mut a = [0; 4];
a[0 .. first.len()].copy_from_slice(first);
let mut b = [0; 8];
b[0 .. second.len()].copy_from_slice(second);
let mut c = [0; 8];
c[0 .. third.len()].copy_from_slice(third);
if size.3 {
Node::Product {
action: u32::from_le_bytes(a),
left_factor: NodeHandle(u64::from_le_bytes(b) as usize, handle.get()),
right_factor: if size.2 == 0 { None } else { Some(NodeHandle(u64::from_le_bytes(c) as usize, handle.get())) },
}
} else {
Node::Leaf {
terminal: Symbol(u32::from_le_bytes(a)),
values: u32::from_le_bytes([b[0], b[1], b[2], b[3]]),
}
}
}
}
pub trait Eval {
type Elem;
fn leaf(&mut self, terminal: Symbol, values: u32) -> Self::Elem;
fn product(&mut self, action: u32, list: &mut LinkedList<Self::Elem>) -> Self::Elem;
}
pub struct Evaluator<'a, T> {
eval: T,
forest: &'a mut Forest,
}
impl<'a, T: Eval> Evaluator<'a, T> {
pub fn evaluate(&mut self, finished_node: NodeHandle) -> T::Elem {
self.evaluate_rec(finished_node).into_iter().next().unwrap()
}
fn evaluate_rec(&mut self, handle: NodeHandle) -> LinkedList<T::Elem> {
match self.forest.get(handle) {
Node::Product {
left_factor,
right_factor,
action,
} => {
let mut evald = self.evaluate_rec(left_factor);
if let Some(factor) = right_factor {
evald.append(&mut self.evaluate_rec(factor));
}
if action != NULL_ACTION {
let mut list = LinkedList::new();
list.push_back(self.eval.product(action as u32, &mut evald));
list
} else {
evald
}
}
Node::Leaf { terminal, values } => {
let mut list = LinkedList::new();
list.push_back(self.eval.leaf(terminal, values));
list
}
}
}
}
// Forest
use super::*;
#[derive(Clone)]
pub struct Forest {
graph: Vec<Node>,
eval: Vec<Option<usize>>,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NodeHandle(usize);
#[derive(Clone)]
enum Node {
Product {
action: u32,
left_factor: NodeHandle,
right_factor: Option<NodeHandle>,
},
Leaf {
terminal: Symbol,
values: u32,
},
}
const NULL_ACTION: u32 = !0;
impl Forest {
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self {
Self {
graph: vec![],
eval: grammar.rules.iter().map(|rule| rule.id).collect(),
}
}
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
self.graph.push(Node::Leaf { terminal, values });
handle
}
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
let eval = self.eval[item.dot].map(|id| id as u32);
self.graph.push(Node::Product {
action: eval.unwrap_or(NULL_ACTION),
left_factor: item.left_node,
right_factor: item.right_node,
});
handle
}
}
pub struct Evaluator<F, G> {
eval_product: F,
eval_leaf: G,
}
struct Rec<T>(T, Option<Box<Rec<T>>>);
impl<T, F, G> Evaluator<F, G>
where
F: Fn(u32, &[T]) -> T + Copy,
G: Fn(Symbol, u32) -> T + Copy,
T: Clone + ::std::fmt::Debug,
{
pub fn new(eval_product: F, eval_leaf: G) -> Self {
Self {
eval_product,
eval_leaf,
}
}
pub fn evaluate(&mut self, forest: &mut Forest, finished_node: NodeHandle) -> T {
self.evaluate_rec(forest, finished_node)[0].clone()
}
fn evaluate_rec(&mut self, forest: &mut Forest, handle: NodeHandle) -> Vec<T> {
match forest.graph[handle.0] {
Node::Product {
left_factor,
right_factor,
action,
} => {
let mut result = self.evaluate_rec(forest, left_factor);
if let Some(factor) = right_factor {
result.extend(self.evaluate_rec(forest, factor));
}
if action != NULL_ACTION {
vec![(self.eval_product)(action as u32, &result)]
} else {
result
}
}
Node::Leaf { terminal, values } => {
vec![(self.eval_leaf)(terminal, values)]
}
}
}
}
// Forest
use std::iter::Product;
use super::*;
#[derive(Clone)]
pub struct Forest {
graph: Vec<Node>,
eval: Vec<Option<usize>>,
}
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct NodeHandle(usize);
#[derive(Clone)]
enum Node {
Product {
action: u32,
left_factor: NodeHandle,
right_factor: Option<NodeHandle>,
},
Leaf {
terminal: Symbol,
values: u32,
},
Evaluated {
index: usize,
}
}
const NULL_ACTION: u32 = !0;
impl Forest {
pub fn new<const S: usize>(grammar: &Grammar<S>) -> Self {
Self {
graph: vec![],
eval: grammar.rules.iter().map(|rule| rule.id).collect(),
}
}
pub fn leaf(&mut self, terminal: Symbol, _x: usize, values: u32) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
self.graph.push(Node::Leaf { terminal, values });
handle
}
pub fn push_summand(&mut self, item: CompletedItem) -> NodeHandle {
let handle = NodeHandle(self.graph.len());
let eval = self.eval[item.dot].map(|id| id as u32);
self.graph.push(Node::Product {
action: eval.unwrap_or(NULL_ACTION),
left_factor: item.left_node,
right_factor: item.right_node,
});
handle
}
}
pub struct Evaluator<F, G> {
eval_product: F,
eval_leaf: G,
stack: Vec<NodeHandle>,
}
impl<T, F, G> Evaluator<F, G>
where
F: Fn(u32, &[T], &[usize]) -> T + Copy,
G: Fn(Symbol, u32) -> T + Copy,
T: Clone + ::std::fmt::Debug,
{
pub fn new(eval_product: F, eval_leaf: G) -> Self {
Self {
eval_product,
eval_leaf,
stack: vec![],
}
}
pub fn evaluate(&mut self, forest: &mut Forest, finished_node: NodeHandle) -> T {
let mut liveness = ::std::iter::repeat(false).take(forest.graph.len()).collect::<Vec<_>>();
let mut work = vec![finished_node];
let mut result = vec![];
let mut refs = vec![];
while let Some(node) = work.pop() {
if !liveness[node.0] {
match forest.graph[node.0] {
Node::Product { left_factor, right_factor, action } => {
liveness[node.0] = action != NULL_ACTION;
if let Some(right) = right_factor {
work.push(right);
}
work.push(left_factor);
}
Node::Leaf { .. } => {
liveness[node.0] = true;
}
Node::Evaluated { .. } => unreachable!()
}
}
}
for (i, alive) in (0 .. forest.graph.len()).zip(liveness.iter().cloned()) {
if alive {
self.evaluate_node(forest, NodeHandle(i), &mut result, &mut refs);
}
}
match forest.graph[finished_node.0] {
Node::Evaluated { index } => {
result[index].clone()
}
_ => unreachable!()
}
}
fn evaluate_node<'a>(&mut self, forest: &mut Forest, node: NodeHandle, result: &'a mut Vec<T>, refs: &'a mut Vec<usize>) {
#[derive(Eq, PartialEq)]
enum Action {
Leaf(Symbol, u32),
Product(u32),
}
self.stack.push(node);
let mut first_action = Action::Product(NULL_ACTION);
while let Some(factor) = self.stack.pop() {
match &forest.graph[factor.0] {
&Node::Product { action, left_factor, right_factor } => {
if action != NULL_ACTION {
first_action = Action::Product(action);
}
if let Some(rfactor) = right_factor {
self.stack.push(rfactor);
}
self.stack.push(left_factor);
}
&Node::Evaluated { index } => {
refs.push(index);
}
&Node::Leaf { terminal, values } => {
first_action = Action::Leaf(terminal, values)
}
}
}
let finished = match first_action {
Action::Leaf(terminal, values) => {
(self.eval_leaf)(terminal, values)
}
Action::Product(action) => {
(self.eval_product)(action, &result[..], &refs[..])
}
};
refs.clear();
self.stack.clear();
forest.graph[node.0] = Node::Evaluated { index: result.len() };
result.push(finished);
}
}
@pczarn
Copy link
Author

pczarn commented Jun 2, 2024

Will prototype ideas for earley parsers in this gist. Basically we can do conditional compilation to find improvements.

@pczarn
Copy link
Author

pczarn commented Jun 14, 2024

We now do lots of conditional compilation. This gist contains a full Cargo crate.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment