Skip to content

Instantly share code, notes, and snippets.

@benmkw
Created April 9, 2020 16:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benmkw/c91fec41674ac66c51c0aac5ec276344 to your computer and use it in GitHub Desktop.
Save benmkw/c91fec41674ac66c51c0aac5ec276344 to your computer and use it in GitHub Desktop.
pratt parser (and simple interpreter), handels both associativity and precedence
// ported from https://www.craftinginterpreters.com/compiling-expressions.html#a-pratt-parser
fn main() {
let input = "10*2+3";
assert_eq!(run(input), 10 * 2 + 3);
let input = "293474*4^5-50";
assert_eq!(run(input), 300_517_376 - 50);
let input = "-50+3*2^3";
assert_eq!(run(input), -50+3*8);
}
#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
pub enum Token {
Number(i64),
Plus,
Minus,
Star,
Slash,
UpArrow,
LParen,
RParen,
}
pub fn tokenize(s: &str) -> Vec<Token> {
use Token::*;
let mut tokens = vec![];
// could skip utf-8 validation and work on bytes but not clear if its really worth it
// benchmarks show different results, change is very easy though
let mut iter = s.chars().peekable();
while let Some(c) = iter.next() {
match c {
'+' => tokens.push(Plus),
'-' => tokens.push(Minus),
'*' => tokens.push(Star),
'/' => tokens.push(Slash),
'^' => tokens.push(UpArrow),
'(' => tokens.push(LParen),
')' => tokens.push(RParen),
' ' | '\n' | '\r' => {}
digit_c => {
debug_assert!(digit_c.is_digit(10));
let mut num_acc = digit_c.to_digit(10).unwrap() as i64;
while let Some(curr_number) = iter.peek().and_then(|&c| c.to_digit(10)) {
num_acc *= 10;
num_acc += curr_number as i64;
iter.next();
}
tokens.push(Number(num_acc));
}
}
}
tokens
}
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub enum Operation {
Number(i64),
Add,
Sub,
Mult,
Div,
Exp,
Negate,
}
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd)]
pub enum Precedence {
NONE,
TERM,
FACTOR,
EXP,
NEG,
CALL,
MAX,
}
impl Precedence {
fn next_precedence(&self) -> Precedence {
match self {
Precedence::NONE => Precedence::TERM,
Precedence::TERM => Precedence::FACTOR,
Precedence::FACTOR => Precedence::EXP,
Precedence::EXP => Precedence::NEG,
Precedence::NEG => Precedence::CALL,
Precedence::CALL => Precedence::MAX,
Precedence::MAX => Precedence::MAX,
}
}
}
type ParseFn<'a> = dyn Fn(&'a [Token], &mut Vec<Operation>) -> &'a [Token];
type OptParseFn<'a> = Option<&'a ParseFn<'a>>;
struct ParseRule<'a> {
prefix: OptParseFn<'a>,
infix: OptParseFn<'a>,
precedence: Precedence,
}
#[must_use]
fn number<'a>(tokens: &'a [Token], ops: &mut Vec<Operation>) -> &'a [Token] {
match tokens[0] {
Token::Number(n) => ops.push(Operation::Number(n)),
x => panic!("found Token {:?} in call to number<'a>()... ", x),
}
&tokens[1..]
}
#[must_use]
fn grouping<'a>(mut tokens: &'a [Token], mut ops: &mut Vec<Operation>) -> &'a [Token] {
debug_assert_eq!(tokens[0], Token::LParen);
tokens = parse_precedence(&tokens[1..], &mut ops, Precedence::TERM);
debug_assert_eq!(tokens[0], Token::RParen);
&tokens[1..]
}
#[must_use]
fn negate<'a>(tokens: &'a [Token], mut ops: &mut Vec<Operation>) -> &'a [Token] {
debug_assert_eq!(tokens[0], Token::Minus);
let tokens = parse_precedence(&tokens[1..], &mut ops, Precedence::NEG);
ops.push(Operation::Negate);
tokens
}
#[must_use]
fn binary<'a>(mut tokens: &'a [Token], mut ops: &mut Vec<Operation>) -> &'a [Token] {
let curr_token = tokens[0];
let rule = token_to_rule(curr_token);
let precedence = rule.precedence;
let next_precedence = match curr_token {
Token::Number(_) | Token::Plus | Token::Minus | Token::Star | Token::Slash => {
precedence.next_precedence() // left associative
}
Token::UpArrow => precedence, // right associative
_ => unreachable!(),
};
tokens = parse_precedence(&tokens[1..], &mut ops, next_precedence);
match curr_token {
Token::Number(x) => ops.push(Operation::Number(x)),
Token::Plus => ops.push(Operation::Add),
Token::Minus => ops.push(Operation::Sub),
Token::Star => ops.push(Operation::Mult),
Token::Slash => ops.push(Operation::Div),
Token::UpArrow => ops.push(Operation::Exp),
Token::LParen | Token::RParen => unreachable!(),
}
tokens
}
macro_rules! rule {
($prefix : expr, $infix : expr ; $precedence : expr) => {{
ParseRule {
prefix: match $prefix {
Some(prefix) => Some(prefix),
None => None,
},
infix: match $infix {
Some(infix) => Some(infix),
None => None,
},
precedence: $precedence,
}
}};
}
#[must_use]
fn token_to_rule<'a>(token: Token) -> ParseRule<'a> {
use Token::*;
match token {
Number(_) => rule!(Some(&number), OptParseFn::None ; Precedence::NONE),
Plus => rule!(OptParseFn::None, Some(&binary) ; Precedence::TERM),
Minus => rule!(Some(&negate), Some(&binary) ; Precedence::TERM),
Star => rule!(OptParseFn::None, Some(&binary) ; Precedence::FACTOR),
Slash => rule!(OptParseFn::None, Some(&binary) ; Precedence::FACTOR),
UpArrow => rule!(OptParseFn::None, Some(&binary) ; Precedence::EXP),
LParen => rule!(Some(&grouping), OptParseFn::None ; Precedence::CALL),
RParen => rule!(OptParseFn::None, OptParseFn::None ; Precedence::NONE),
}
}
#[must_use]
pub fn parse_precedence<'a>(
mut tokens: &'a [Token],
mut ops: &mut Vec<Operation>,
precedence: Precedence,
) -> &'a [Token] {
// PREFIX RULE
let token = tokens[0];
let rule = token_to_rule(token);
if let Some(prefix_rule) = &rule.prefix {
tokens = prefix_rule(&tokens, &mut ops);
} else {
panic!(
"no prefix rule found for token: {:?} (precedence: {:?})",
&rule.precedence, &token
);
}
// INFIX RULES
while !tokens.is_empty() {
let token = tokens[0];
if precedence > token_to_rule(token).precedence {
// dong the check here instead of during the while condition has a huge performance impact
// in my current benchmarks
break;
}
if let Some(infix_rule) = token_to_rule(token).infix {
tokens = infix_rule(tokens, &mut ops);
} else {
panic!("no infix rule found for token: {:?}", token);
}
}
tokens
}
pub fn run(input: &str) -> i64 {
let tokens = tokenize(input);
let mut ops = vec![];
let tokens = parse_precedence(&tokens, &mut ops, Precedence::NONE);
debug_assert!(tokens.is_empty());
interpret(&ops)
}
macro_rules! binary_op {
($stack : expr => $operation : expr) => {{
let first = $stack.pop().unwrap();
let second = $stack.pop().unwrap();
match (first, second) {
(Number(a), Number(b)) => {
$stack.push(Number($operation(b, a)));
}
_ => unreachable!(),
}
}};
}
#[must_use]
pub fn interpret(ops: &[Operation]) -> i64 {
debug_assert!(!ops.is_empty());
let mut stack = vec![];
use Operation::*;
for op in ops {
match op {
Number(x) => stack.push(Number(*x)),
Negate => {
if let Some(Operation::Number(i)) = stack.pop() {
stack.push(Operation::Number(-i));
} else {
unreachable!()
}
}
Add => binary_op!(stack => {|a, b| a + b}),
Sub => binary_op!(stack => {|a, b| a - b}),
Mult => binary_op!(stack => { |a, b| a * b } ),
Div => binary_op!(stack => { |a, b| a / b } ),
Exp => binary_op!(stack =>{ |a :i64, b : i64| i64::pow(a,b as u32)} ),
}
}
if let Some(Number(x)) = stack.pop() {
debug_assert!(stack.is_empty());
x
} else {
panic!("could not interpret");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment