Skip to content

Instantly share code, notes, and snippets.

@jmsdnns
Created November 14, 2024 05:45
Show Gist options
  • Save jmsdnns/2bb7b43bfe0e44a076457a1123ca5e00 to your computer and use it in GitHub Desktop.
Save jmsdnns/2bb7b43bfe0e44a076457a1123ca5e00 to your computer and use it in GitHub Desktop.
A simple example of an EBNF parser written in Rust, without support from external crates.
// <expression ::= <term> { ("+" | "-") <term> }
// <term> ::= <factor> { ("*" | "/") <factor> }
// <factor> ::= <number> | "(" <expression> ")"
// <number> ::= <digit> { <digit> }
// <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
use std::str::FromStr;
#[derive(Debug, PartialEq, Clone)]
pub enum Token {
Number(i64),
Plus,
Minus,
Star,
Slash,
LParen,
RParen,
EOF,
}
pub struct Lexer<'a> {
input: &'a str,
pos: usize,
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
Lexer { input, pos: 0 }
}
pub fn next_token(&mut self) -> Token {
self.skip_whitespace();
if self.pos >= self.input.len() {
return Token::EOF;
}
let c = self.input[self.pos..].chars().nth(0).unwrap();
match c {
'0'..='9' => self.number(),
'+' => {
self.pos += 1;
Token::Plus
}
'-' => {
self.pos += 1;
Token::Minus
}
'*' => {
self.pos += 1;
Token::Star
}
'/' => {
self.pos += 1;
Token::Slash
}
'(' => {
self.pos += 1;
Token::LParen
}
')' => {
self.pos += 1;
Token::RParen
}
_ => panic!("Whaaat is this? {}", c),
}
}
pub fn number(&mut self) -> Token {
let start = self.pos;
while self.pos < self.input.len()
&& self.input[self.pos..]
.chars()
.nth(0)
.unwrap()
.is_ascii_digit()
{
self.pos += 1;
}
let num_str = &self.input[start..self.pos]; // consumes multiple digits
let number = i64::from_str(num_str).unwrap();
Token::Number(number)
}
pub fn skip_whitespace(&mut self) {
while self.pos < self.input.len()
&& self.input[self.pos..]
.chars()
.nth(0)
.unwrap()
.is_whitespace()
{
self.pos += 1;
}
}
}
#[derive(Debug, PartialEq, Clone)]
pub enum Expr {
Number(i64),
BinaryOp(Box<Expr>, Operator, Box<Expr>),
}
#[derive(Debug, PartialEq, Clone)]
pub enum Operator {
Plus,
Minus,
Star,
Slash,
}
pub struct Parser<'a> {
lexer: Lexer<'a>,
current: Token,
}
impl<'a> Parser<'a> {
pub fn new(input: &'a str) -> Self {
let mut lexer = Lexer::new(input);
let current = lexer.next_token();
Parser { lexer, current }
}
fn eat(&mut self, token: Token) {
if self.current == token {
self.current = self.lexer.next_token();
} else {
panic!("Huh? Got {:?} but expected {:?}", token, self.current);
}
}
pub fn parse(&mut self) -> Expr {
self.expression()
}
fn expression(&mut self) -> Expr {
let mut left = self.term();
while self.current == Token::Plus || self.current == Token::Minus {
let op = match self.current {
Token::Plus => Operator::Plus,
Token::Minus => Operator::Minus,
_ => unreachable!(),
};
self.eat(self.current.clone());
let right = self.term();
left = Expr::BinaryOp(Box::new(left), op, Box::new(right));
}
left
}
fn term(&mut self) -> Expr {
let mut left = self.factor();
while self.current == Token::Star || self.current == Token::Slash {
let op = match self.current {
Token::Star => Operator::Star,
Token::Slash => Operator::Slash,
_ => unreachable!(),
};
self.eat(self.current.clone());
let right = self.factor();
left = Expr::BinaryOp(Box::new(left), op, Box::new(right));
}
left
}
fn factor(&mut self) -> Expr {
match self.current {
Token::Number(n) => {
self.eat(Token::Number(n));
Expr::Number(n)
}
Token::LParen => {
self.eat(Token::LParen);
let expr = self.expression();
self.eat(Token::RParen);
expr
}
_ => panic!("Unexpected token in factor {:?}", self.current),
}
}
}
fn main() {
let expr = "3 + 5 * (2 - 8)";
let mut parser = Parser::new(expr);
let ast = parser.parse();
println!("{:?}", ast);
}
#[cfg(test)]
mod lexer {
use super::*;
#[test]
fn test_lexer() {
let expr = "3 + 5 * (2 - 8)";
let mut l = Lexer::new(expr);
assert_eq!(Token::Number(3), l.next_token());
assert_eq!(Token::Plus, l.next_token());
assert_eq!(Token::Number(5), l.next_token());
assert_eq!(Token::Star, l.next_token());
assert_eq!(Token::LParen, l.next_token());
assert_eq!(Token::Number(2), l.next_token());
assert_eq!(Token::Minus, l.next_token());
assert_eq!(Token::Number(8), l.next_token());
assert_eq!(Token::RParen, l.next_token());
}
#[test]
fn test_number() {
let expr = "33";
let mut l = Lexer::new(expr);
assert_eq!(Token::Number(33), l.next_token());
}
}
#[cfg(test)]
mod parser {
use super::*;
#[test]
fn test_parser() {
use Expr::*;
use Operator::*;
let expr = "3 + 5 * (2 - 8)";
let mut p = Parser::new(expr);
let ast = p.parse();
let expected = BinaryOp(
Box::new(Number(3)),
Plus,
Box::new(BinaryOp(
Box::new(Number(5)),
Star,
Box::new(BinaryOp(Box::new(Number(2)), Minus, Box::new(Number(8)))),
)),
);
assert_eq!(expected, ast);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment