Skip to content

Instantly share code, notes, and snippets.

@iemelyanov
Last active March 4, 2023 19:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iemelyanov/67a76fdc19e6bc2baf82968913fc3065 to your computer and use it in GitHub Desktop.
Save iemelyanov/67a76fdc19e6bc2baf82968913fc3065 to your computer and use it in GitHub Desktop.
use std::fmt::Display;
#[derive(Clone, Copy, PartialEq, Eq)]
enum Token {
Minus,
Plus,
Mul,
Div,
Num(i32),
Eof,
}
impl Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Div => write!(f, "/"),
Self::Mul => write!(f, "*"),
Self::Minus => write!(f, "-"),
Self::Plus => write!(f, "+"),
Self::Num(n) => write!(f, "{}", n),
_ => unreachable!(),
}
}
}
struct Lexer<'input> {
input: &'input [u8],
start: usize,
curent: usize,
}
impl<'input> Lexer<'input> {
fn new(input: &'input str) -> Self {
Self {
input: input.as_bytes(),
start: 0,
curent: 0,
}
}
fn is_at_end(&self) -> bool {
self.curent >= self.input.len()
}
fn peek(&self) -> u8 {
if self.is_at_end() {
b'\0'
} else {
self.input[self.curent]
}
}
fn advance(&mut self) -> u8 {
if self.is_at_end() {
return b'\0';
}
let c = self.input[self.curent];
self.curent += 1;
c
}
fn next(&mut self) -> Token {
self.start = self.curent;
let mut c = self.advance();
while c.is_ascii_whitespace() {
c = self.advance();
self.start = self.curent - 1;
}
match c {
b'+' => Token::Plus,
b'-' => Token::Minus,
b'*' => Token::Mul,
b'/' => Token::Div,
b'0'..=b'9' => {
while self.peek().is_ascii_digit() {
self.advance();
}
let n = String::from_utf8_lossy(&self.input[self.start..self.curent])
.parse::<i32>()
.unwrap();
Token::Num(n)
}
_ => Token::Eof,
}
}
}
enum Expr {
Atom(Token),
Prefix {
op: Token,
rhs: Option<Box<Expr>>,
},
Infix {
op: Token,
lhs: Option<Box<Expr>>,
rhs: Option<Box<Expr>>,
},
}
#[derive(PartialEq, PartialOrd)]
enum P {
Lowest,
MinusPlus,
DivMul,
Prefix,
}
impl Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Atom(v) => {
write!(f, "{}", v)
}
Self::Prefix { op, rhs } => {
write!(f, "({}{})", op, rhs.as_ref().unwrap())
}
Self::Infix { op, lhs, rhs } => {
write!(
f,
"({} {} {})",
lhs.as_ref().unwrap(),
op,
rhs.as_ref().unwrap()
)
}
}
}
}
struct Parser<'input> {
lexer: Lexer<'input>,
cur_tok: Token,
peek_tok: Token,
trace_tab_cnt: usize,
}
impl<'input> Parser<'input> {
fn new(input: &'input str) -> Self {
let mut s = Self {
lexer: Lexer::new(input),
cur_tok: Token::Eof,
peek_tok: Token::Eof,
trace_tab_cnt: 0,
};
s.next_token();
s.next_token();
s
}
fn next_token(&mut self) {
self.cur_tok = self.peek_tok;
self.peek_tok = self.lexer.next();
}
fn trace_begin(&mut self, name: &str) {
println!("{:width$}BEGIN {}", "", name, width = self.trace_tab_cnt);
self.trace_tab_cnt += 4;
}
fn trace_end(&mut self, name: &str) {
self.trace_tab_cnt -= 4;
println!("{:width$}END {}", "", name, width = self.trace_tab_cnt);
}
fn parse_expr(&mut self, p: P) -> Option<Box<Expr>> {
self.trace_begin("parse_expr");
let mut lhs = match self.cur_tok {
Token::Num(_) => self.parse_atom(),
Token::Minus | Token::Plus => self.parse_prefix(),
_ => None,
};
while self.peek_tok != Token::Eof && p < Self::bp(self.peek_tok) {
self.next_token();
lhs = self.parse_infix(lhs);
}
self.trace_end("parse_expr");
lhs
}
fn parse_prefix(&mut self) -> Option<Box<Expr>> {
self.trace_begin("parse_prefix");
let op = self.cur_tok;
self.next_token();
let expr = Some(Box::new(Expr::Prefix {
op,
rhs: self.parse_expr(P::Prefix),
}));
self.trace_end("parse_prefix");
expr
}
fn parse_infix(&mut self, lhs: Option<Box<Expr>>) -> Option<Box<Expr>> {
self.trace_begin("parse_infix");
let op = self.cur_tok;
self.next_token();
let rhs = self.parse_expr(Self::bp(op));
let expr = Some(Box::new(Expr::Infix {
op,
lhs: lhs,
rhs: rhs,
}));
self.trace_end("parse_infix");
expr
}
fn parse_atom(&mut self) -> Option<Box<Expr>> {
self.trace_begin("parse_atom");
let r = Some(Box::new(Expr::Atom(self.cur_tok)));
self.trace_end("parse_atom");
r
}
fn bp(t: Token) -> P {
match t {
Token::Plus | Token::Minus => P::MinusPlus,
Token::Mul | Token::Div => P::DivMul,
_ => P::Lowest,
}
}
}
fn eval(expr: &Option<Box<Expr>>) -> i32 {
match expr {
Some(ref expr) => match expr.as_ref() {
Expr::Atom(n) => match n {
Token::Num(ref n) => *n,
_ => unreachable!(),
},
Expr::Infix { op, lhs, rhs } => match op {
Token::Minus => eval(&lhs) - eval(&rhs),
Token::Plus => eval(&lhs) + eval(&rhs),
Token::Div => eval(&lhs) / eval(&rhs),
Token::Mul => eval(&lhs) * eval(&rhs),
_ => unreachable!(),
},
Expr::Prefix { op, rhs } => match op {
Token::Minus => -eval(&rhs),
_ => eval(&rhs),
},
},
None => 0,
}
}
fn main() {
let input = "-1 + 2 * 3";
let mut parser = Parser::new(&input);
let expr = parser.parse_expr(P::Lowest);
print!("{} -> ", input);
expr.as_ref().map(|ast| {
print!("{}", ast);
});
println!(" = {}", eval(&expr));
}
package main
import "fmt"
type tokenKind int
const (
tokenAtom tokenKind = iota
tokenOp
tokenEof
)
type token struct {
literal byte
kind tokenKind
}
type lexer struct {
tokens []token
pos int
}
func lexerNew(input []byte) *lexer {
l := &lexer{}
for _, b := range input {
switch b {
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
l.tokens = append(l.tokens, token{
literal: b,
kind: tokenAtom,
})
case ' ':
continue
default:
l.tokens = append(l.tokens, token{
literal: b,
kind: tokenOp,
})
}
}
return l
}
func (l *lexer) next() token {
if l.pos >= len(l.tokens) {
return token{kind: tokenEof}
}
t := l.tokens[l.pos]
l.pos++
return t
}
func (l *lexer) peek() token {
if l.pos >= len(l.tokens) {
return token{kind: tokenEof}
}
return l.tokens[l.pos]
}
type sexpr interface {
String() string
}
type atom struct {
b byte
}
type cons struct {
b byte
sexprs []sexpr
}
func (a *atom) String() string {
return string(a.b)
}
func (c *cons) String() string {
t := fmt.Sprintf("(%c", c.b)
for _, e := range c.sexprs {
t += " " + e.String()
}
t += ")"
return t
}
func expr(input []byte) sexpr {
l := lexerNew(input)
return exprBp(l, 0)
}
func exprBp(l *lexer, bp int) sexpr {
var lhs sexpr
switch t := l.next(); t.kind {
case tokenAtom:
lhs = &atom{b: t.literal}
default:
panic(fmt.Sprintf("unexpected token '%c'", t.literal))
}
loop:
for {
var op byte
switch t := l.peek(); t.kind {
case tokenOp:
op = t.literal
case tokenEof:
break loop
default:
panic(fmt.Sprintf("unexpected token '%c'", t.literal))
}
var lbp, rbp int
switch op {
case '+', '-':
lbp, rbp = 1, 2
case '*', '/':
lbp, rbp = 3, 4
default:
panic(fmt.Sprintf("bad op '%c'", op))
}
if lbp < bp {
break
}
l.next()
rhs := exprBp(l, rbp)
lhs = &cons{
b: op,
sexprs: []sexpr{lhs, rhs},
}
}
return lhs
}
func main() {
fmt.Println(expr([]byte("1 + 2 * 3 / 4 - 5 + 6")))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment