Skip to content

Instantly share code, notes, and snippets.

@zesterer
Last active July 15, 2021 23:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zesterer/d2efc9663d59af1ebe3c73d0bd6c42b8 to your computer and use it in GitHub Desktop.
Save zesterer/d2efc9663d59af1ebe3c73d0bd6c42b8 to your computer and use it in GitHub Desktop.
Lexer for a Pythonic, indentation-aware language that converts indentation into token blocks *and* correctly handles parentheses
/// A lexical token produced by [`lex`].
#[derive(Debug, Clone, PartialEq, Eq)]
enum Token {
    /// A decimal integer literal made of ASCII digits only (no sign).
    Num(i64),
    /// The tokens enclosed by one matched `(` ... `)` pair.
    Paren(Vec<Token>),
    /// The tokens of a run of lines indented deeper than the surrounding lines.
    Block(Vec<Token>),
}

/// Lexes `src` into a token tree in which indentation becomes nested
/// [`Token::Block`]s and parentheses become [`Token::Paren`]s.
///
/// Indentation is measured as the number of whitespace characters that follow
/// a newline (a tab counts as one). Text before the first newline is treated
/// as depth 0 regardless of leading whitespace. Newlines inside parentheses
/// are ordinary whitespace and never change the indentation depth. Blank or
/// whitespace-only lines are ignored and do not affect indentation.
///
/// # Errors
///
/// Returns `Err` with a human-readable message when:
/// - a character other than an ASCII digit, `(`, `)` or whitespace is found;
/// - a number does not fit in an `i64`;
/// - a `)` has no matching `(`, or a `(` is never closed.
fn lex(src: &str) -> Result<Vec<Token>, String> {
    let mut indent_depth = 0usize;
    // Flat list of `(indentation, tokens)` entries; nesting by indentation is
    // reconstructed afterwards by `nest_by_indent`. While inside parentheses
    // the last entry collects the contents of the innermost open paren.
    let mut token_stack: Vec<(usize, Vec<Token>)> = vec![(indent_depth, Vec::new())];
    let mut symbol = String::new();
    let mut paren_depth = 0usize;
    let mut chars = src.chars().peekable();

    while let Some(c) = chars.next() {
        if c.is_ascii_digit() {
            symbol.push(c);
            continue;
        }

        // Any non-digit terminates the number being accumulated.
        flush_number(&mut symbol, &mut token_stack, indent_depth, paren_depth)?;

        match c {
            '(' => {
                token_stack.push((indent_depth, Vec::new()));
                paren_depth += 1;
            }
            ')' => {
                if paren_depth == 0 {
                    return Err("Unmatched ')'".to_string());
                }
                // Every '(' pushed exactly one entry above the initial one, so
                // with `paren_depth > 0` both lookups below must succeed.
                let (_, inner) = token_stack.pop().expect("open paren has an entry");
                token_stack
                    .last_mut()
                    .expect("entry below an open paren")
                    .1
                    .push(Token::Paren(inner));
                paren_depth -= 1;
            }
            '\n' if paren_depth == 0 => {
                // Measure the leading whitespace of the next line, stopping at
                // a newline so that blank lines are not counted as indentation.
                let mut width = 0;
                while let Some(&next) = chars.peek() {
                    if next == '\n' || !next.is_whitespace() {
                        break;
                    }
                    width += 1;
                    chars.next();
                }
                // A line holding only whitespace (or the end of input) carries
                // no tokens, so it must not change the current depth.
                let blank_line = matches!(chars.peek(), None | Some(&'\n'));
                if !blank_line {
                    indent_depth = width;
                    token_stack.push((indent_depth, Vec::new()));
                }
            }
            c if c.is_whitespace() => {}
            c => return Err(format!("Unknown character '{}'", c)),
        }
    }

    // A number that runs up to the end of the input has no terminating
    // character, so it has to be emitted here.
    flush_number(&mut symbol, &mut token_stack, indent_depth, paren_depth)?;

    if paren_depth != 0 {
        return Err(format!("{} unclosed '('", paren_depth));
    }

    Ok(nest_by_indent(token_stack))
}

/// Emits the pending digits in `symbol` (if any) as a `Token::Num` and clears the buffer.
fn flush_number(
    symbol: &mut String,
    token_stack: &mut Vec<(usize, Vec<Token>)>,
    indent_depth: usize,
    paren_depth: usize,
) -> Result<(), String> {
    if symbol.is_empty() {
        return Ok(());
    }
    let value = symbol
        .parse::<i64>()
        .map_err(|err| format!("Invalid number '{}': {}", symbol, err))?;
    symbol.clear();

    let token = Token::Num(value);
    if paren_depth == 0 {
        // Outside parentheses each number gets its own entry tagged with the
        // indentation of the line it appears on.
        token_stack.push((indent_depth, vec![token]));
    } else {
        token_stack
            .last_mut()
            .expect("entry for the open paren")
            .1
            .push(token);
    }
    Ok(())
}

/// Folds the flat `(indentation, tokens)` entries into nested `Token::Block`s.
fn nest_by_indent(entries: Vec<(usize, Vec<Token>)>) -> Vec<Token> {
    // Stack of currently open indentation levels; the bottom one (depth 0) is
    // the top-level token list and is never closed inside the loop.
    let mut levels: Vec<(usize, Vec<Token>)> = vec![(0, Vec::new())];
    let mut current_depth = 0;

    for (depth, toks) in entries {
        if depth > current_depth {
            levels.push((depth, Vec::new()));
        } else if depth < current_depth {
            // Close every level that is deeper than the new depth.
            while levels.last().map_or(false, |level| level.0 > depth) {
                let (_, inner) = levels.pop().expect("checked by loop condition");
                levels
                    .last_mut()
                    .expect("depth-0 level is never popped here")
                    .1
                    .push(Token::Block(inner));
            }
        }
        current_depth = depth;
        levels
            .last_mut()
            .expect("at least the depth-0 level")
            .1
            .extend(toks);
    }

    // Close whatever levels are still open at the end of the input.
    while levels.len() > 1 {
        let (_, inner) = levels.pop().expect("len > 1");
        levels
            .last_mut()
            .expect("len was > 1")
            .1
            .push(Token::Block(inner));
    }

    levels.pop().map(|(_, toks)| toks).unwrap_or_default()
}
/// Sample input that `main` passes to `lex`.
// NOTE(review): as it appears here, none of the sample lines has leading
// whitespace; confirm whether the intended indentation was lost when this
// snippet was copied.
const SRC: &str = r#"
123 456
41 42
43
(44
45
46
)
47
48 49
"#;
/// Lexes the bundled sample source and prints the resulting token tree.
///
/// On a lexing error the message is written to stderr and the process exits
/// with status 1 instead of silently discarding the `Result`.
fn main() {
    match lex(SRC) {
        Ok(tokens) => println!("{:#?}", tokens),
        Err(err) => {
            eprintln!("lex error: {}", err);
            std::process::exit(1);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment