@imjacobclark
Last active January 11, 2019 22:45
A Lexer in Rust
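A small hand-rolled lexer: it walks the input character by character, maps single characters to symbol and keyword tokens, groups runs of digits into integer tokens, and falls back to single-character words for everything else.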
#[derive(Debug, PartialEq)]
enum Token {
    Symbol(Symbol),
    Type(Type),
    Keyword(Keyword),
    Word(Word),
    Char(char),
}

#[derive(Debug, PartialEq)]
enum Symbol {
    OpenBrace,
    CloseBrace,
    OpenParenthesis,
    CloseParenthesis,
    Semicolon,
    Identifier,
}

#[derive(Debug, PartialEq)]
enum Type {
    Int(Num),
}

#[derive(Debug, PartialEq)]
enum Keyword {
    Return,
}

#[derive(Debug, PartialEq)]
struct Word {
    value: String,
}

#[derive(Debug, PartialEq)]
struct Num {
    value: String,
}
fn lex(input: String) -> Vec<Token> {
    let mut characters = input.chars();
    let mut tokens: Vec<Token> = Vec::new();

    // `input.len()` iterations is an upper bound: multi-digit numbers consume
    // extra characters, and the `if let` below simply skips the remaining
    // iterations once the iterator is exhausted.
    for _ in 0..input.len() {
        let lookahead = characters.next();

        if let Some(character) = lookahead {
            if character == ' ' {
                continue;
            }

            // Single characters that map directly onto symbol or keyword tokens.
            let token = match character {
                '{' => Some(Token::Symbol(Symbol::OpenBrace)),
                '}' => Some(Token::Symbol(Symbol::CloseBrace)),
                '(' => Some(Token::Symbol(Symbol::OpenParenthesis)),
                ')' => Some(Token::Symbol(Symbol::CloseParenthesis)),
                ';' => Some(Token::Symbol(Symbol::Semicolon)),
                'r' => Some(Token::Keyword(Keyword::Return)),
                _ => None,
            };

            if let Some(token) = token {
                tokens.push(token);
                continue;
            }

            // Numbers can span several characters: clone the iterator so we
            // can peek ahead, consuming digits from the real iterator as we go.
            if character.is_digit(10) {
                let mut number: Vec<char> = Vec::new();
                let mut peek = characters.clone();
                number.push(character);

                loop {
                    match peek.next() {
                        Some(peeked) if peeked.is_digit(10) => {
                            number.push(peeked);
                            characters.next();
                        }
                        // A non-digit, or the end of the input, terminates
                        // the number.
                        _ => {
                            let num: String = number.into_iter().collect();
                            tokens.push(Token::Type(Type::Int(Num { value: num })));
                            break;
                        }
                    }
                }

                continue;
            }

            // Anything else becomes a single-character word.
            tokens.push(Token::Word(Word { value: character.to_string() }));
        }
    }

    tokens
}
fn print_tokens(tokens: Vec<Token>) {
    for token in tokens {
        println!("{:?}", token);
    }
}
fn main() {
    print_tokens(lex("{ r (123) }".to_string()));
}
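// For the sample input above, `print_tokens` should emit one Debug line per
// token, along these lines:
//
//   Symbol(OpenBrace)
//   Keyword(Return)
//   Symbol(OpenParenthesis)
//   Type(Int(Num { value: "123" }))
//   Symbol(CloseParenthesis)
//   Symbol(CloseBrace)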
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lex_returns_an_open_brace_symbol_when_given_an_opening_brace_char() {
        let input = "{".to_string();
        let tokens = lex(input);
        assert_eq!(tokens[0], Token::Symbol(Symbol::OpenBrace));
    }

    #[test]
    fn lex_returns_a_close_brace_symbol_when_given_a_closing_brace_char() {
        let input = "}".to_string();
        let tokens = lex(input);
        assert_eq!(tokens[0], Token::Symbol(Symbol::CloseBrace));
    }

    // Indexing past the last token panics, which proves the trailing
    // whitespace produced no second token.
    #[test]
    #[should_panic(expected = "index out of bounds: the len is 1 but the index is 1")]
    fn lex_does_not_turn_whitespace_into_a_token() {
        let input = "{ ".to_string();
        let tokens = lex(input);
        assert_eq!(tokens[0], Token::Symbol(Symbol::OpenBrace));
        assert_eq!(tokens[1], Token::Symbol(Symbol::OpenBrace));
    }

    #[test]
    fn lex_can_tokenise_multiple_symbols() {
        let input = "{}".to_string();
        let tokens = lex(input);
        assert_eq!(tokens[0], Token::Symbol(Symbol::OpenBrace));
        assert_eq!(tokens[1], Token::Symbol(Symbol::CloseBrace));
    }

    #[test]
    fn lex_can_tokenise_keyword() {
        let input = "r".to_string();
        let tokens = lex(input);
        assert_eq!(tokens[0], Token::Keyword(Keyword::Return));
    }

    #[test]
    fn lex_can_tokenise_identifier_words() {
        let input = "s".to_string();
        let tokens = lex(input);
        let word = Word { value: "s".to_string() };
        assert_eq!(tokens[0], Token::Word(word));
    }
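
    // A sketch of one extra check, covering the end-of-input branch in `lex`:
    // a number at the very end of the input should still become an Int token.
    #[test]
    fn lex_can_tokenise_number_at_end_of_input() {
        let input = "123".to_string();
        let tokens = lex(input);
        let num = Num { value: "123".to_string() };
        assert_eq!(tokens[0], Token::Type(Type::Int(num)));
    }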
}