-
-
Save JamesOwenHall/2ec6e2c24f8b37009ab0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{fmt, iter, str};
/// The category of a lexed token.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(dead_code)] // Symbol/Boolean/Number are not produced by the tokenizer yet.
pub enum TokenType {
    Unknown,
    OpenParen,
    CloseParen,
    OpenBracket,
    CloseBracket,
    Symbol,
    Boolean,
    Number,
    StringToken, // Using the "Token" suffix because "String" is a keyword.
}

// Ported from the pre-1.0 `fmt::Show` trait, which Rust 1.0 split into
// `Debug` and `Display`; this impl writes human-readable names, so it
// maps to `Display`.
impl fmt::Display for TokenType {
    /// Writes the human-readable name of the token type.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match *self {
            TokenType::Unknown => "Unknown",
            TokenType::OpenParen => "OpenParen",
            TokenType::CloseParen => "CloseParen",
            TokenType::OpenBracket => "OpenBracket",
            TokenType::CloseBracket => "CloseBracket",
            TokenType::Symbol => "Symbol",
            TokenType::Boolean => "Boolean",
            TokenType::Number => "Number",
            // Display name intentionally differs from the variant name.
            TokenType::StringToken => "String",
        };
        f.write_str(name)
    }
}
pub struct Token { | |
pub typ: TokenType, | |
pub val: String, | |
} | |
/// A streaming tokenizer borrowing its input string for lifetime `'a`.
pub struct Tokenizer<'a> {
    // Peekable char iterator over the remaining input. Note: modern
    // `Peekable` takes a single type parameter (the underlying iterator);
    // the old two-parameter form no longer compiles.
    it: iter::Peekable<str::Chars<'a>>,
    // Accumulates the text of the token currently being read.
    current: String,
}
impl<'a> Tokenizer<'a> { | |
pub fn new(input: &'a str) -> Tokenizer<'a> { | |
let mut result = Tokenizer{it: input.chars().peekable(), current: "".into_string()}; | |
result.skip_white_space(); | |
result | |
} | |
// Returns true if there are more tokens. | |
pub fn has_next(&mut self) -> bool { | |
self.it.peek().is_some() | |
} | |
// Returns the next token. | |
pub fn next(&mut self) -> Token { | |
// Grab next char | |
let next = self.it.next().unwrap(); | |
self.current.push(next); | |
// Get the resulting token | |
let result = match next { | |
// Single char tokens | |
'(' => Token{typ: OpenParen, val: self.current.clone()}, | |
')' => Token{typ: CloseParen, val: self.current.clone()}, | |
'[' => Token{typ: OpenBracket, val: self.current.clone()}, | |
']' => Token{typ: CloseBracket, val: self.current.clone()}, | |
// Strings | |
'"' => self.tokenize_string(), | |
_ => Token{typ: Unknown, val: self.current.clone()}, | |
}; | |
// Reset current | |
self.current = "".into_string(); | |
self.skip_white_space(); | |
result | |
} | |
// Returns the token up to the first unescaped ". | |
fn tokenize_string(&mut self) -> Token { | |
loop { | |
let mut iterator = self.it; | |
match iterator.peek() { | |
None => break, | |
Some(x) if *x == '"' => { | |
self.current.push(self.it.next().unwrap()); | |
break; | |
}, | |
Some(x) if *x == '\\' => { | |
self.current.push(self.it.next().unwrap()); | |
self.current.push(self.it.next().unwrap()); | |
}, | |
Some(_) => self.current.push(self.it.next().unwrap()), | |
}; | |
}; | |
Token{typ: StringToken, val: self.current.clone()} | |
} | |
// Advances the iterator to the next char that is not white space. | |
fn skip_white_space(&mut self) { | |
loop { | |
let mut iterator = self.it; | |
match iterator.peek() { | |
None => break, | |
Some(x) if *x == ' ' || *x == '\t' => {self.it.next();}, | |
_ => break, | |
}; | |
}; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment