Skip to content

Instantly share code, notes, and snippets.

@JamesOwenHall
Created November 14, 2014 12:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JamesOwenHall/2ec6e2c24f8b37009ab0 to your computer and use it in GitHub Desktop.
Save JamesOwenHall/2ec6e2c24f8b37009ab0 to your computer and use it in GitHub Desktop.
use std::{fmt, iter, str};
/// The category of a lexical token produced by [`Tokenizer`].
// `Symbol`, `Boolean` and `Number` are not produced by the tokenizer yet, so
// the dead-code lint is silenced until they are.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenType {
    /// Any character the tokenizer does not recognise yet.
    Unknown,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `[`
    OpenBracket,
    /// `]`
    CloseBracket,
    Symbol,
    Boolean,
    Number,
    /// A double-quoted string literal, quotes included.
    // The "Token" suffix avoids confusion with the standard `String` type.
    StringToken,
}

impl fmt::Display for TokenType {
    /// Writes the variant name; `StringToken` is displayed as `String`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let name = match self {
            TokenType::Unknown => "Unknown",
            TokenType::OpenParen => "OpenParen",
            TokenType::CloseParen => "CloseParen",
            TokenType::OpenBracket => "OpenBracket",
            TokenType::CloseBracket => "CloseBracket",
            TokenType::Symbol => "Symbol",
            TokenType::Boolean => "Boolean",
            TokenType::Number => "Number",
            TokenType::StringToken => "String",
        };
        f.write_str(name)
    }
}

/// A single token: its category and the exact source text it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    pub typ: TokenType,
    pub val: String,
}

/// Splits an input string into [`Token`]s, one per call to [`Tokenizer::next`].
#[derive(Debug, Clone)]
pub struct Tokenizer<'a> {
    /// Remaining input, with one character of lookahead.
    it: iter::Peekable<str::Chars<'a>>,
    /// Text of the token currently being assembled.
    current: String,
}

impl<'a> Tokenizer<'a> {
    /// Creates a tokenizer over `input`, positioned at the first
    /// non-whitespace character.
    pub fn new(input: &'a str) -> Tokenizer<'a> {
        let mut tokenizer = Tokenizer {
            it: input.chars().peekable(),
            current: String::new(),
        };
        tokenizer.skip_white_space();
        tokenizer
    }

    /// Returns `true` if there are more tokens.
    pub fn has_next(&mut self) -> bool {
        self.it.peek().is_some()
    }

    /// Returns the next token and skips any whitespace that follows it.
    ///
    /// # Panics
    ///
    /// Panics if the input is exhausted; check [`Tokenizer::has_next`] first.
    // The name and signature are part of the existing interface, so this
    // intentionally does not become an `Iterator` implementation.
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Token {
        let first = self
            .it
            .next()
            .expect("Tokenizer::next called with no input left; check has_next() first");
        self.current.push(first);

        let token = match first {
            // Single-character tokens.
            '(' => self.emit(TokenType::OpenParen),
            ')' => self.emit(TokenType::CloseParen),
            '[' => self.emit(TokenType::OpenBracket),
            ']' => self.emit(TokenType::CloseBracket),
            // String literals.
            '"' => self.tokenize_string(),
            _ => self.emit(TokenType::Unknown),
        };

        self.skip_white_space();
        token
    }

    /// Consumes input up to and including the first unescaped `"` and returns
    /// it as a string token. The opening quote must already be in `current`.
    ///
    /// An unterminated literal yields whatever text was read before the input
    /// ended.
    fn tokenize_string(&mut self) -> Token {
        while let Some(c) = self.it.next() {
            self.current.push(c);
            match c {
                '"' => break,
                '\\' => {
                    // Keep the escaped character verbatim. The input may end
                    // right after the backslash, so this must not unwrap.
                    if let Some(escaped) = self.it.next() {
                        self.current.push(escaped);
                    }
                }
                _ => {}
            }
        }
        self.emit(TokenType::StringToken)
    }

    /// Builds a token of type `typ` from the accumulated text, leaving
    /// `current` empty for the next token.
    fn emit(&mut self, typ: TokenType) -> Token {
        Token {
            typ,
            val: std::mem::take(&mut self.current),
        }
    }

    /// Advances the iterator to the next char that is not white space
    /// (spaces, tabs, newlines and any other Unicode whitespace).
    fn skip_white_space(&mut self) {
        while self.it.next_if(|c| c.is_whitespace()).is_some() {}
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment