Last active
August 29, 2015 14:06
-
-
Save pshc/c44ca26f7038b772eb3c to your computer and use it in GitHub Desktop.
Toy tokenizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::io; | |
use std::io::IoResult; | |
use std::owned::BoxAny; | |
use std::task; | |
#[deriving(Show)] | |
enum Token { | |
Equals, | |
Lit(int), | |
Let, | |
Name(String), | |
Plus, | |
Semi, | |
} | |
type Input = io::BufferedReader<io::File>; | |
struct Tokenizer<'a> { | |
input: &'a mut Input, | |
saved: Option<char>, | |
} | |
impl<'a> Tokenizer<'a> { | |
fn new(input: &mut Input) -> Tokenizer { | |
Tokenizer {input: input, saved: None} | |
} | |
} | |
impl<'a> Iterator<Token> for Tokenizer<'a> { | |
fn next(&mut self) -> Option<Token> { | |
let mut ident: Vec<char> = Vec::new(); | |
let mut num: Vec<char> = Vec::new(); | |
// read chars until we get a token | |
loop { | |
let c = match self.saved.take() { | |
Some(c) => c, | |
None => match self.input.read_char() { | |
Ok(c) => c, | |
Err(io::IoError {kind: io::EndOfFile, ..}) => { | |
if !ident.is_empty() { | |
let name = String::from_chars(ident.as_slice()); | |
fail!("Trailing '{}' at EOF", name); | |
} | |
if !num.is_empty() { | |
let desc = String::from_chars(num.as_slice()); | |
fail!("Trailing '{}' at EOF", desc); | |
} | |
return None; | |
} | |
Err(e) => fail!("While reading tokens: {}", e) | |
} | |
}; | |
if !ident.is_empty() { | |
// look for name continuation | |
match c { | |
'a'..'z' | 'A'..'Z' | '0'..'9' | '_' => { ident.push(c); continue; }, | |
_ => { self.saved = Some(c); } | |
} | |
let name = String::from_chars(ident.as_slice()); | |
ident.clear(); | |
return Some(match name.as_slice() { | |
"let" => Let, | |
_ => Name(name) | |
}); | |
} | |
if !num.is_empty() { | |
// look for number continuation | |
if std::char::is_digit(c) { | |
num.push(c); | |
continue; | |
} | |
self.saved = Some(c); | |
let s = String::from_chars(num.as_slice()); | |
num.clear(); | |
return Some(Lit(from_str(s.as_slice()).expect("couldn't parse number"))); | |
} | |
match c { | |
'a'..'z' | 'A'..'Z' | '_' => ident.push(c), | |
'0'..'9' => num.push(c), | |
'=' => return Some(Equals), | |
'+' => return Some(Plus), | |
';' => return Some(Semi), | |
' ' | '\t' | '\n' | '\r' => {}, | |
_ => fail!("Unexpected {}", c) | |
} | |
} | |
} | |
} | |
fn process(filename: &Path) -> IoResult<()> { | |
let file = try!(io::File::open(filename)); | |
let mut reader = io::BufferedReader::new(file); | |
for token in Tokenizer::new(&mut reader) { | |
println!("{}", token); | |
} | |
Ok(()) | |
} | |
fn main() { | |
let error = match std::os::args().as_slice() { | |
[_, ref filename] => { | |
let path = Path::new(filename.as_bytes()); | |
let result = task::try(proc() process(&path)); | |
match result { | |
Ok(Ok(())) => { return; } | |
Ok(Err(e)) => format!("{}: {}", filename, e), | |
Err(e) => { | |
match e.downcast::<&str>() { | |
Ok(e) => format!("{}: {}", filename, e), | |
Err(_) => format!("{}: unknown task failure!", filename), | |
} | |
} | |
} | |
} | |
[ref me, ..] => format!("Usage: {} <input file>", me), | |
_ => format!("No args?!"), | |
}; | |
drop(io::stderr().write_line(error.as_slice())); | |
std::os::set_exit_status(1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment