Created
March 28, 2018 22:59
-
-
Save learnopengles/38eb81df8ba57055c42d37bda937b0c4 to your computer and use it in GitHub Desktop.
Jack Analyzer for nand2tetris project 10 -- written in Rust.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Jack Analyzer for nand2tetris project 10 -- written in Rust. | |
// License: CC BY-SA 4.0 -- https://creativecommons.org/licenses/by-sa/4.0/ | |
use std::env; | |
use std::fs::File; | |
use std::io; | |
use std::io::{BufRead, BufReader, BufWriter, Read, Write}; | |
use std::iter::Peekable; | |
use std::path::Path; | |
use std::str::Chars; | |
// ------------------------------------------------ | |
// Tokenizer | |
// ------------------------------------------------ | |
// One lexical token of the Jack language (the five token categories of
// chapter 10 of the nand2tetris book).
#[derive(Clone, Debug, PartialEq)]
enum Token {
    Keyword(Keyword),
    Symbol(char),
    Identifier(String),
    // Jack integers are 0..32767, so i16 covers the range; values of
    // 32768 and above fail at parse time in the tokenizer.
    IntegerConstant(i16),
    // Contents between double quotes, quotes excluded.
    StringConstant(String),
}
// The 21 reserved words of the Jack language.
#[derive(Clone, Debug, PartialEq)]
enum Keyword {
    Class,
    Constructor,
    Function,
    Method,
    Field,
    Static,
    Var,
    Int,
    Char,
    Boolean,
    Void,
    True,
    False,
    Null,
    This,
    Let,
    Do,
    If,
    Else,
    While,
    Return,
}
// Streaming tokenizer: reads the input one line at a time and yields Jack
// tokens through its `Iterator` implementation.
struct JackTokenizer<'a, R: Read> {
    reader: BufReader<R>,
    // Owns the text of the line currently being tokenized.
    line_buffer: String,
    // Char cursor into `line_buffer`. NOTE(review): this is a self-referential
    // borrow manufactured with `unsafe` in `next()`; it is only valid while
    // `line_buffer` is not mutated — see the comment at that unsafe block.
    current_line_chars: Option<Peekable<Chars<'a>>>,
    // 1-based line counter, used only for panic messages.
    line_count: u32,
    // True while scanning between "/*" and "*/" (comments may span lines).
    is_inside_multiline_comment: bool,
}
impl<'a> JackTokenizer<'a, File> { | |
fn new(in_name: &str) -> io::Result<Self> { | |
let in_file = try!(File::open(in_name)); | |
let reader = BufReader::new(in_file); | |
Ok(JackTokenizer {reader: reader, line_buffer: String::with_capacity(256), current_line_chars: None, line_count: 0, is_inside_multiline_comment: false}) | |
} | |
} | |
// Pull-based tokenizer: each `next()` returns the next Jack token, or `None`
// at end of input (or on a read error). Whitespace and comments are skipped;
// malformed input panics with the offending line number.
impl<'a, R: Read> Iterator for JackTokenizer<'a, R> {
    type Item = Token;
    fn next(&mut self) -> Option<Token> {
        // Accumulates `initial_char` plus every following char for which
        // `predicate` holds; stops (without consuming) at the first mismatch.
        // Used for integer constants, keywords, and identifiers.
        fn munch(initial_char: char, iterator: &mut Peekable<Chars>, predicate: fn(char) -> bool) -> String {
            let mut string = String::new();
            string.push(initial_char);
            loop {
                // Dereference the char to avoid double borrow at the "let _ = iterator.next();" line below.
                let peek = iterator.peek().map(|c| *c);
                if let Some(peeked) = peek {
                    if predicate(peeked) {
                        string.push(peeked);
                        // Consume
                        let _ = iterator.next();
                        continue;
                    }
                }
                break;
            }
            return string;
        }
        loop {
            // Phase 1: drain tokens from the current line, if we have one.
            if let Some(ref mut current_line_chars) = self.current_line_chars {
                while let Some(next_char) = current_line_chars.next() {
                    if self.is_inside_multiline_comment {
                        // We need a "*/" pattern to exit a multiline comment.
                        if next_char == '*' {
                            let peek = current_line_chars.peek().map(|c| *c);
                            if let Some(peeked) = peek {
                                if peeked == '/' {
                                    // Consume
                                    let _ = current_line_chars.next();
                                    self.is_inside_multiline_comment = false;
                                }
                            }
                        }
                        continue;
                    }
                    match next_char {
                        '/' => {
                            // If the next symbol is a "*", this is the start of a multiline comment. Otherwise, process as a
                            // regular symbol. ("//" line comments never reach here: get_trimmed_line strips them.)
                            let peek = current_line_chars.peek().map(|c| *c);
                            if let Some(peeked) = peek {
                                if peeked == '*' {
                                    // Consume
                                    let _ = current_line_chars.next();
                                    self.is_inside_multiline_comment = true;
                                    continue;
                                }
                            }
                            // It's a symbol
                            return Some(Token::Symbol(next_char))
                        }
                        // All remaining single-char Jack symbols ('/' handled above).
                        '{'|'}'|'('|')'|'['|']'|'.'|','|';'|'+'|'-'|'*'|'&'|'|'|'<'|'>'|'='|'~' => {
                            return Some(Token::Symbol(next_char))
                        },
                        '"' => {
                            // String constant: everything up to the closing quote, quotes excluded.
                            // Note: strings cannot span lines since we tokenize line by line.
                            let mut constant = String::new();
                            while let Some(next_char) = current_line_chars.next() {
                                match next_char {
                                    '"' => { return Some(Token::StringConstant(constant)); },
                                    _ => { constant.push(next_char); }
                                }
                            }
                            // Never found the closing quote
                            panic!("Never found closing quote for string: {} on line: {}", constant, self.line_count);
                        },
                        _ => {
                            if next_char.is_whitespace() {
                                // Ignore whitespace
                                continue;
                            } else if next_char.is_digit(10) {
                                // Collect the number
                                // Note: Would be more efficient to slice the string here rather than accumulating in an extra string.
                                // We check if is alphanumeric, since we want to get a panic if letters follow the number.
                                // NOTE(review): parsing as i16 also rejects values above 32767, matching Jack's integer range.
                                let digit_string = munch(next_char, current_line_chars, |c| c.is_alphanumeric());
                                let number = digit_string.parse::<i16>().expect(&format!("Not a number: {} on line: {}", digit_string, self.line_count));
                                return Some(Token::IntegerConstant(number));
                            } else if next_char.is_alphanumeric() || next_char == '_' {
                                // Keyword or identifier: munch the whole word, then check against the keyword table.
                                let string = munch(next_char, current_line_chars, |c| c.is_alphanumeric() || c == '_');
                                return Some(match string.as_ref() {
                                    "class" => { Token::Keyword(Keyword::Class) },
                                    "constructor" => { Token::Keyword(Keyword::Constructor) },
                                    "function" => { Token::Keyword(Keyword::Function) },
                                    "method" => { Token::Keyword(Keyword::Method) },
                                    "field" => { Token::Keyword(Keyword::Field) },
                                    "static" => { Token::Keyword(Keyword::Static) },
                                    "var" => { Token::Keyword(Keyword::Var) },
                                    "int" => { Token::Keyword(Keyword::Int) },
                                    "char" => { Token::Keyword(Keyword::Char) },
                                    "boolean" => { Token::Keyword(Keyword::Boolean) },
                                    "void" => { Token::Keyword(Keyword::Void) },
                                    "true" => { Token::Keyword(Keyword::True) },
                                    "false" => { Token::Keyword(Keyword::False) },
                                    "null" => { Token::Keyword(Keyword::Null) },
                                    "this" => { Token::Keyword(Keyword::This) },
                                    "let" => { Token::Keyword(Keyword::Let) },
                                    "do" => { Token::Keyword(Keyword::Do) },
                                    "if" => { Token::Keyword(Keyword::If) },
                                    "else" => { Token::Keyword(Keyword::Else) },
                                    "while" => { Token::Keyword(Keyword::While) },
                                    "return" => { Token::Keyword(Keyword::Return) },
                                    _ => { Token::Identifier(string) },
                                });
                            } else {
                                panic!("Invalid char: {} at line: {}", next_char, self.line_count);
                            }
                        },
                    }
                }
            }
            // Phase 2: We have no more chars -- process the next line
            self.line_buffer.clear();
            match self.reader.read_line(&mut self.line_buffer) {
                Ok(len) => {
                    self.line_count += 1;
                    if len == 0 {
                        // We've hit EOF.
                        return None;
                    }
                    let line = get_trimmed_line(&self.line_buffer);
                    if line.is_empty() {
                        // Skip this line.
                        continue;
                    }
                    // Note: This is terrible. Would be better to just figure out how to get the lifetimes working, or use the rental or owning_ref crates.
                    // Basically any changes to line_buffer will also invalidate this iterator, so we need to be careful with that.
                    // NOTE(review): the previous (exhausted) chars iterator is still stored in
                    // `self.current_line_chars` when `clear()` above mutates `line_buffer`;
                    // that stale aliasing is unsound by strict borrow rules even though it is
                    // never dereferenced again — storing byte indices instead would avoid it.
                    unsafe {
                        let a = line as *const str;
                        let b: &str = &*a;
                        self.current_line_chars = Some(b.chars().peekable());
                    }
                },
                Err(_) => {
                    return None
                }
            }
        }
    }
}
// Trims surrounding whitespace and strips a trailing "//" line comment.
// A "//" that appears inside a Jack string constant is NOT treated as a
// comment, so lines like `let s = "http://example";` survive intact.
// (The previous version cut at the first "//" anywhere, corrupting such lines.)
fn get_trimmed_line(line: &str) -> &str {
    let mut line = line.trim();
    let bytes = line.as_bytes();
    // Scan bytes, toggling on '"'. Both '"' and '/' are ASCII, so byte
    // positions found here are always valid char boundaries for slicing.
    let mut in_string = false;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'"' => in_string = !in_string,
            b'/' if !in_string && i + 1 < bytes.len() && bytes[i + 1] == b'/' => {
                // Drop the comment and any whitespace that preceded it.
                line = line[..i].trim();
                break;
            }
            _ => {}
        }
        i += 1;
    }
    line
}
// ------------------------------------------------ | |
// Parser | |
// ------------------------------------------------ | |
// Note: This code can be somewhat cleaned up (i.e. there are a lot of | |
// repetitive statements); that's left as an exercise for the reader. ;) | |
// Recursive-descent parser: consumes tokens from a peekable JackTokenizer and
// writes the XML parse tree (chapter 10 format) to `writer`.
struct JackParser<'a, R: Read, W: Write> {
    // One-token lookahead is enough for the Jack grammar.
    tokenizer: Peekable<JackTokenizer<'a, R>>,
    writer: BufWriter<W>,
}
impl<'a, R: Read> JackParser<'a, R, File> { | |
fn new(tokenizer: JackTokenizer<'a, R>, out_name: &str) -> io::Result<Self> { | |
let out_file = try!(File::create(out_name)); | |
let writer = BufWriter::new(out_file); | |
Ok(JackParser {tokenizer: tokenizer.peekable(), writer: writer}) | |
} | |
} | |
// Follows the recommended implementation of the "CompilationEngine" in Chapter 10. | |
impl<'a, R: Read, W: Write> JackParser<'a, R, W> { | |
// class: 'class' className '{' classVarDec* subroutineDec* '}'
// Entry point of the parse; writes the <class> element and everything inside it.
fn compile_class(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<class>\n"));
    // Consume the three mandatory leading tokens, validate them, then echo them.
    let keyword = self.tokenizer.next().unwrap();
    let class_name = self.tokenizer.next().unwrap();
    let opening_braces = self.tokenizer.next().unwrap();
    expect_keyword(&keyword, Keyword::Class);
    expect_identifier(&class_name);
    expect_symbol(&opening_braces, '{');
    try!(self.write_token(keyword, indent_level + 2));
    try!(self.write_token(class_name, indent_level + 2));
    try!(self.write_token(opening_braces, indent_level + 2));
    // Zero or more class variable declarations and subroutine declarations;
    // any other token ends the class body.
    loop {
        // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
        let peek = self.tokenizer.peek().unwrap().clone();
        match peek {
            Token::Keyword(Keyword::Static) | Token::Keyword(Keyword::Field) => {
                try!(self.compile_class_var_dec(indent_level + 2));
            },
            Token::Keyword(Keyword::Constructor) | Token::Keyword(Keyword::Function) | Token::Keyword(Keyword::Method) => {
                try!(self.compile_subroutine(indent_level + 2));
            },
            _ => { break; }
        }
    }
    let closing_braces = self.tokenizer.next().unwrap();
    expect_symbol(&closing_braces, '}');
    try!(self.write_token(closing_braces, indent_level + 2));
    try!(self.write_with_indentation(indent_level, b"</class>\n"));
    Ok(())
}
// classVarDec: ('static' | 'field' ) type varName (',' varName)* ';'
fn compile_class_var_dec(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<classVarDec>\n"));
    let keyword = self.tokenizer.next().unwrap();
    let type_name = self.tokenizer.next().unwrap();
    let var_name = self.tokenizer.next().unwrap();
    expect_keywords(&keyword, &[Keyword::Static, Keyword::Field]);
    expect_typename(&type_name);
    expect_identifier(&var_name);
    try!(self.write_token(keyword, indent_level + 2));
    try!(self.write_token(type_name, indent_level + 2));
    try!(self.write_token(var_name, indent_level + 2));
    // Additional comma-separated names until the terminating ';'.
    loop {
        let next = self.tokenizer.next().unwrap();
        match next {
            Token::Symbol(',') => {
                try!(self.write_token(next, indent_level + 2));
                let next_var_name = self.tokenizer.next().unwrap();
                expect_identifier(&next_var_name);
                try!(self.write_token(next_var_name, indent_level + 2));
            },
            Token::Symbol(';') => {
                try!(self.write_token(next, indent_level + 2));
                break;
            },
            _ => {
                panic!("Expected ',' or ';', got {:?}", next);
            },
        }
    }
    try!(self.write_with_indentation(indent_level, b"</classVarDec>\n"));
    Ok(())
}
// subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
// subroutineBody: '{' varDec* statements '}'
fn compile_subroutine(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<subroutineDec>\n"));
    // Header: kind, return type, name, and opening parenthesis.
    let subroutine_type = self.tokenizer.next().unwrap();
    let subroutine_return_value_type = self.tokenizer.next().unwrap();
    let subroutine_name = self.tokenizer.next().unwrap();
    let opening_parenthesis = self.tokenizer.next().unwrap();
    expect_keywords(&subroutine_type, &[Keyword::Constructor, Keyword::Function, Keyword::Method]);
    expect_typename_including_void(&subroutine_return_value_type);
    expect_identifier(&subroutine_name);
    expect_symbol(&opening_parenthesis, '(');
    try!(self.write_token(subroutine_type, indent_level + 2));
    try!(self.write_token(subroutine_return_value_type, indent_level + 2));
    try!(self.write_token(subroutine_name, indent_level + 2));
    try!(self.write_token(opening_parenthesis, indent_level + 2));
    try!(self.compile_parameter_list(indent_level + 2));
    let closing_parenthesis = self.tokenizer.next().unwrap();
    expect_symbol(&closing_parenthesis, ')');
    try!(self.write_token(closing_parenthesis, indent_level + 2));
    try!(self.compile_subroutine_body(indent_level + 2));
    try!(self.write_with_indentation(indent_level, b"</subroutineDec>\n"));
    Ok(())
}
// parameterList: ( (type varName) (',' type varName)*)?
// Parameter list could be empty; the loop ends at the first token that cannot
// continue the list (normally the ')' which the caller consumes).
fn compile_parameter_list(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<parameterList>\n"));
    loop {
        // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
        let peek = self.tokenizer.peek().unwrap().clone();
        match peek {
            // A type (builtin keyword or class name) starts a parameter.
            Token::Keyword(Keyword::Int) | Token::Keyword(Keyword::Char) | Token::Keyword(Keyword::Boolean) | Token::Identifier(_) => {
                let consumed = self.tokenizer.next().unwrap();
                try!(self.write_token(consumed, indent_level + 2));
                let var_name = self.tokenizer.next().unwrap();
                expect_identifier(&var_name);
                try!(self.write_token(var_name, indent_level + 2));
            },
            Token::Symbol(',') => {
                let consumed = self.tokenizer.next().unwrap();
                try!(self.write_token(consumed, indent_level + 2));
            },
            _ => { break; }
        }
    }
    try!(self.write_with_indentation(indent_level, b"</parameterList>\n"));
    Ok(())
}
// subroutineBody: '{' varDec* statements '}'
fn compile_subroutine_body(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<subroutineBody>\n"));
    let opening_braces = self.tokenizer.next().unwrap();
    expect_symbol(&opening_braces, '{');
    try!(self.write_token(opening_braces, indent_level + 2));
    // All `var` declarations must precede the statements.
    loop {
        // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
        let peek = self.tokenizer.peek().unwrap().clone();
        match peek {
            Token::Keyword(Keyword::Var) => {
                try!(self.compile_var_dec(indent_level + 2));
            },
            _ => { break; }
        }
    }
    try!(self.compile_statements(indent_level + 2));
    let closing_braces = self.tokenizer.next().unwrap();
    expect_symbol(&closing_braces, '}');
    try!(self.write_token(closing_braces, indent_level + 2));
    try!(self.write_with_indentation(indent_level, b"</subroutineBody>\n"));
    Ok(())
}
// varDec: 'var' type varName (',' varName)* ';'
fn compile_var_dec(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<varDec>\n"));
    let var_keyword = self.tokenizer.next().unwrap();
    let var_type = self.tokenizer.next().unwrap();
    let var_name = self.tokenizer.next().unwrap();
    expect_keyword(&var_keyword, Keyword::Var);
    expect_typename(&var_type);
    expect_identifier(&var_name);
    try!(self.write_token(var_keyword, indent_level + 2));
    try!(self.write_token(var_type, indent_level + 2));
    try!(self.write_token(var_name, indent_level + 2));
    // Additional comma-separated names until the terminating ';'.
    loop {
        let next = self.tokenizer.next().unwrap();
        match next {
            Token::Symbol(',') => {
                try!(self.write_token(next, indent_level + 2));
                let next_var_name = self.tokenizer.next().unwrap();
                expect_identifier(&next_var_name);
                try!(self.write_token(next_var_name, indent_level + 2));
            },
            Token::Symbol(';') => {
                try!(self.write_token(next, indent_level + 2));
                break;
            },
            _ => {
                panic!("Expected ',' or ';', got {:?}", next);
            },
        }
    }
    try!(self.write_with_indentation(indent_level, b"</varDec>\n"));
    Ok(())
}
// statements: statement*
// statement: letStatement | ifStatement | whileStatement | doStatement | returnStatement
fn compile_statements(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<statements>\n"));
    // Could have no statements; dispatch on the leading keyword of each one.
    loop {
        // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
        let peek = self.tokenizer.peek().unwrap().clone();
        match peek {
            Token::Keyword(Keyword::Let) => {
                try!(self.compile_let(indent_level + 2));
            },
            Token::Keyword(Keyword::If) => {
                try!(self.compile_if(indent_level + 2));
            },
            Token::Keyword(Keyword::While) => {
                try!(self.compile_while(indent_level + 2));
            },
            Token::Keyword(Keyword::Do) => {
                try!(self.compile_do(indent_level + 2));
            },
            Token::Keyword(Keyword::Return) => {
                try!(self.compile_return(indent_level + 2));
            },
            _ => { break; }
        }
    }
    try!(self.write_with_indentation(indent_level, b"</statements>\n"));
    Ok(())
}
fn compile_let(&mut self, indent_level: u32) -> io::Result<()> { | |
// 'let' varName ('[' expression ']')? '=' expression ';' | |
try!(self.write_with_indentation(indent_level, b"<letStatement>\n")); | |
let keyword = self.tokenizer.next().unwrap(); | |
let var_name = self.tokenizer.next().unwrap(); | |
expect_keyword(&keyword, Keyword::Let); | |
expect_identifier(&var_name); | |
try!(self.write_token(keyword, indent_level + 2)); | |
try!(self.write_token(var_name, indent_level + 2)); | |
// TODO loop over '[' expression ']' | |
loop { | |
let next = self.tokenizer.next().unwrap(); | |
match next { | |
Token::Symbol('[') => { | |
try!(self.write_token(next, indent_level + 2)); | |
try!(self.compile_expression(indent_level + 2)); | |
let next_next = self.tokenizer.next().unwrap(); | |
expect_symbol(&next_next, ']'); | |
try!(self.write_token(next_next, indent_level + 2)); | |
}, | |
Token::Symbol('=') => { | |
try!(self.write_token(next, indent_level + 2)); | |
break; | |
}, | |
_ => {}, | |
} | |
} | |
try!(self.compile_expression(indent_level + 2)); | |
let semicolon = self.tokenizer.next().unwrap(); | |
expect_symbol(&semicolon, ';'); | |
try!(self.write_token(semicolon, indent_level + 2)); | |
try!(self.write_with_indentation(indent_level, b"</letStatement>\n")); | |
Ok(()) | |
} | |
// ifStatement: 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
fn compile_if(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<ifStatement>\n"));
    let keyword = self.tokenizer.next().unwrap();
    let opening_parenthesis = self.tokenizer.next().unwrap();
    expect_keyword(&keyword, Keyword::If);
    expect_symbol(&opening_parenthesis, '(');
    try!(self.write_token(keyword, indent_level + 2));
    try!(self.write_token(opening_parenthesis, indent_level + 2));
    // Condition.
    try!(self.compile_expression(indent_level + 2));
    let closing_parenthesis = self.tokenizer.next().unwrap();
    expect_symbol(&closing_parenthesis, ')');
    try!(self.write_token(closing_parenthesis, indent_level + 2));
    // Then-branch.
    let opening_braces = self.tokenizer.next().unwrap();
    expect_symbol(&opening_braces, '{');
    try!(self.write_token(opening_braces, indent_level + 2));
    try!(self.compile_statements(indent_level + 2));
    let closing_braces = self.tokenizer.next().unwrap();
    expect_symbol(&closing_braces, '}');
    try!(self.write_token(closing_braces, indent_level + 2));
    // Optional else-branch; only consumed if the next token is 'else'.
    // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
    let peek = self.tokenizer.peek().unwrap().clone();
    match peek {
        Token::Keyword(Keyword::Else) => {
            let consumed = self.tokenizer.next().unwrap();
            try!(self.write_token(consumed, indent_level + 2));
            let opening_braces = self.tokenizer.next().unwrap();
            expect_symbol(&opening_braces, '{');
            try!(self.write_token(opening_braces, indent_level + 2));
            try!(self.compile_statements(indent_level + 2));
            let closing_braces = self.tokenizer.next().unwrap();
            expect_symbol(&closing_braces, '}');
            try!(self.write_token(closing_braces, indent_level + 2));
        },
        _ => {
            // Don't do anything
        },
    }
    try!(self.write_with_indentation(indent_level, b"</ifStatement>\n"));
    Ok(())
}
fn compile_while(&mut self, indent_level: u32) -> io::Result<()> { | |
// 'while' '(' expression ')' '{' statements '}' | |
try!(self.write_with_indentation(indent_level, b"<whileStatement>\n")); | |
let keyword = self.tokenizer.next().unwrap(); | |
let opening_parenthesis = self.tokenizer.next().unwrap(); | |
expect_keyword(&keyword, Keyword::While); | |
expect_symbol(&opening_parenthesis, '('); | |
try!(self.write_token(keyword, indent_level + 2)); | |
try!(self.write_token(opening_parenthesis, indent_level + 2)); | |
try!(self.compile_expression(indent_level + 2)); | |
let closing_parenthesis = self.tokenizer.next().unwrap(); | |
expect_symbol(&closing_parenthesis, ')'); | |
try!(self.write_token(closing_parenthesis, indent_level + 2)); | |
let opening_braces = self.tokenizer.next().unwrap(); | |
expect_symbol(&opening_braces, '{'); | |
try!(self.write_token(opening_braces, indent_level + 2)); | |
try!(self.compile_statements(indent_level + 2)); | |
let closing_braces = self.tokenizer.next().unwrap(); | |
expect_symbol(&closing_braces, '}'); | |
try!(self.write_token(closing_braces, indent_level + 2)); | |
try!(self.write_with_indentation(indent_level, b"</whileStatement>\n")); | |
Ok(()) | |
} | |
fn compile_do(&mut self, indent_level: u32) -> io::Result<()> { | |
// 'do' subroutineCall ';' | |
try!(self.write_with_indentation(indent_level, b"<doStatement>\n")); | |
let keyword = self.tokenizer.next().unwrap(); | |
expect_keyword(&keyword, Keyword::Do); | |
try!(self.write_token(keyword, indent_level + 2)); | |
let subroutine_name = self.tokenizer.next().unwrap(); | |
expect_identifier(&subroutine_name); | |
try!(self.write_token(subroutine_name, indent_level + 2)); | |
// NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient. | |
let peek = self.tokenizer.peek().unwrap().clone(); | |
match peek { | |
Token::Symbol('.') => { | |
let consumed = self.tokenizer.next().unwrap(); | |
try!(self.write_token(consumed, indent_level + 2)); | |
let identifier = self.tokenizer.next().unwrap(); | |
expect_identifier(&identifier); | |
try!(self.write_token(identifier, indent_level + 2)); | |
let opening_parenthesis = self.tokenizer.next().unwrap(); | |
expect_symbol(&opening_parenthesis, '('); | |
try!(self.write_token(opening_parenthesis, indent_level + 2)); | |
}, | |
Token::Symbol('(') => { | |
let consumed = self.tokenizer.next().unwrap(); | |
try!(self.write_token(consumed, indent_level + 2)); | |
}, | |
_ => { | |
// Do nothing | |
}, | |
} | |
try!(self.compile_expression_list(indent_level + 2)); | |
let closing_parenthesis = self.tokenizer.next().unwrap(); | |
expect_symbol(&closing_parenthesis, ')'); | |
try!(self.write_token(closing_parenthesis, indent_level + 2)); | |
let semicolon = self.tokenizer.next().unwrap(); | |
expect_symbol(&semicolon, ';'); | |
try!(self.write_token(semicolon, indent_level + 2)); | |
try!(self.write_with_indentation(indent_level, b"</doStatement>\n")); | |
Ok(()) | |
} | |
fn compile_return(&mut self, indent_level: u32) -> io::Result<()> { | |
// 'return' expression? ';' | |
try!(self.write_with_indentation(indent_level, b"<returnStatement>\n")); | |
let keyword = self.tokenizer.next().unwrap(); | |
expect_keyword(&keyword, Keyword::Return); | |
try!(self.write_token(keyword, indent_level + 2)); | |
// NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient. | |
let peek = self.tokenizer.peek().unwrap().clone(); | |
match peek { | |
Token::Symbol(';') => { | |
let consumed = self.tokenizer.next().unwrap(); | |
try!(self.write_token(consumed, indent_level + 2)); | |
}, | |
_ => { | |
try!(self.compile_expression(indent_level + 2)); | |
let semicolon = self.tokenizer.next().unwrap(); | |
expect_symbol(&semicolon, ';'); | |
try!(self.write_token(semicolon, indent_level + 2)); | |
}, | |
} | |
try!(self.write_with_indentation(indent_level, b"</returnStatement>\n")); | |
Ok(()) | |
} | |
// expression: term (op term)*
// op: '+' | '-' | '*' | '/' | '&' | '|' | '<' | '>' | '='
fn compile_expression(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<expression>\n"));
    try!(self.compile_term(indent_level + 2));
    // Zero or more (op term) pairs; any non-operator token ends the expression.
    loop {
        // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
        let peek = self.tokenizer.peek().unwrap().clone();
        match peek {
            Token::Symbol('+') | Token::Symbol('-') | Token::Symbol('*') | Token::Symbol('/') |
            Token::Symbol('&') | Token::Symbol('|') | Token::Symbol('<') | Token::Symbol('>') |
            Token::Symbol('=') => {
                let consumed = self.tokenizer.next().unwrap();
                try!(self.write_token(consumed, indent_level + 2));
                try!(self.compile_term(indent_level + 2));
            },
            _ => { break; }
        }
    }
    try!(self.write_with_indentation(indent_level, b"</expression>\n"));
    Ok(())
}
// term: integerConstant | stringConstant | keywordConstant |
//       varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
fn compile_term(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<term>\n"));
    let next = self.tokenizer.next().unwrap();
    match next {
        // Constants stand alone as a complete term.
        Token::IntegerConstant(_) | Token::StringConstant(_) | Token::Keyword(Keyword::True) |
        Token::Keyword(Keyword::False) | Token::Keyword(Keyword::Null) | Token::Keyword(Keyword::This) => {
            try!(self.write_token(next, indent_level + 2));
        },
        Token::Identifier(_) => {
            try!(self.write_token(next, indent_level + 2));
            // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
            let peek = self.tokenizer.peek().unwrap().clone();
            // Following description from the book: Compiles a term. This routine is faced with a slight difficulty when
            // trying to decide between some of the alternative parsing rules. Specifically, if the current token is an
            // identifier, the routine must distinguish between a variable, an array entry, and a subroutine call. A
            // single look-ahead token, which may be one of “[“, “(“, or “.” suffices to distinguish between the three
            // possibilities. Any other token is not part of this term and should not be advanced over.
            match peek {
                Token::Symbol('[') => {
                    // Array access
                    let consumed = self.tokenizer.next().unwrap();
                    try!(self.write_token(consumed, indent_level + 2));
                    try!(self.compile_expression(indent_level + 2));
                    let next_next = self.tokenizer.next().unwrap();
                    expect_symbol(&next_next, ']');
                    try!(self.write_token(next_next, indent_level + 2));
                },
                Token::Symbol('(') => {
                    // Subroutine call
                    let consumed = self.tokenizer.next().unwrap();
                    try!(self.write_token(consumed, indent_level + 2));
                    try!(self.compile_expression_list(indent_level + 2));
                    let next_next = self.tokenizer.next().unwrap();
                    expect_symbol(&next_next, ')');
                    try!(self.write_token(next_next, indent_level + 2));
                },
                Token::Symbol('.') => {
                    // Subroutine call
                    let consumed = self.tokenizer.next().unwrap();
                    try!(self.write_token(consumed, indent_level + 2));
                    let identifier = self.tokenizer.next().unwrap();
                    expect_identifier(&identifier);
                    try!(self.write_token(identifier, indent_level + 2));
                    let opening_parenthesis = self.tokenizer.next().unwrap();
                    expect_symbol(&opening_parenthesis, '(');
                    try!(self.write_token(opening_parenthesis, indent_level + 2));
                    try!(self.compile_expression_list(indent_level + 2));
                    let next_next = self.tokenizer.next().unwrap();
                    expect_symbol(&next_next, ')');
                    try!(self.write_token(next_next, indent_level + 2));
                },
                _ => {
                    // Do nothing -- not part of this term
                }
            }
        },
        Token::Symbol('(') => {
            // Parenthesized sub-expression.
            try!(self.write_token(next, indent_level + 2));
            try!(self.compile_expression(indent_level + 2));
            let next_next = self.tokenizer.next().unwrap();
            expect_symbol(&next_next, ')');
            try!(self.write_token(next_next, indent_level + 2));
        },
        Token::Symbol('-') | Token::Symbol('~') => {
            // unaryOp term
            try!(self.write_token(next, indent_level + 2));
            try!(self.compile_term(indent_level + 2));
        },
        _ => {
            panic!("Expected one of integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term, got {:?}", next);
        },
    }
    try!(self.write_with_indentation(indent_level, b"</term>\n"));
    Ok(())
}
// expressionList: (expression (',' expression)* )?
// Might not have any expressions; only ever called between '(' and ')' of a
// subroutine call, so ')' terminates the list (and is left for the caller).
fn compile_expression_list(&mut self, indent_level: u32) -> io::Result<()> {
    try!(self.write_with_indentation(indent_level, b"<expressionList>\n"));
    loop {
        // NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
        let peek = self.tokenizer.peek().unwrap().clone();
        match peek {
            Token::Symbol(')') => {
                // No expressions
                break;
            },
            Token::Symbol(',') => {
                // May have more expressions
                let consumed = self.tokenizer.next().unwrap();
                try!(self.write_token(consumed, indent_level + 2));
            },
            _ => {
                try!(self.compile_expression(indent_level + 2));
            }
        }
    }
    try!(self.write_with_indentation(indent_level, b"</expressionList>\n"));
    Ok(())
}
fn write_with_indentation(&mut self, indent_level: u32, buf: &[u8]) -> io::Result<usize> { | |
try!(self.write_indentation(indent_level)); | |
return self.writer.write(buf); | |
} | |
fn write_indentation(&mut self, indent_level: u32) -> io::Result<()> { | |
for _ in 0..indent_level { | |
try!(self.writer.write(b" ")); | |
} | |
Ok(()) | |
} | |
fn write_token(&mut self, token: Token, indent_level: u32) -> io::Result<()> { | |
try!(self.write_indentation(indent_level)); | |
try!(write_token(&mut self.writer, token)); | |
Ok(()) | |
} | |
} | |
fn write_token<W: Write>(writer: &mut W, token: Token) -> io::Result<()> { | |
match token { | |
Token::Keyword(keyword) => { | |
try!(writer.write(b"<keyword> ")); | |
let keyword_string = match keyword { | |
Keyword::Class => "class", | |
Keyword::Constructor => "constructor", | |
Keyword::Function => "function", | |
Keyword::Method => "method", | |
Keyword::Field => "field", | |
Keyword::Static => "static", | |
Keyword::Var => "var", | |
Keyword::Int => "int", | |
Keyword::Char => "char", | |
Keyword::Boolean => "boolean", | |
Keyword::Void => "void", | |
Keyword::True => "true", | |
Keyword::False => "false", | |
Keyword::Null => "null", | |
Keyword::This => "this", | |
Keyword::Let => "let", | |
Keyword::Do => "do", | |
Keyword::If => "if", | |
Keyword::Else => "else", | |
Keyword::While => "while", | |
Keyword::Return => "return", | |
}; | |
try!(writer.write(keyword_string.as_bytes())); | |
try!(writer.write(b" </keyword>\n")); | |
}, | |
Token::Symbol(symbol) => { | |
try!(writer.write(b"<symbol> ")); | |
match symbol { | |
'&' => { try!(writer.write(b"&")); }, | |
'>' => { try!(writer.write(b">")); }, | |
'<' => { try!(writer.write(b"<")); }, | |
_ => { try!(writer.write(&format!("{}", symbol).as_bytes())); }, | |
} | |
try!(writer.write(b" </symbol>\n")); | |
}, | |
Token::Identifier(identifier) => { | |
try!(writer.write(b"<identifier> ")); | |
try!(writer.write(identifier.as_bytes())); | |
try!(writer.write(b" </identifier>\n")); | |
}, | |
Token::IntegerConstant(int) => { | |
try!(writer.write(b"<integerConstant> ")); | |
try!(writer.write(&format!("{}", int).as_bytes())); | |
try!(writer.write(b" </integerConstant>\n")); | |
}, | |
Token::StringConstant(string) => { | |
try!(writer.write(b"<stringConstant> ")); | |
try!(writer.write(string.as_bytes())); | |
try!(writer.write(b" </stringConstant>\n")); | |
}, | |
} | |
Ok(()) | |
} | |
fn expect_keyword(token: &Token, keyword: Keyword) { | |
match token { | |
&Token::Keyword(ref actual_keyword) => { | |
if keyword == *actual_keyword { return; } | |
}, | |
_ => {}, | |
} | |
panic!("Expected \"{:?}\" keyword, had \"{:?}\"", keyword, token); | |
} | |
fn expect_keywords(token: &Token, keywords: &[Keyword]) { | |
for keyword in keywords { | |
match token { | |
&Token::Keyword(ref actual_keyword) => { | |
if keyword == actual_keyword { return; } | |
}, | |
_ => {}, | |
} | |
} | |
panic!("Expected one of \"{:?}\" keywords, had \"{:?}\"", keywords, token); | |
} | |
fn expect_typename(token: &Token) { | |
match token { | |
&Token::Keyword(Keyword::Int) | &Token::Keyword(Keyword::Char) | &Token::Keyword(Keyword::Boolean) | &Token::Identifier(_) => { | |
// OK | |
}, | |
_ => { | |
panic!("Expected 'int', 'char', 'boolean', or class name: {:?}", token); | |
} | |
} | |
} | |
fn expect_typename_including_void(token: &Token) { | |
match token { | |
&Token::Keyword(Keyword::Void) | &Token::Keyword(Keyword::Int) | &Token::Keyword(Keyword::Char) | &Token::Keyword(Keyword::Boolean) | &Token::Identifier(_) => { | |
// OK | |
}, | |
_ => { | |
panic!("Expected 'void', 'int', 'char', 'boolean', or class name: {:?}", token); | |
} | |
} | |
} | |
fn expect_identifier(identifier: &Token) { | |
match identifier { | |
&Token::Identifier(_) => { | |
// OK | |
}, | |
_ => { | |
panic!("Expected identifier, had \"{:?}\"", identifier); | |
} | |
} | |
} | |
fn expect_symbol(token: &Token, symbol: char) { | |
match token { | |
&Token::Symbol(actual_symbol) if actual_symbol == symbol => { | |
// OK | |
}, | |
_ => { | |
panic!("Expected symbol {}, had \"{:?}\"", symbol, token); | |
} | |
} | |
} | |
/* | |
impl<W> JackParser<W: Write> { | |
}*/ | |
// ------------------------------------------------ | |
// Main entry point | |
// ------------------------------------------------ | |
// Refs: | |
// http://nand2tetris.org/10.php | |
// http://www.cs.huji.ac.il/course/2002/nand2tet/docs/ch_9_jack.pdf | |
// http://www1.idc.ac.il/tecs/book/chapter10.pdf | |
fn main() { | |
let mut args = env::args(); | |
match args.len() { | |
2 => { | |
// TODO file or directory | |
}, | |
3 => { | |
let command = args.nth(1).unwrap(); | |
let input = args.next().unwrap(); | |
match command.as_ref() { | |
"tokenize" => { | |
let reader = JackTokenizer::new(&input).unwrap(); | |
let input_path = Path::new(&input); | |
let input_name_without_extension = input_path.file_stem().unwrap(); | |
let output_name = format!("{}T.xml", input_name_without_extension.to_str().unwrap()); | |
let mut writer = BufWriter::new(File::create(output_name).unwrap()); | |
writer.write(b"<tokens>\n").unwrap(); | |
for token in reader { | |
write_token(&mut writer, token).unwrap(); | |
} | |
writer.write(b"</tokens>\n").unwrap(); | |
}, | |
"parse" => { | |
let reader = JackTokenizer::new(&input).unwrap(); | |
let input_path = Path::new(&input); | |
let input_name_without_extension = input_path.file_stem().unwrap(); | |
let output_name = format!("{}.xml", input_name_without_extension.to_str().unwrap()); | |
let mut writer = JackParser::new(reader, &output_name).unwrap(); | |
writer.compile_class(0).unwrap(); | |
}, | |
_ => { | |
println!("Unknown command: {}", command); | |
return; | |
}, | |
} | |
}, | |
_ => { | |
println!("Usage: JackAnalyzer command input"); | |
println!("\nCommands:\n"); | |
println!("tokenize Creates a list of tokens from the input Jack file."); | |
println!("parse Creates a parse tree from the input Jack file."); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment