Skip to content

Instantly share code, notes, and snippets.

@learnopengles
Created March 28, 2018 22:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save learnopengles/38eb81df8ba57055c42d37bda937b0c4 to your computer and use it in GitHub Desktop.
Save learnopengles/38eb81df8ba57055c42d37bda937b0c4 to your computer and use it in GitHub Desktop.
Jack Analyzer for nand2tetris project 10 -- written in Rust.
// Jack Analyzer for nand2tetris project 10 -- written in Rust.
// License: CC BY-SA 4.0 -- https://creativecommons.org/licenses/by-sa/4.0/
use std::env;
use std::fs::File;
use std::io;
use std::io::{BufRead, BufReader, BufWriter, Read, Write};
use std::iter::Peekable;
use std::path::Path;
use std::str::Chars;
// ------------------------------------------------
// Tokenizer
// ------------------------------------------------
// A single lexical token of the Jack language (nand2tetris, chapter 10).
#[derive(Clone, Debug, PartialEq)]
enum Token {
// A reserved word such as 'class' or 'while'.
Keyword(Keyword),
// One of the Jack symbol characters: {}()[].,;+-*/&|<>=~
Symbol(char),
// A user-defined name (class, subroutine, or variable).
Identifier(String),
// A decimal constant; Jack integers fit in a signed 16-bit word.
IntegerConstant(i16),
// The body of a double-quoted string, quotes excluded.
StringConstant(String),
}
// The full set of Jack reserved words. The spelling matched against the
// source text lives in the tokenizer's keyword table and in write_token.
#[derive(Clone, Debug, PartialEq)]
enum Keyword {
Class,
Constructor,
Function,
Method,
Field,
Static,
Var,
Int,
Char,
Boolean,
Void,
True,
False,
Null,
This,
Let,
Do,
If,
Else,
While,
Return,
}
// Streaming tokenizer: reads the input one line at a time and yields Tokens
// through the Iterator impl below.
struct JackTokenizer<'a, R: Read> {
// Buffered source of Jack code.
reader: BufReader<R>,
// Owned storage for the current line; `current_line_chars` points into it.
line_buffer: String,
// Char cursor over the current (trimmed) line.
// NOTE(review): this is made to point into `line_buffer` via an unsafe
// pointer cast in next(); the 'a lifetime does NOT actually tie the two
// together -- see the comment at the unsafe block.
current_line_chars: Option<Peekable<Chars<'a>>>,
// Count of lines read so far, used for error messages.
line_count: u32,
// True while between "/*" and "*/", possibly spanning lines.
is_inside_multiline_comment: bool,
}
impl<'a> JackTokenizer<'a, File> {
    /// Opens the file at `in_name` and wraps it in a buffered tokenizer.
    ///
    /// Returns an I/O error if the file cannot be opened.
    fn new(in_name: &str) -> io::Result<Self> {
        // `?` replaces the deprecated `try!` macro; behavior is identical.
        let in_file = File::open(in_name)?;
        Ok(JackTokenizer {
            reader: BufReader::new(in_file),
            // Pre-size the line buffer so typical lines don't reallocate.
            line_buffer: String::with_capacity(256),
            current_line_chars: None,
            line_count: 0,
            is_inside_multiline_comment: false,
        })
    }
}
// Token-at-a-time iteration. Each call to next() returns the next Jack token,
// or None at end of input (or on a read error).
impl<'a, R: Read> Iterator for JackTokenizer<'a, R> {
type Item = Token;
fn next(&mut self) -> Option<Token> {
// Greedily consumes chars from `iterator` while `predicate` holds, starting
// the accumulated string with `initial_char` (which was already consumed).
fn munch(initial_char: char, iterator: &mut Peekable<Chars>, predicate: fn(char) -> bool) -> String {
let mut string = String::new();
string.push(initial_char);
loop {
// Dereference the char to avoid double borrow at the "let _ = iterator.next();" line below.
let peek = iterator.peek().map(|c| *c);
if let Some(peeked) = peek {
if predicate(peeked) {
string.push(peeked);
// Consume
let _ = iterator.next();
continue;
}
}
break;
}
return string;
}
// Outer loop: scan the current line for a token; when the line is
// exhausted, read the next line and try again.
loop {
if let Some(ref mut current_line_chars) = self.current_line_chars {
while let Some(next_char) = current_line_chars.next() {
if self.is_inside_multiline_comment {
// We need a "*/" pattern to exit a multiline comment.
if next_char == '*' {
let peek = current_line_chars.peek().map(|c| *c);
if let Some(peeked) = peek {
if peeked == '/' {
// Consume
let _ = current_line_chars.next();
self.is_inside_multiline_comment = false;
}
}
}
continue;
}
match next_char {
'/' => {
// If the next symbol is a "*", this is the start of a multiline comment. Otherwise, process as a
// regular symbol.
let peek = current_line_chars.peek().map(|c| *c);
if let Some(peeked) = peek {
if peeked == '*' {
// Consume
let _ = current_line_chars.next();
self.is_inside_multiline_comment = true;
continue;
}
}
// It's a symbol
return Some(Token::Symbol(next_char))
}
'{'|'}'|'('|')'|'['|']'|'.'|','|';'|'+'|'-'|'*'|'&'|'|'|'<'|'>'|'='|'~' => {
return Some(Token::Symbol(next_char))
},
'"' => {
// String constant: collect everything up to the closing quote.
// NOTE(review): a string cannot span lines here, since we only
// scan within the current line -- presumably fine for Jack.
let mut constant = String::new();
while let Some(next_char) = current_line_chars.next() {
match next_char {
'"' => { return Some(Token::StringConstant(constant)); },
_ => { constant.push(next_char); }
}
}
// Never found the closing quote
panic!("Never found closing quote for string: {} on line: {}", constant, self.line_count);
},
_ => {
if next_char.is_whitespace() {
// Ignore whitespace
continue;
} else if next_char.is_digit(10) {
// Collect the number
// Note: Would be more efficient to slice the string here rather than accumulating in an extra string.
// We check if is alphanumeric, since we want to get a panic if letters follow the number.
let digit_string = munch(next_char, current_line_chars, |c| c.is_alphanumeric());
let number = digit_string.parse::<i16>().expect(&format!("Not a number: {} on line: {}", digit_string, self.line_count));
return Some(Token::IntegerConstant(number));
} else if next_char.is_alphanumeric() || next_char == '_' {
// Keyword or identifier: munch the word, then check the keyword table.
let string = munch(next_char, current_line_chars, |c| c.is_alphanumeric() || c == '_');
return Some(match string.as_ref() {
"class" => { Token::Keyword(Keyword::Class) },
"constructor" => { Token::Keyword(Keyword::Constructor) },
"function" => { Token::Keyword(Keyword::Function) },
"method" => { Token::Keyword(Keyword::Method) },
"field" => { Token::Keyword(Keyword::Field) },
"static" => { Token::Keyword(Keyword::Static) },
"var" => { Token::Keyword(Keyword::Var) },
"int" => { Token::Keyword(Keyword::Int) },
"char" => { Token::Keyword(Keyword::Char) },
"boolean" => { Token::Keyword(Keyword::Boolean) },
"void" => { Token::Keyword(Keyword::Void) },
"true" => { Token::Keyword(Keyword::True) },
"false" => { Token::Keyword(Keyword::False) },
"null" => { Token::Keyword(Keyword::Null) },
"this" => { Token::Keyword(Keyword::This) },
"let" => { Token::Keyword(Keyword::Let) },
"do" => { Token::Keyword(Keyword::Do) },
"if" => { Token::Keyword(Keyword::If) },
"else" => { Token::Keyword(Keyword::Else) },
"while" => { Token::Keyword(Keyword::While) },
"return" => { Token::Keyword(Keyword::Return) },
_ => { Token::Identifier(string) },
});
} else {
panic!("Invalid char: {} at line: {}", next_char, self.line_count);
}
},
}
}
}
// We have no more chars -- process the next line
// NOTE(review): clearing line_buffer here invalidates the data the stale
// current_line_chars iterator points at (see unsafe block below). The
// stale iterator is never read again before being replaced, but this is
// still dangling-pointer territory -- consider owning_ref/rental or
// restructuring to avoid the self-reference entirely.
self.line_buffer.clear();
match self.reader.read_line(&mut self.line_buffer) {
Ok(len) => {
self.line_count += 1;
if len == 0 {
// We've hit EOF.
return None;
}
let line = get_trimmed_line(&self.line_buffer);
if line.is_empty() {
// Skip this line.
continue;
}
// Note: This is terrible. Would be better to just figure out how to get the lifetimes working, or use the rental or owning_ref crates.
// Basically any changes to line_buffer will also invalidate this iterator, so we need to be careful with that.
unsafe {
let a = line as *const str;
let b: &str = &*a;
self.current_line_chars = Some(b.chars().peekable());
}
},
Err(_) => {
// Treat a read error the same as EOF: stop iterating.
return None
}
}
}
}
}
/// Trims surrounding whitespace from a source line and strips a trailing
/// "//" line comment, if any. Returns a slice into the input.
///
/// NOTE(review): this also strips "//" that occurs INSIDE a string literal
/// (e.g. `let s = "http://x";`) -- TODO confirm whether Jack source for the
/// course ever contains such strings; a proper fix needs quote awareness.
fn get_trimmed_line(line: &str) -> &str {
    let mut line = line.trim();
    // Strip any comments. The previous version wrote `&line[..idx].trim()`,
    // which produced a `&&str` that only worked via deref coercion.
    if let Some(idx_comment) = line.find("//") {
        line = line[..idx_comment].trim();
    }
    line
}
// ------------------------------------------------
// Parser
// ------------------------------------------------
// Note: This code can be somewhat cleaned up (i.e. there are a lot of
// repetitive statements); that's left as an exercise for the reader. ;)
// Recursive-descent parser (the "CompilationEngine" of chapter 10). Pulls
// tokens from the tokenizer and writes an indented XML parse tree.
struct JackParser<'a, R: Read, W: Write> {
// Token source, peekable for the one-token lookahead the grammar needs.
tokenizer: Peekable<JackTokenizer<'a, R>>,
// Buffered XML output sink.
writer: BufWriter<W>,
}
impl<'a, R: Read> JackParser<'a, R, File> {
    /// Creates a parser that writes its XML output to a newly created file
    /// named `out_name`.
    ///
    /// Returns an I/O error if the output file cannot be created.
    fn new(tokenizer: JackTokenizer<'a, R>, out_name: &str) -> io::Result<Self> {
        // `?` replaces the deprecated `try!` macro; behavior is identical.
        let out_file = File::create(out_name)?;
        Ok(JackParser {
            tokenizer: tokenizer.peekable(),
            writer: BufWriter::new(out_file),
        })
    }
}
// Follows the recommended implementation of the "CompilationEngine" in Chapter 10.
impl<'a, R: Read, W: Write> JackParser<'a, R, W> {
fn compile_class(&mut self, indent_level: u32) -> io::Result<()> {
// 'class' className '{' classVarDec* subroutineDec* '}'
try!(self.write_with_indentation(indent_level, b"<class>\n"));
let keyword = self.tokenizer.next().unwrap();
let class_name = self.tokenizer.next().unwrap();
let opening_braces = self.tokenizer.next().unwrap();
expect_keyword(&keyword, Keyword::Class);
expect_identifier(&class_name);
expect_symbol(&opening_braces, '{');
try!(self.write_token(keyword, indent_level + 2));
try!(self.write_token(class_name, indent_level + 2));
try!(self.write_token(opening_braces, indent_level + 2));
loop {
// NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
let peek = self.tokenizer.peek().unwrap().clone();
match peek {
Token::Keyword(Keyword::Static) | Token::Keyword(Keyword::Field) => {
try!(self.compile_class_var_dec(indent_level + 2));
},
Token::Keyword(Keyword::Constructor) | Token::Keyword(Keyword::Function) | Token::Keyword(Keyword::Method) => {
try!(self.compile_subroutine(indent_level + 2));
},
_ => { break; }
}
}
let closing_braces = self.tokenizer.next().unwrap();
expect_symbol(&closing_braces, '}');
try!(self.write_token(closing_braces, indent_level + 2));
try!(self.write_with_indentation(indent_level, b"</class>\n"));
Ok(())
}
/// Compiles one class-level variable declaration:
/// ('static' | 'field') type varName (',' varName)* ';'
fn compile_class_var_dec(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<classVarDec>\n")?;
    let storage_class = self.tokenizer.next().unwrap();
    let var_type = self.tokenizer.next().unwrap();
    let first_name = self.tokenizer.next().unwrap();
    expect_keywords(&storage_class, &[Keyword::Static, Keyword::Field]);
    expect_typename(&var_type);
    expect_identifier(&first_name);
    self.write_token(storage_class, indent_level + 2)?;
    self.write_token(var_type, indent_level + 2)?;
    self.write_token(first_name, indent_level + 2)?;
    // Extra names are comma-separated; a semicolon ends the declaration.
    loop {
        let separator = self.tokenizer.next().unwrap();
        match separator {
            Token::Symbol(',') => {
                self.write_token(separator, indent_level + 2)?;
                let extra_name = self.tokenizer.next().unwrap();
                expect_identifier(&extra_name);
                self.write_token(extra_name, indent_level + 2)?;
            }
            Token::Symbol(';') => {
                self.write_token(separator, indent_level + 2)?;
                break;
            }
            other => panic!("Expected ',' or ';', got {:?}", other),
        }
    }
    self.write_with_indentation(indent_level, b"</classVarDec>\n")?;
    Ok(())
}
fn compile_subroutine(&mut self, indent_level: u32) -> io::Result<()> {
// subroutineDec: ('constructor' | 'function' | 'method') ('void' | type) subroutineName '(' parameterList ')' subroutineBody
// subroutineBody: '{' varDec* statements '}'
try!(self.write_with_indentation(indent_level, b"<subroutineDec>\n"));
let subroutine_type = self.tokenizer.next().unwrap();
let subroutine_return_value_type = self.tokenizer.next().unwrap();
let subroutine_name = self.tokenizer.next().unwrap();
let opening_parenthesis = self.tokenizer.next().unwrap();
expect_keywords(&subroutine_type, &[Keyword::Constructor, Keyword::Function, Keyword::Method]);
expect_typename_including_void(&subroutine_return_value_type);
expect_identifier(&subroutine_name);
expect_symbol(&opening_parenthesis, '(');
try!(self.write_token(subroutine_type, indent_level + 2));
try!(self.write_token(subroutine_return_value_type, indent_level + 2));
try!(self.write_token(subroutine_name, indent_level + 2));
try!(self.write_token(opening_parenthesis, indent_level + 2));
try!(self.compile_parameter_list(indent_level + 2));
let closing_parenthesis = self.tokenizer.next().unwrap();
expect_symbol(&closing_parenthesis, ')');
try!(self.write_token(closing_parenthesis, indent_level + 2));
try!(self.compile_subroutine_body(indent_level + 2));
try!(self.write_with_indentation(indent_level, b"</subroutineDec>\n"));
Ok(())
}
/// Compiles a (possibly empty) parameter list:
/// ((type varName) (',' type varName)*)?
/// The surrounding parentheses are handled by the caller.
fn compile_parameter_list(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<parameterList>\n")?;
    loop {
        // Clone the lookahead token; peek() borrows the iterator.
        let lookahead = self.tokenizer.peek().unwrap().clone();
        match lookahead {
            Token::Keyword(Keyword::Int)
            | Token::Keyword(Keyword::Char)
            | Token::Keyword(Keyword::Boolean)
            | Token::Identifier(_) => {
                // A parameter type followed by its name.
                let param_type = self.tokenizer.next().unwrap();
                self.write_token(param_type, indent_level + 2)?;
                let param_name = self.tokenizer.next().unwrap();
                expect_identifier(&param_name);
                self.write_token(param_name, indent_level + 2)?;
            }
            Token::Symbol(',') => {
                let comma = self.tokenizer.next().unwrap();
                self.write_token(comma, indent_level + 2)?;
            }
            _ => break,
        }
    }
    self.write_with_indentation(indent_level, b"</parameterList>\n")?;
    Ok(())
}
/// Compiles a subroutine body: '{' varDec* statements '}'
fn compile_subroutine_body(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<subroutineBody>\n")?;
    let open_brace = self.tokenizer.next().unwrap();
    expect_symbol(&open_brace, '{');
    self.write_token(open_brace, indent_level + 2)?;
    // All local variable declarations come before the first statement.
    loop {
        let upcoming = self.tokenizer.peek().unwrap().clone();
        if let Token::Keyword(Keyword::Var) = upcoming {
            self.compile_var_dec(indent_level + 2)?;
        } else {
            break;
        }
    }
    self.compile_statements(indent_level + 2)?;
    let close_brace = self.tokenizer.next().unwrap();
    expect_symbol(&close_brace, '}');
    self.write_token(close_brace, indent_level + 2)?;
    self.write_with_indentation(indent_level, b"</subroutineBody>\n")?;
    Ok(())
}
/// Compiles one local variable declaration:
/// 'var' type varName (',' varName)* ';'
fn compile_var_dec(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<varDec>\n")?;
    let var_keyword = self.tokenizer.next().unwrap();
    let var_type = self.tokenizer.next().unwrap();
    let first_name = self.tokenizer.next().unwrap();
    expect_keyword(&var_keyword, Keyword::Var);
    expect_typename(&var_type);
    expect_identifier(&first_name);
    self.write_token(var_keyword, indent_level + 2)?;
    self.write_token(var_type, indent_level + 2)?;
    self.write_token(first_name, indent_level + 2)?;
    // Extra names are comma-separated; a semicolon ends the declaration.
    loop {
        let separator = self.tokenizer.next().unwrap();
        match separator {
            Token::Symbol(',') => {
                self.write_token(separator, indent_level + 2)?;
                let extra_name = self.tokenizer.next().unwrap();
                expect_identifier(&extra_name);
                self.write_token(extra_name, indent_level + 2)?;
            }
            Token::Symbol(';') => {
                self.write_token(separator, indent_level + 2)?;
                break;
            }
            other => panic!("Expected ',' or ';', got {:?}", other),
        }
    }
    self.write_with_indentation(indent_level, b"</varDec>\n")?;
    Ok(())
}
/// Compiles statement* -- dispatches on the leading keyword until a token
/// that cannot start a statement (normally '}') is seen. May emit an empty
/// <statements> element.
fn compile_statements(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<statements>\n")?;
    loop {
        // Clone the lookahead token; peek() borrows the iterator.
        let lookahead = self.tokenizer.peek().unwrap().clone();
        match lookahead {
            Token::Keyword(Keyword::Let) => self.compile_let(indent_level + 2)?,
            Token::Keyword(Keyword::If) => self.compile_if(indent_level + 2)?,
            Token::Keyword(Keyword::While) => self.compile_while(indent_level + 2)?,
            Token::Keyword(Keyword::Do) => self.compile_do(indent_level + 2)?,
            Token::Keyword(Keyword::Return) => self.compile_return(indent_level + 2)?,
            _ => break,
        }
    }
    self.write_with_indentation(indent_level, b"</statements>\n")?;
    Ok(())
}
/// Compiles: 'let' varName ('[' expression ']')? '=' expression ';'
fn compile_let(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<letStatement>\n")?;
    let keyword = self.tokenizer.next().unwrap();
    let var_name = self.tokenizer.next().unwrap();
    expect_keyword(&keyword, Keyword::Let);
    expect_identifier(&var_name);
    self.write_token(keyword, indent_level + 2)?;
    self.write_token(var_name, indent_level + 2)?;
    // Optional array index, then the mandatory '='.
    loop {
        let next = self.tokenizer.next().unwrap();
        match next {
            Token::Symbol('[') => {
                self.write_token(next, indent_level + 2)?;
                self.compile_expression(indent_level + 2)?;
                let closing_bracket = self.tokenizer.next().unwrap();
                expect_symbol(&closing_bracket, ']');
                self.write_token(closing_bracket, indent_level + 2)?;
            },
            Token::Symbol('=') => {
                self.write_token(next, indent_level + 2)?;
                break;
            },
            // BUG FIX: unexpected tokens used to be silently discarded here,
            // which could consume the rest of the token stream before an
            // unrelated unwrap() panicked. Fail fast with a clear message,
            // matching the behavior of compile_var_dec.
            _ => panic!("Expected '[' or '=', got {:?}", next),
        }
    }
    self.compile_expression(indent_level + 2)?;
    let semicolon = self.tokenizer.next().unwrap();
    expect_symbol(&semicolon, ';');
    self.write_token(semicolon, indent_level + 2)?;
    self.write_with_indentation(indent_level, b"</letStatement>\n")?;
    Ok(())
}
fn compile_if(&mut self, indent_level: u32) -> io::Result<()> {
// 'if' '(' expression ')' '{' statements '}' ( 'else' '{' statements '}' )?
try!(self.write_with_indentation(indent_level, b"<ifStatement>\n"));
let keyword = self.tokenizer.next().unwrap();
let opening_parenthesis = self.tokenizer.next().unwrap();
expect_keyword(&keyword, Keyword::If);
expect_symbol(&opening_parenthesis, '(');
try!(self.write_token(keyword, indent_level + 2));
try!(self.write_token(opening_parenthesis, indent_level + 2));
try!(self.compile_expression(indent_level + 2));
let closing_parenthesis = self.tokenizer.next().unwrap();
expect_symbol(&closing_parenthesis, ')');
try!(self.write_token(closing_parenthesis, indent_level + 2));
let opening_braces = self.tokenizer.next().unwrap();
expect_symbol(&opening_braces, '{');
try!(self.write_token(opening_braces, indent_level + 2));
try!(self.compile_statements(indent_level + 2));
let closing_braces = self.tokenizer.next().unwrap();
expect_symbol(&closing_braces, '}');
try!(self.write_token(closing_braces, indent_level + 2));
// NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
let peek = self.tokenizer.peek().unwrap().clone();
match peek {
Token::Keyword(Keyword::Else) => {
let consumed = self.tokenizer.next().unwrap();
try!(self.write_token(consumed, indent_level + 2));
let opening_braces = self.tokenizer.next().unwrap();
expect_symbol(&opening_braces, '{');
try!(self.write_token(opening_braces, indent_level + 2));
try!(self.compile_statements(indent_level + 2));
let closing_braces = self.tokenizer.next().unwrap();
expect_symbol(&closing_braces, '}');
try!(self.write_token(closing_braces, indent_level + 2));
},
_ => {
// Don't do anything
},
}
try!(self.write_with_indentation(indent_level, b"</ifStatement>\n"));
Ok(())
}
/// Compiles: 'while' '(' expression ')' '{' statements '}'
fn compile_while(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<whileStatement>\n")?;
    let while_keyword = self.tokenizer.next().unwrap();
    let open_paren = self.tokenizer.next().unwrap();
    expect_keyword(&while_keyword, Keyword::While);
    expect_symbol(&open_paren, '(');
    self.write_token(while_keyword, indent_level + 2)?;
    self.write_token(open_paren, indent_level + 2)?;
    self.compile_expression(indent_level + 2)?;
    let close_paren = self.tokenizer.next().unwrap();
    expect_symbol(&close_paren, ')');
    self.write_token(close_paren, indent_level + 2)?;
    let open_brace = self.tokenizer.next().unwrap();
    expect_symbol(&open_brace, '{');
    self.write_token(open_brace, indent_level + 2)?;
    self.compile_statements(indent_level + 2)?;
    let close_brace = self.tokenizer.next().unwrap();
    expect_symbol(&close_brace, '}');
    self.write_token(close_brace, indent_level + 2)?;
    self.write_with_indentation(indent_level, b"</whileStatement>\n")?;
    Ok(())
}
fn compile_do(&mut self, indent_level: u32) -> io::Result<()> {
// 'do' subroutineCall ';'
try!(self.write_with_indentation(indent_level, b"<doStatement>\n"));
let keyword = self.tokenizer.next().unwrap();
expect_keyword(&keyword, Keyword::Do);
try!(self.write_token(keyword, indent_level + 2));
let subroutine_name = self.tokenizer.next().unwrap();
expect_identifier(&subroutine_name);
try!(self.write_token(subroutine_name, indent_level + 2));
// NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
let peek = self.tokenizer.peek().unwrap().clone();
match peek {
Token::Symbol('.') => {
let consumed = self.tokenizer.next().unwrap();
try!(self.write_token(consumed, indent_level + 2));
let identifier = self.tokenizer.next().unwrap();
expect_identifier(&identifier);
try!(self.write_token(identifier, indent_level + 2));
let opening_parenthesis = self.tokenizer.next().unwrap();
expect_symbol(&opening_parenthesis, '(');
try!(self.write_token(opening_parenthesis, indent_level + 2));
},
Token::Symbol('(') => {
let consumed = self.tokenizer.next().unwrap();
try!(self.write_token(consumed, indent_level + 2));
},
_ => {
// Do nothing
},
}
try!(self.compile_expression_list(indent_level + 2));
let closing_parenthesis = self.tokenizer.next().unwrap();
expect_symbol(&closing_parenthesis, ')');
try!(self.write_token(closing_parenthesis, indent_level + 2));
let semicolon = self.tokenizer.next().unwrap();
expect_symbol(&semicolon, ';');
try!(self.write_token(semicolon, indent_level + 2));
try!(self.write_with_indentation(indent_level, b"</doStatement>\n"));
Ok(())
}
/// Compiles: 'return' expression? ';'
fn compile_return(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<returnStatement>\n")?;
    let return_keyword = self.tokenizer.next().unwrap();
    expect_keyword(&return_keyword, Keyword::Return);
    self.write_token(return_keyword, indent_level + 2)?;
    // A bare `return;` has no expression; anything else must be one.
    // Cloned because the peeked token borrows the iterator.
    let lookahead = self.tokenizer.peek().unwrap().clone();
    if let Token::Symbol(';') = lookahead {
        let semicolon = self.tokenizer.next().unwrap();
        self.write_token(semicolon, indent_level + 2)?;
    } else {
        self.compile_expression(indent_level + 2)?;
        let semicolon = self.tokenizer.next().unwrap();
        expect_symbol(&semicolon, ';');
        self.write_token(semicolon, indent_level + 2)?;
    }
    self.write_with_indentation(indent_level, b"</returnStatement>\n")?;
    Ok(())
}
/// Compiles: term (op term)* where op is one of + - * / & | < > =
fn compile_expression(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<expression>\n")?;
    self.compile_term(indent_level + 2)?;
    // Keep consuming (op term) pairs while the lookahead is a binary operator.
    loop {
        // Cloned because the peeked token borrows the iterator.
        let lookahead = self.tokenizer.peek().unwrap().clone();
        let is_op = match lookahead {
            Token::Symbol('+') | Token::Symbol('-') | Token::Symbol('*')
            | Token::Symbol('/') | Token::Symbol('&') | Token::Symbol('|')
            | Token::Symbol('<') | Token::Symbol('>') | Token::Symbol('=') => true,
            _ => false,
        };
        if !is_op {
            break;
        }
        let op = self.tokenizer.next().unwrap();
        self.write_token(op, indent_level + 2)?;
        self.compile_term(indent_level + 2)?;
    }
    self.write_with_indentation(indent_level, b"</expression>\n")?;
    Ok(())
}
// Compiles a single term. The grammar is:
// integerConstant | stringConstant | keywordConstant |
// varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
fn compile_term(&mut self, indent_level: u32) -> io::Result<()> {
// integerConstant | stringConstant | keywordConstant |
// varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term
try!(self.write_with_indentation(indent_level, b"<term>\n"));
let next = self.tokenizer.next().unwrap();
match next {
// Constants are terminal: emit and done.
Token::IntegerConstant(_) | Token::StringConstant(_) | Token::Keyword(Keyword::True) |
Token::Keyword(Keyword::False) | Token::Keyword(Keyword::Null) | Token::Keyword(Keyword::This) => {
try!(self.write_token(next, indent_level + 2));
},
Token::Identifier(_) => {
try!(self.write_token(next, indent_level + 2));
// NOTE: Has to be cloned to avoid a borrow-checker error. This is inefficient.
let peek = self.tokenizer.peek().unwrap().clone();
// Following description from the book: Compiles a term. This routine is faced with a slight difficulty when
// trying to decide between some of the alternative parsing rules. Specifically, if the current token is an
// identifier, the routine must distinguish between a variable, an array entry, and a subroutine call. A
// single look-ahead token, which may be one of “[“, “(“, or “.” suffices to distinguish between the three
// possibilities. Any other token is not part of this term and should not be advanced over.
match peek {
Token::Symbol('[') => {
// Array access
let consumed = self.tokenizer.next().unwrap();
try!(self.write_token(consumed, indent_level + 2));
try!(self.compile_expression(indent_level + 2));
let next_next = self.tokenizer.next().unwrap();
expect_symbol(&next_next, ']');
try!(self.write_token(next_next, indent_level + 2));
},
Token::Symbol('(') => {
// Subroutine call
let consumed = self.tokenizer.next().unwrap();
try!(self.write_token(consumed, indent_level + 2));
try!(self.compile_expression_list(indent_level + 2));
let next_next = self.tokenizer.next().unwrap();
expect_symbol(&next_next, ')');
try!(self.write_token(next_next, indent_level + 2));
},
Token::Symbol('.') => {
// Subroutine call
let consumed = self.tokenizer.next().unwrap();
try!(self.write_token(consumed, indent_level + 2));
let identifier = self.tokenizer.next().unwrap();
expect_identifier(&identifier);
try!(self.write_token(identifier, indent_level + 2));
let opening_parenthesis = self.tokenizer.next().unwrap();
expect_symbol(&opening_parenthesis, '(');
try!(self.write_token(opening_parenthesis, indent_level + 2));
try!(self.compile_expression_list(indent_level + 2));
let next_next = self.tokenizer.next().unwrap();
expect_symbol(&next_next, ')');
try!(self.write_token(next_next, indent_level + 2));
},
_ => {
// Do nothing -- not part of this term
}
}
},
// Parenthesized sub-expression.
Token::Symbol('(') => {
try!(self.write_token(next, indent_level + 2));
try!(self.compile_expression(indent_level + 2));
let next_next = self.tokenizer.next().unwrap();
expect_symbol(&next_next, ')');
try!(self.write_token(next_next, indent_level + 2));
},
// Unary operator applied to a nested term.
Token::Symbol('-') | Token::Symbol('~') => {
try!(self.write_token(next, indent_level + 2));
try!(self.compile_term(indent_level + 2));
},
_ => {
panic!("Expected one of integerConstant | stringConstant | keywordConstant | varName | varName '[' expression ']' | subroutineCall | '(' expression ')' | unaryOp term, got {:?}", next);
},
}
try!(self.write_with_indentation(indent_level, b"</term>\n"));
Ok(())
}
/// Compiles a possibly-empty, comma-separated argument list:
/// (expression (',' expression)*)?
/// Stops at the closing ')' without consuming it.
fn compile_expression_list(&mut self, indent_level: u32) -> io::Result<()> {
    self.write_with_indentation(indent_level, b"<expressionList>\n")?;
    loop {
        // Cloned because the peeked token borrows the iterator.
        let lookahead = self.tokenizer.peek().unwrap().clone();
        match lookahead {
            // End of list -- the caller consumes the ')'.
            Token::Symbol(')') => break,
            // Separator between two expressions.
            Token::Symbol(',') => {
                let comma = self.tokenizer.next().unwrap();
                self.write_token(comma, indent_level + 2)?;
            }
            // Anything else must start an expression.
            _ => self.compile_expression(indent_level + 2)?,
        }
    }
    self.write_with_indentation(indent_level, b"</expressionList>\n")?;
    Ok(())
}
fn write_with_indentation(&mut self, indent_level: u32, buf: &[u8]) -> io::Result<usize> {
try!(self.write_indentation(indent_level));
return self.writer.write(buf);
}
fn write_indentation(&mut self, indent_level: u32) -> io::Result<()> {
for _ in 0..indent_level {
try!(self.writer.write(b" "));
}
Ok(())
}
fn write_token(&mut self, token: Token, indent_level: u32) -> io::Result<()> {
try!(self.write_indentation(indent_level));
try!(write_token(&mut self.writer, token));
Ok(())
}
}
fn write_token<W: Write>(writer: &mut W, token: Token) -> io::Result<()> {
match token {
Token::Keyword(keyword) => {
try!(writer.write(b"<keyword> "));
let keyword_string = match keyword {
Keyword::Class => "class",
Keyword::Constructor => "constructor",
Keyword::Function => "function",
Keyword::Method => "method",
Keyword::Field => "field",
Keyword::Static => "static",
Keyword::Var => "var",
Keyword::Int => "int",
Keyword::Char => "char",
Keyword::Boolean => "boolean",
Keyword::Void => "void",
Keyword::True => "true",
Keyword::False => "false",
Keyword::Null => "null",
Keyword::This => "this",
Keyword::Let => "let",
Keyword::Do => "do",
Keyword::If => "if",
Keyword::Else => "else",
Keyword::While => "while",
Keyword::Return => "return",
};
try!(writer.write(keyword_string.as_bytes()));
try!(writer.write(b" </keyword>\n"));
},
Token::Symbol(symbol) => {
try!(writer.write(b"<symbol> "));
match symbol {
'&' => { try!(writer.write(b"&amp;")); },
'>' => { try!(writer.write(b"&gt;")); },
'<' => { try!(writer.write(b"&lt;")); },
_ => { try!(writer.write(&format!("{}", symbol).as_bytes())); },
}
try!(writer.write(b" </symbol>\n"));
},
Token::Identifier(identifier) => {
try!(writer.write(b"<identifier> "));
try!(writer.write(identifier.as_bytes()));
try!(writer.write(b" </identifier>\n"));
},
Token::IntegerConstant(int) => {
try!(writer.write(b"<integerConstant> "));
try!(writer.write(&format!("{}", int).as_bytes()));
try!(writer.write(b" </integerConstant>\n"));
},
Token::StringConstant(string) => {
try!(writer.write(b"<stringConstant> "));
try!(writer.write(string.as_bytes()));
try!(writer.write(b" </stringConstant>\n"));
},
}
Ok(())
}
/// Panics unless `token` is exactly the given keyword.
fn expect_keyword(token: &Token, keyword: Keyword) {
    if let Token::Keyword(ref actual_keyword) = *token {
        if *actual_keyword == keyword {
            return;
        }
    }
    panic!("Expected \"{:?}\" keyword, had \"{:?}\"", keyword, token);
}
/// Panics unless `token` is one of the listed keywords.
fn expect_keywords(token: &Token, keywords: &[Keyword]) {
    // Match the token once instead of re-matching it for every candidate
    // keyword as the previous loop did; Keyword derives PartialEq, so
    // slice::contains does the comparison.
    if let Token::Keyword(ref actual_keyword) = *token {
        if keywords.contains(actual_keyword) {
            return;
        }
    }
    panic!("Expected one of \"{:?}\" keywords, had \"{:?}\"", keywords, token);
}
/// Panics unless `token` can start a type: 'int', 'char', 'boolean', or a
/// class name (identifier).
fn expect_typename(token: &Token) {
    match *token {
        Token::Keyword(Keyword::Int)
        | Token::Keyword(Keyword::Char)
        | Token::Keyword(Keyword::Boolean)
        | Token::Identifier(_) => {}
        _ => panic!("Expected 'int', 'char', 'boolean', or class name: {:?}", token),
    }
}
/// Like expect_typename, but additionally accepts 'void' (used for
/// subroutine return types).
fn expect_typename_including_void(token: &Token) {
    match *token {
        Token::Keyword(Keyword::Void)
        | Token::Keyword(Keyword::Int)
        | Token::Keyword(Keyword::Char)
        | Token::Keyword(Keyword::Boolean)
        | Token::Identifier(_) => {}
        _ => panic!("Expected 'void', 'int', 'char', 'boolean', or class name: {:?}", token),
    }
}
/// Panics unless `identifier` is an Identifier token.
fn expect_identifier(identifier: &Token) {
    match *identifier {
        Token::Identifier(_) => {}
        _ => panic!("Expected identifier, had \"{:?}\"", identifier),
    }
}
/// Panics unless `token` is exactly the given symbol character.
fn expect_symbol(token: &Token, symbol: char) {
    match *token {
        Token::Symbol(actual_symbol) if actual_symbol == symbol => {}
        _ => panic!("Expected symbol {}, had \"{:?}\"", symbol, token),
    }
}
/*
impl<W> JackParser<W: Write> {
}*/
// ------------------------------------------------
// Main entry point
// ------------------------------------------------
// Refs:
// http://nand2tetris.org/10.php
// http://www.cs.huji.ac.il/course/2002/nand2tet/docs/ch_9_jack.pdf
// http://www1.idc.ac.il/tecs/book/chapter10.pdf
/// Entry point: `JackAnalyzer <command> <input>` where command is `tokenize`
/// (emits <stem>T.xml) or `parse` (emits <stem>.xml) for the given Jack file.
fn main() {
    let mut args = env::args();
    match args.len() {
        2 => {
            // TODO file or directory
        },
        3 => {
            // args[0] is the program name; nth(1) yields the command and the
            // following next() yields the input path.
            let command = args.nth(1).unwrap();
            let input = args.next().unwrap();
            match command.as_ref() {
                "tokenize" => {
                    let reader = JackTokenizer::new(&input).unwrap();
                    let input_path = Path::new(&input);
                    let input_name_without_extension = input_path.file_stem().unwrap();
                    let output_name = format!("{}T.xml", input_name_without_extension.to_str().unwrap());
                    let mut writer = BufWriter::new(File::create(output_name).unwrap());
                    // BUG FIX: write_all instead of write -- a short write
                    // would silently truncate the XML.
                    writer.write_all(b"<tokens>\n").unwrap();
                    for token in reader {
                        write_token(&mut writer, token).unwrap();
                    }
                    writer.write_all(b"</tokens>\n").unwrap();
                    // Flush explicitly: BufWriter's Drop swallows flush errors.
                    writer.flush().unwrap();
                },
                "parse" => {
                    let reader = JackTokenizer::new(&input).unwrap();
                    let input_path = Path::new(&input);
                    let input_name_without_extension = input_path.file_stem().unwrap();
                    let output_name = format!("{}.xml", input_name_without_extension.to_str().unwrap());
                    let mut writer = JackParser::new(reader, &output_name).unwrap();
                    writer.compile_class(0).unwrap();
                },
                _ => {
                    println!("Unknown command: {}", command);
                    return;
                },
            }
        },
        _ => {
            println!("Usage: JackAnalyzer command input");
            println!("\nCommands:\n");
            println!("tokenize Creates a list of tokens from the input Jack file.");
            println!("parse Creates a parse tree from the input Jack file.");
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment