Created
June 28, 2013 22:13
-
-
Save rubber-duck/5888555 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::vec; | |
use std::str; | |
use std::char; | |
use std::iterator; | |
use std::num::strconv; | |
use token; | |
struct Scanner<'self> { | |
source: &'self str, | |
next: uint, | |
token_start: uint, | |
tokens: ~[token::Token<'self>] | |
} | |
impl<'self> Scanner<'self> { | |
/// | |
fn peek_char(&self) -> Option<char> { | |
if self.next >= self.source.len() { | |
None | |
} else { | |
Some(self.source.char_range_at(self.next).ch) | |
} | |
} | |
/// | |
fn peek_char_with_offset(&self, offset_chars: uint) -> Option<char> { | |
let mut end = self.next; | |
for offset_chars.times { | |
if end >= self.source.len() { | |
return None | |
} | |
end = self.source.char_range_at(end).next | |
} | |
Some(self.source.char_range_at(end).ch) | |
} | |
/// | |
fn read_char(&mut self) -> Option<char> { | |
if self.next >= self.source.len() { | |
None | |
} else { | |
let str::CharRange { next, ch } = self.source.char_range_at(self.next); | |
self.next = next; | |
Some(ch) | |
} | |
} | |
/// | |
fn read_chars(&mut self, count: uint) -> Option<&'self str> { | |
let start = self.next; | |
let mut end = self.next; | |
for count.times { | |
if end >= self.source.len() { | |
return None | |
} | |
end = self.source.char_range_at(end).next | |
} | |
self.next = end; | |
Some(self.source.slice(start, end)) | |
} | |
/// Move self.next forward by specified number of chars | |
fn advance(&mut self, chars: uint) { | |
for chars.times { | |
if self.next >= self.source.len() { | |
return; | |
} | |
self.next = self.source.char_range_at(self.next).next; | |
} | |
} | |
/// Advance the reader to next non-whitespace character | |
fn skip_whitespace(&mut self) { | |
let mut range = self.source.char_range_at(self.next); | |
while char::is_whitespace(range.ch) { | |
self.next = range.next; | |
range = self.source.char_range_at(range.next); | |
} | |
} | |
/// Advance the reader to next whitespace character | |
fn skip_to_whitespace(&mut self) { | |
let mut range = self.source.char_range_at(self.next); | |
while !char::is_whitespace(range.ch) { | |
self.next = range.next; | |
range = self.source.char_range_at(range.next); | |
} | |
} | |
/// Push a Token with TokenValue and (current location - token start) as length | |
fn push_token(&mut self, value: token::TokenValue<'self>) { | |
let length = self.next - self.token_start; | |
self.token_start = self.next; | |
self.tokens.push(token::Token { | |
length: length, | |
value: value | |
}); | |
} | |
/// Advance current location by offset an push a Token with TokenValue and (current location - token start) as length | |
fn push_token_with_offset(&mut self, value: token::TokenValue<'self>, offset_chars: uint) { | |
self.advance(offset_chars); | |
self.push_token(value); | |
} | |
/// | |
fn push_error(&mut self, error: token::TokenError) { | |
let length = self.next - self.token_start; | |
self.token_start = self.next | |
} | |
fn push_error_with_offset(&mut self, error: token::TokenError, offset_chars: uint) { | |
} | |
} | |
/// Returns `true` when `ch` may begin an identifier: an ASCII letter or `_`.
///
/// Only ASCII is recognized; non-ASCII identifier characters are rejected.
/// TODO: extend if the scanner needs to accept Unicode identifiers.
fn is_identifier_start(ch : char) -> bool {
    ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
}
/// Returns `true` when `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit or `_`.
///
/// Only ASCII is recognized; non-ASCII identifier characters are rejected.
/// TODO: extend if the scanner needs to accept Unicode identifiers.
fn is_identifier_body(ch : char) -> bool {
    ch == '_'
        || (ch >= 'a' && ch <= 'z')
        || (ch >= 'A' && ch <= 'Z')
        || (ch >= '0' && ch <= '9')
}
/// NOTE(review): empty stub. Nothing is read from `scanner` and no token is
/// pushed. Presumably meant to scan an identifier; TODO implement.
fn scan_identifier<'a>(scanner: &mut Scanner<'a>) { | |
} | |
/// NOTE(review): empty stub. Nothing is read from `scanner` and no token is
/// pushed. Presumably meant to scan a lifetime identifier such as `'a`;
/// TODO implement.
fn scan_lifetime_identifier<'a>(scanner: &mut Scanner<'a>) { | |
} | |
/// NOTE(review): empty stub. Nothing is read from `scanner` and no token is
/// pushed. Presumably meant to scan a numeric literal; TODO implement.
fn scan_numeric_literal<'a>(scanner: &mut Scanner<'a>) { | |
} | |
/// | |
fn scan_char_literal<'a>(scanner : &mut Scanner<'a>) { | |
// opening ' char has already been recongnized before this function called | |
match scanner.read_char() { | |
Some('\\') => match scanner.read_char() { | |
Some('\\') => scanner.push_token(token::Char('\\')), | |
Some('\'') => scanner.push_token(token::Char('\'')), | |
Some('n') => scanner.push_token(token::Char('\n')), | |
Some('t') => scanner.push_token(token::Char('\t')), | |
Some('r') => scanner.push_token(token::Char('\r')), | |
Some(esc) if esc == 'x' || esc == 'u' || esc == 'U' => { | |
let chlen : uint = match esc { | |
'x' => 2, | |
'u' => 4, | |
'U' => 8 | |
}; | |
if scanner.peek_char_with_offset(chlen) == Some('\'') { | |
match strconv::from_str_common::<u32>( | |
scanner.source.slice(scanner.next, scanner.next + chlen), | |
16, false, false, false, strconv::ExpNone, false, false) { | |
Some(c) => scanner.push_token_with_offset(token::Char(c as char), chlen + 1), | |
_ => scanner.push_error_with_offset(token::CharLiteralInvalid, chlen + 1) | |
} | |
} else { | |
scanner.skip_to_whitespace(); | |
scanner.push_error(token::CharLiteralUnterminated) | |
} | |
} | |
}, | |
Some(c) if scanner.peek_char_with_offset(1) == Some('\'') => scanner.push_token_with_offset(token::Char(c), 1), | |
_ => { | |
scanner.skip_to_whitespace(); | |
scanner.push_error(token::CharLiteralInvalid); | |
} | |
} | |
} | |
/// NOTE(review): empty stub. Nothing is read from `scanner` and no token is
/// pushed. Presumably meant to scan a string literal; TODO implement.
fn scan_string_literal<'a>(scanner: &mut Scanner<'a>) { | |
} | |
/// Splits `source` into tokens.
///
/// Returns the tokens in source order; each token borrows from `source`.
fn scan<'a>(source: &'a str) -> ~[token::Token<'a>] {
    let mut scanner = Scanner {
        source: source,
        next: 0,
        token_start: 0,
        tokens: ~[]
    };
    scan_tokens(&mut scanner);
    // return scanned tokens
    scanner.tokens
}

/// Reads `scanner`'s source until its end, pushing one token per iteration.
///
/// The loop works on a `&mut Scanner` so the `scan_*` helpers receive the
/// reference their signatures ask for.
/// NOTE(review): this presumably also avoids the reported "failed to find an
/// implementation of trait std::io::Reader" error, which pointed at
/// `read_char()` being called on a by-value `Scanner` — confirm with the
/// compiler.
fn scan_tokens<'a>(scanner: &mut Scanner<'a>) {
    scanner.skip_whitespace();
    loop {
        match scanner.read_char() {
            Some(ch) => scan_token(scanner, ch),
            None => break
        }
        // skip whitespace so the next iteration starts on a token character
        scanner.skip_whitespace();
    }
}

/// Scans the single token that starts with `ch`; `ch` has already been
/// consumed from `scanner`.
fn scan_token<'a>(scanner: &mut Scanner<'a>, ch: char) {
    match ch {
        // Bracket delimiters
        '(' => scanner.push_token(token::OpenParenthesis),
        ')' => scanner.push_token(token::CloseParenthesis),
        '{' => scanner.push_token(token::OpenBrace),
        '}' => scanner.push_token(token::CloseBrace),
        '[' => scanner.push_token(token::OpenBracket),
        ']' => scanner.push_token(token::CloseBracket),
        // Single character symbols
        '@' => scanner.push_token(token::At),
        '~' => scanner.push_token(token::Tilde),
        ',' => scanner.push_token(token::Comma),
        ';' => scanner.push_token(token::Semicolon),
        '$' => scanner.push_token(token::Dollar),
        // Multi character symbol matching
        // . ..
        '.' => match scanner.peek_char() {
            Some('.') => scanner.push_token_with_offset(token::DotDot, 1),
            _ => scanner.push_token(token::Dot)
        },
        // : ::
        ':' => match scanner.peek_char() {
            Some(':') => scanner.push_token_with_offset(token::ColonColon, 1),
            _ => scanner.push_token(token::Colon)
        },
        // = => ==
        '=' => match scanner.peek_char() {
            Some('>') => scanner.push_token_with_offset(token::FatArrow, 1),
            Some('=') => scanner.push_token_with_offset(token::EqualEqual, 1),
            _ => scanner.push_token(token::Equal)
        },
        // ! !=
        '!' => match scanner.peek_char() {
            Some('=') => scanner.push_token_with_offset(token::NotEqual, 1),
            _ => scanner.push_token(token::Not)
        },
        // < <= << <<=
        '<' => match scanner.peek_char() {
            Some('<') => match scanner.peek_char_with_offset(1) {
                Some('=') => scanner.push_token_with_offset(token::ShiftLeftEqual, 2),
                _ => scanner.push_token_with_offset(token::ShiftLeft, 1)
            },
            // consume the `=` as well, like the `>=` arm below
            Some('=') => scanner.push_token_with_offset(token::LessEqual, 1),
            _ => scanner.push_token(token::Less)
        },
        // > >= >> >>=
        '>' => match scanner.peek_char() {
            Some('>') => match scanner.peek_char_with_offset(1) {
                Some('=') => scanner.push_token_with_offset(token::ShiftRightEqual, 2),
                _ => scanner.push_token_with_offset(token::ShiftRight, 1)
            },
            Some('=') => scanner.push_token_with_offset(token::GreaterEqual, 1),
            _ => scanner.push_token(token::Greater)
        },
        // & && &=
        '&' => match scanner.peek_char() {
            Some('&') => scanner.push_token_with_offset(token::AndAnd, 1),
            Some('=') => scanner.push_token_with_offset(token::AndEqual, 1),
            _ => scanner.push_token(token::And)
        },
        // | || |=
        '|' => match scanner.peek_char() {
            Some('|') => scanner.push_token_with_offset(token::OrOr, 1),
            Some('=') => scanner.push_token_with_offset(token::OrEqual, 1),
            _ => scanner.push_token(token::Or)
        },
        // + +=
        '+' => match scanner.peek_char() {
            Some('=') => scanner.push_token_with_offset(token::PlusEqual, 1),
            _ => scanner.push_token(token::Plus)
        },
        // - -> -=
        '-' => match scanner.peek_char() {
            Some('>') => scanner.push_token_with_offset(token::RightArrow, 1),
            Some('=') => scanner.push_token_with_offset(token::MinusEqual, 1),
            _ => scanner.push_token(token::Minus)
        },
        // * *=
        '*' => match scanner.peek_char() {
            Some('=') => scanner.push_token_with_offset(token::StarEqual, 1),
            _ => scanner.push_token(token::Star)
        },
        // / /=   (the comment openers /* and // are not recognized yet)
        '/' => match scanner.peek_char() {
            Some('=') => scanner.push_token_with_offset(token::SlashEqual, 1),
            _ => scanner.push_token(token::Slash)
        },
        // % %=
        '%' => match scanner.peek_char() {
            Some('=') => scanner.push_token_with_offset(token::PercentEqual, 1),
            _ => scanner.push_token(token::Percent)
        },
        // ^ ^=
        '^' => match scanner.peek_char() {
            Some('=') => scanner.push_token_with_offset(token::CaretEqual, 1),
            _ => scanner.push_token(token::Caret)
        },
        // Character literal or a lifetime identifier
        '\'' => {
            // The opening quote is consumed, so offset 0 is the literal's
            // character and offset 1 is where a closing quote would be.
            if scanner.peek_char() == Some('\\') ||
               scanner.peek_char_with_offset(1) == Some('\'') {
                scan_char_literal(scanner);
            } else {
                scan_lifetime_identifier(scanner);
            }
        },
        // String literal parser
        '"' => scan_string_literal(scanner),
        // Numeric literal parser (the first digit is already consumed)
        d if char::is_digit_radix(d, 10) => scan_numeric_literal(scanner),
        // Identifier parser (the first character is already consumed)
        ids if is_identifier_start(ids) => scan_identifier(scanner),
        // Keeps the match exhaustive.
        // NOTE(review): an unrecognized character is consumed without a token
        // or an error because no suitable token::TokenError kind is visible
        // here. TODO: report it.
        _ => ()
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scanner.rs:183:7: 183:27 error: failed to find an implementation of trait std::io::Reader for scanner::Scanner<> | |
scanner.rs:183 do scanner.read_char().while_some |ch| { |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment