Skip to content

Instantly share code, notes, and snippets.

@rubber-duck
Created June 28, 2013 22:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rubber-duck/5888555 to your computer and use it in GitHub Desktop.
Save rubber-duck/5888555 to your computer and use it in GitHub Desktop.
use std::vec;
use std::str;
use std::char;
use std::iterator;
use std::num::strconv;
use token;
/// Cursor over a source string together with the tokens produced from it so far.
struct Scanner<'self> {
/// Text being scanned; token slices borrow from it.
source: &'self str,
/// Byte index into `source` of the next character to be read.
next: uint,
/// Value `next` had when the previous token (or error) was pushed; 0 initially.
token_start: uint,
/// Tokens pushed so far, in scan order.
tokens: ~[token::Token<'self>]
}
impl<'self> Scanner<'self> {
    /// Return the character at the current position without consuming it,
    /// or `None` when the end of the source has been reached.
    fn peek_char(&self) -> Option<char> {
        if self.next >= self.source.len() {
            None
        } else {
            Some(self.source.char_range_at(self.next).ch)
        }
    }

    /// Return the character `offset_chars` characters past the current
    /// position without consuming anything. Offset 0 is the character
    /// `peek_char` returns. Yields `None` when that position lies at or
    /// beyond the end of the source.
    fn peek_char_with_offset(&self, offset_chars: uint) -> Option<char> {
        let mut end = self.next;
        for offset_chars.times {
            if end >= self.source.len() {
                return None;
            }
            end = self.source.char_range_at(end).next;
        }
        // Stepping over the final character leaves `end` equal to the source
        // length; indexing there would be out of bounds, so report `None`.
        if end >= self.source.len() {
            None
        } else {
            Some(self.source.char_range_at(end).ch)
        }
    }

    /// Consume and return the character at the current position, or return
    /// `None` (consuming nothing) at the end of the source.
    fn read_char(&mut self) -> Option<char> {
        if self.next >= self.source.len() {
            None
        } else {
            let str::CharRange { next, ch } = self.source.char_range_at(self.next);
            self.next = next;
            Some(ch)
        }
    }

    /// Consume `count` characters and return them as a slice of the source.
    /// If fewer than `count` characters remain, nothing is consumed and
    /// `None` is returned.
    fn read_chars(&mut self, count: uint) -> Option<&'self str> {
        let start = self.next;
        let mut end = self.next;
        for count.times {
            if end >= self.source.len() {
                return None;
            }
            end = self.source.char_range_at(end).next;
        }
        self.next = end;
        Some(self.source.slice(start, end))
    }

    /// Move self.next forward by specified number of chars, stopping early
    /// at the end of the source.
    fn advance(&mut self, chars: uint) {
        for chars.times {
            if self.next >= self.source.len() {
                return;
            }
            self.next = self.source.char_range_at(self.next).next;
        }
    }

    /// Advance the reader to the next non-whitespace character, or to the
    /// end of the source if only whitespace remains.
    fn skip_whitespace(&mut self) {
        // The bounds test comes first so that empty input and trailing
        // whitespace never index past the end of the source.
        while self.next < self.source.len() {
            let range = self.source.char_range_at(self.next);
            if !char::is_whitespace(range.ch) {
                break;
            }
            self.next = range.next;
        }
    }

    /// Advance the reader to the next whitespace character, or to the end of
    /// the source if no whitespace remains.
    fn skip_to_whitespace(&mut self) {
        while self.next < self.source.len() {
            let range = self.source.char_range_at(self.next);
            if char::is_whitespace(range.ch) {
                break;
            }
            self.next = range.next;
        }
    }

    /// Push a Token with TokenValue and (current location - token start) as
    /// length (in bytes), then start the next token at the current location.
    fn push_token(&mut self, value: token::TokenValue<'self>) {
        let length = self.next - self.token_start;
        self.token_start = self.next;
        self.tokens.push(token::Token {
            length: length,
            value: value
        });
    }

    /// Advance current location by offset and push a Token with TokenValue
    /// and (current location - token start) as length.
    fn push_token_with_offset(&mut self, value: token::TokenValue<'self>, offset_chars: uint) {
        self.advance(offset_chars);
        self.push_token(value);
    }

    /// Close the current token span for an erroneous piece of input.
    ///
    /// NOTE(review): `error` is not recorded anywhere yet; only the token
    /// start is moved to the current location, so the span is dropped from
    /// the output. Storing it needs a token value (or side list) for errors
    /// that is not visible from this file.
    fn push_error(&mut self, error: token::TokenError) {
        self.token_start = self.next;
    }

    /// Advance current location by offset, then report `error` exactly as
    /// `push_error` does (mirrors `push_token_with_offset`).
    fn push_error_with_offset(&mut self, error: token::TokenError, offset_chars: uint) {
        self.advance(offset_chars);
        self.push_error(error);
    }
}
/// Return `true` when `ch` may begin an identifier: an ASCII letter or `_`.
///
/// This is a deliberately conservative rule; non-ASCII characters are
/// rejected.
fn is_identifier_start(ch : char) -> bool {
    (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_'
}
/// Return `true` when `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit or `_`.
///
/// This is a deliberately conservative rule; non-ASCII characters are
/// rejected.
fn is_identifier_body(ch : char) -> bool {
    (ch >= 'a' && ch <= 'z')
        || (ch >= 'A' && ch <= 'Z')
        || (ch >= '0' && ch <= '9')
        || ch == '_'
}
/// Unimplemented stub for scanning an identifier.
///
/// The body is empty: calling it consumes no input and pushes no token.
fn scan_identifier<'a>(scanner: &mut Scanner<'a>) {
}
/// Unimplemented stub for scanning a lifetime identifier.
///
/// The body is empty: calling it consumes no input and pushes no token.
/// `scan` calls it after consuming a `'` that does not open a character literal.
fn scan_lifetime_identifier<'a>(scanner: &mut Scanner<'a>) {
}
/// Unimplemented stub for scanning a numeric literal.
///
/// The body is empty: calling it consumes no input and pushes no token.
fn scan_numeric_literal<'a>(scanner: &mut Scanner<'a>) {
}
/// Scan the remainder of a character literal and push either a `token::Char`
/// token or an error.
///
/// The opening `'` must already have been consumed by the caller. Accepted
/// forms are a single character, one of the escapes `\\`, `\'`, `\n`, `\t`,
/// `\r`, or a hexadecimal escape `\xHH`, `\uHHHH`, `\UHHHHHHHH`. In every
/// accepted form the closing `'` is required and is consumed as part of the
/// token. On malformed input the scanner skips to the next whitespace and
/// reports `CharLiteralInvalid` or `CharLiteralUnterminated`.
fn scan_char_literal<'a>(scanner : &mut Scanner<'a>) {
    // opening ' char has already been recognized before this function is called
    match scanner.read_char() {
        Some('\\') => match scanner.read_char() {
            // Hexadecimal escapes: the escape letter fixes the digit count.
            Some(esc) if esc == 'x' || esc == 'u' || esc == 'U' => {
                let chlen : uint = match esc {
                    'x' => 2,
                    'u' => 4,
                    // the guard above admits only 'x', 'u' and 'U'
                    _ => 8
                };
                // The closing quote must follow exactly `chlen` characters.
                if scanner.peek_char_with_offset(chlen) == Some('\'') {
                    // read_chars counts characters, so the slice always ends
                    // on a character boundary even for non-ASCII input.
                    let parsed = match scanner.read_chars(chlen) {
                        Some(digits) => strconv::from_str_common::<u32>(
                            digits,
                            16, false, false, false, strconv::ExpNone, false, false),
                        None => None
                    };
                    match parsed {
                        // Reject values outside the Unicode scalar range
                        // (beyond U+10FFFF, or in the surrogate block).
                        Some(c) if c <= 0x10FFFFu32 && (c < 0xD800u32 || c > 0xDFFFu32) =>
                            scanner.push_token_with_offset(token::Char(c as char), 1),
                        _ => scanner.push_error_with_offset(token::CharLiteralInvalid, 1)
                    }
                } else {
                    scanner.skip_to_whitespace();
                    scanner.push_error(token::CharLiteralUnterminated)
                }
            }
            // Single-character escapes.
            Some(esc) => {
                let unescaped = match esc {
                    '\\' => Some('\\'),
                    '\'' => Some('\''),
                    'n' => Some('\n'),
                    't' => Some('\t'),
                    'r' => Some('\r'),
                    _ => None
                };
                match unescaped {
                    // The closing quote is the very next character; consume
                    // it together with the token.
                    Some(c) if scanner.peek_char() == Some('\'') =>
                        scanner.push_token_with_offset(token::Char(c), 1),
                    Some(_) => {
                        scanner.skip_to_whitespace();
                        scanner.push_error(token::CharLiteralUnterminated)
                    }
                    None => {
                        scanner.skip_to_whitespace();
                        scanner.push_error(token::CharLiteralInvalid)
                    }
                }
            }
            // The source ended right after the backslash.
            None => scanner.push_error(token::CharLiteralUnterminated)
        },
        // Plain character: after reading it, the closing quote sits at the
        // current position (offset 0).
        Some(c) if scanner.peek_char() == Some('\'') =>
            scanner.push_token_with_offset(token::Char(c), 1),
        Some(_) => {
            scanner.skip_to_whitespace();
            scanner.push_error(token::CharLiteralInvalid)
        }
        // The source ended right after the opening quote.
        None => scanner.push_error(token::CharLiteralUnterminated)
    }
}
/// Unimplemented stub for scanning a string literal.
///
/// The body is empty: calling it consumes no input and pushes no token.
/// `scan` calls it after consuming the opening `"`.
fn scan_string_literal<'a>(scanner: &mut Scanner<'a>) {
}
/// Tokenise `source` and return the tokens in the order they were scanned.
///
/// The token loop lives in `scan_tokens`, which receives the scanner as
/// `&mut Scanner`. NOTE(review): the compiler error quoted with this file
/// ("failed to find an implementation of trait std::io::Reader for
/// scanner::Scanner") points at `scanner.read_char()` called on the by-value
/// local; it presumably resolves to the prelude's `ReaderUtil::read_char`
/// instead of the inherent method. Calling through `&mut Scanner` avoids
/// that form. TODO confirm against the compiler version in use.
fn scan<'a>(source: &'a str) -> ~[token::Token<'a>] {
    let mut scanner = Scanner {
        source: source,
        next: 0,
        token_start: 0,
        tokens: ~[]
    };
    // read source from scanner until end
    scan_tokens(&mut scanner);
    // return scanned tokens
    scanner.tokens
}

/// Drive `scanner` to the end of its source, pushing one token (or error)
/// per recognised lexeme. Whitespace between lexemes is skipped.
fn scan_tokens<'a>(scanner: &mut Scanner<'a>) {
    loop {
        // skip whitespace and fetch the next non-whitespace char; stop at end
        scanner.skip_whitespace();
        let ch = match scanner.read_char() {
            Some(c) => c,
            None => break
        };
        match ch {
            // Bracket delimiters
            '(' => scanner.push_token(token::OpenParenthesis),
            ')' => scanner.push_token(token::CloseParenthesis),
            '{' => scanner.push_token(token::OpenBrace),
            '}' => scanner.push_token(token::CloseBrace),
            '[' => scanner.push_token(token::OpenBracket),
            ']' => scanner.push_token(token::CloseBracket),
            // Single character symbols
            '@' => scanner.push_token(token::At),
            '~' => scanner.push_token(token::Tilde),
            ',' => scanner.push_token(token::Comma),
            ';' => scanner.push_token(token::Semicolon),
            '$' => scanner.push_token(token::Dollar),
            // Multi character symbol matching
            // . ..
            '.' => match scanner.peek_char() {
                Some('.') => scanner.push_token_with_offset(token::DotDot, 1),
                _ => scanner.push_token(token::Dot)
            },
            // : ::
            ':' => match scanner.peek_char() {
                Some(':') => scanner.push_token_with_offset(token::ColonColon, 1),
                _ => scanner.push_token(token::Colon)
            },
            // = => ==
            '=' => match scanner.peek_char() {
                Some('>') => scanner.push_token_with_offset(token::FatArrow, 1),
                Some('=') => scanner.push_token_with_offset(token::EqualEqual, 1),
                _ => scanner.push_token(token::Equal)
            },
            // ! !=
            '!' => match scanner.peek_char() {
                Some('=') => scanner.push_token_with_offset(token::NotEqual, 1),
                _ => scanner.push_token(token::Not)
            },
            // < <= << <<=
            '<' => match scanner.peek_char() {
                Some('<') => match scanner.peek_char_with_offset(1) {
                    Some('=') => scanner.push_token_with_offset(token::ShiftLeftEqual, 2),
                    _ => scanner.push_token_with_offset(token::ShiftLeft, 1)
                },
                // consume the '=' as part of the token, like every other
                // two-character operator
                Some('=') => scanner.push_token_with_offset(token::LessEqual, 1),
                _ => scanner.push_token(token::Less)
            },
            // > >= >> >>=
            '>' => match scanner.peek_char() {
                Some('>') => match scanner.peek_char_with_offset(1) {
                    Some('=') => scanner.push_token_with_offset(token::ShiftRightEqual, 2),
                    _ => scanner.push_token_with_offset(token::ShiftRight, 1)
                },
                Some('=') => scanner.push_token_with_offset(token::GreaterEqual, 1),
                _ => scanner.push_token(token::Greater)
            },
            // & && &=
            '&' => match scanner.peek_char() {
                Some('&') => scanner.push_token_with_offset(token::AndAnd, 1),
                Some('=') => scanner.push_token_with_offset(token::AndEqual, 1),
                _ => scanner.push_token(token::And)
            },
            // | || |=
            '|' => match scanner.peek_char() {
                Some('|') => scanner.push_token_with_offset(token::OrOr, 1),
                Some('=') => scanner.push_token_with_offset(token::OrEqual, 1),
                _ => scanner.push_token(token::Or)
            },
            // + +=
            '+' => match scanner.peek_char() {
                Some('=') => scanner.push_token_with_offset(token::PlusEqual, 1),
                _ => scanner.push_token(token::Plus)
            },
            // - -> -=
            '-' => match scanner.peek_char() {
                Some('>') => scanner.push_token_with_offset(token::RightArrow, 1),
                Some('=') => scanner.push_token_with_offset(token::MinusEqual, 1),
                _ => scanner.push_token(token::Minus)
            },
            // * *=
            '*' => match scanner.peek_char() {
                Some('=') => scanner.push_token_with_offset(token::StarEqual, 1),
                _ => scanner.push_token(token::Star)
            },
            // / /=
            // NOTE(review): block (/* */) and line (//) comments are not
            // recognised here; they are scanned as Slash / Star tokens.
            '/' => match scanner.peek_char() {
                Some('=') => scanner.push_token_with_offset(token::SlashEqual, 1),
                _ => scanner.push_token(token::Slash)
            },
            // % %=
            '%' => match scanner.peek_char() {
                Some('=') => scanner.push_token_with_offset(token::PercentEqual, 1),
                _ => scanner.push_token(token::Percent)
            },
            // ^ ^=
            '^' => match scanner.peek_char() {
                Some('=') => scanner.push_token_with_offset(token::CaretEqual, 1),
                _ => scanner.push_token(token::Caret)
            },
            // Character literal or a lifetime identifier
            '\'' => {
                // With the opening quote consumed, offset 0 is the literal's
                // character and offset 1 is where its closing quote must be.
                if scanner.peek_char() == Some('\\') ||
                   scanner.peek_char_with_offset(1) == Some('\'') {
                    scan_char_literal(&mut *scanner);
                } else {
                    scan_lifetime_identifier(&mut *scanner);
                }
            },
            // String literal parser
            '"' => scan_string_literal(&mut *scanner),
            // Numeric literal parser
            // The helpers are declared with the scanner as their only
            // parameter; the first character has already been consumed and
            // is the one ending at scanner.next.
            d if char::is_digit_radix(d, 10) => scan_numeric_literal(&mut *scanner),
            // Identifier parser
            ids if is_identifier_start(ids) => scan_identifier(&mut *scanner),
            // Anything else is not part of the recognised token set.
            // NOTE(review): no TokenError variant for this case is visible
            // from this file; replace the failure with push_error once one
            // exists.
            _ => fail!("scan: unexpected character `%c` in source", ch)
        }
    }
}
scanner.rs:183:7: 183:27 error: failed to find an implementation of trait std::io::Reader for scanner::Scanner<>
scanner.rs:183 do scanner.read_char().while_some |ch| {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment