Skip to content

Instantly share code, notes, and snippets.

@tiffany352
Created November 21, 2013 13:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tiffany352/7581497 to your computer and use it in GitHub Desktop.
Save tiffany352/7581497 to your computer and use it in GitHub Desktop.
use std::str::*;
use std::hashmap::*;
pub trait Parser<T>:Clone {
fn parse(&mut self, text: &str, position: uint) -> Result<Token<T>, SyntaxError>;
fn clone_as_owned_trait(&self) -> ~Parser<T>;
}
impl<T> Clone for ~Parser<T> {
fn clone(&self) -> ~Parser<T> {
self.clone_as_owned_trait()
}
}
#[deriving(Clone)]
pub enum Pattern<'self, T> {
// reference into a hash table containing the pattern
Rule(&'self str),
// lexing
Literal(&'self str),
Range(char, char),
Chars(uint),
Set(~[char]),
More(~Pattern<'self, T>),
MoreThan(uint, ~Pattern<'self, T>),
Exactly(uint, ~Pattern<'self, T>),
LessThan(uint, ~Pattern<'self, T>),
Seq(~[Pattern<'self, T>]),
Or(~[Pattern<'self, T>]),
Diff(~Pattern<'self, T>, ~Pattern<'self, T>),
// matches but doesn't consume
And(~Pattern<'self, T>),
Always(T),
// parsing
Match(~Parser<T>),
Build(~Pattern<'self, T>, extern fn(~str) -> Result<T, ~str>),
Map(~Pattern<'self, T>, extern fn(T) -> Result<T, ~str>)
}
#[deriving(Clone)]
pub struct Nonterminal<'self,T> {
ctx: &'self ParseContext<'self,T>,
rule: &'self Pattern<'self,T>
}
impl<'self,T> Nonterminal<'self,T> {
fn new(ctx: &'self ParseContext<'self,T>, rule: &'self str) -> Nonterminal<'self,T> {
Nonterminal {
ctx: ctx,
rule: ctx.grammar.get(&rule)
}
}
}
impl<'self,T:Clone> ToStr for Pattern<'self,T> {
fn to_str(&self) -> ~str {
match self.clone() {
Rule(s) => s.to_owned(),
Literal(s) => format!("\"{}\"", s.to_owned()),
Range(x,y) => format!("%{:x}-{:x}", x as uint, y as uint),
Chars(n) => format!("any*{}", n),
Set(a) => format!("<One of {:s}>", a.to_str()),
More(p) => format!("{:s}*", p.to_str()),
MoreThan(n, p) => format!("{:s}*{:u} {:s}*", p.to_str(), n, p.to_str()),
Exactly(n, p) => format!("{:s}*{:u}", p.to_str(), n),
LessThan(n, p) => format!("{:s}*-{:u}", p.to_str(), n),
Seq(a) => "(" + a.map(|x| x.to_str()).connect(" ") + ")",
Or(a) => "(" + a.map(|x| x.to_str()).connect(" | ") + ")",
Diff(p1, p2) => format!("({:s} - {:s})", p1.to_str(), p2.to_str()),
Build(p, _) => p.to_str(),
Map(p, _) => p.to_str(),
_ => ~"NYI"
}
}
}
pub trait TokenCreator {
fn sequence(~[Token<Self>]) -> Self;
fn raw(~str) -> Self;
}
impl<'self, T:Clone> Mul<~Pattern<'self, T>, ~Pattern<'self, T>> for ~Pattern<'self, T> {
fn mul(&self, rhs: &~Pattern<'self, T>) -> ~Pattern<'self, T> {
match (*self.clone(), *rhs.clone()) {
(Seq(x), y ) => ~Seq(x + ~[y]),
(x, Seq(y) ) => ~Seq(~[x] + y),
(x, y ) => ~Seq(~[x, y])
}
}
}
impl<'self, T:Clone> Add<~Pattern<'self, T>, ~Pattern<'self, T>> for ~Pattern<'self, T> {
fn add(&self, rhs: &~Pattern<'self, T>) -> ~Pattern<'self, T> {
match (*self.clone(), *rhs.clone()) {
(Or(x), y ) => ~Or(x + ~[y]),
(x, Or(y) ) => ~Or(~[x] + y),
(x, y ) => ~Or(~[x, y])
}
}
}
impl<'self, T:Clone> Index<int, ~Pattern<'self, T>> for ~Pattern<'self, T> {
fn index(&self, rhs: &int) -> ~Pattern<'self, T> {
match (*rhs) {
0 => ~More(self.clone()),
x if x < 0 => ~LessThan(-x as uint, self.clone()),
x => ~MoreThan(x as uint, self.clone())
}
}
}
#[deriving(Clone)]
pub struct LineInfo {
line: int,
startcol: uint,
endcol: uint,
startslice: uint,
endslice: uint
}
impl LineInfo {
pub fn new(text: &str, start: uint, end: uint) -> LineInfo {
fn compute_line(text: &str, max: uint) -> (int, uint) {
let mut line = 0;
let mut offset = 0;
let mut temp = 0;
for c in text.iter() {
if c == '\n' {
line += 1;
offset += temp;
temp = 0;
}
temp += 1;
if temp >= max {
break;
}
}
(line, offset)
}
let (line, offset) = compute_line(text, start);
LineInfo {line: line, startcol: start-offset, endcol: end-offset, startslice: start, endslice: end}
}
}
impl ToStr for LineInfo {
fn to_str(&self) -> ~str {
match (self.line, self.startcol, self.endcol) {
(0, -1, -1) => ~"",
(0, s, e) => format!("[{:u}:{:u}]", s, e),
(l, -1, -1) => format!("[line {:i}]", l),
(l, s, e) => format!("[line {:i} @ {:u}:{:u}]", l, s, e),
}
}
}
#[deriving(Clone)]
pub struct Token<T> {
value: T,
line: LineInfo
}
pub struct SyntaxError {
pats: ~[~str],
instead: Option<~str>,
user_msg: Option<~str>,
line: LineInfo,
is_malformed: bool
}
impl ToStr for SyntaxError {
fn to_str(&self) -> ~str {
fn pretty_arr(arr: ~[~str]) -> ~str {
let len = arr.len();
let mut s = arr[0].clone();
if len > 2 {
for i in range(1, len - 2) {
s.push_str(", ");
s.push_str(arr[i]);
}
}
if len > 1 {
s + ", or " + *arr.last()
} else {
s
}
}
let mut err = self.line.to_str();
err.push_str(format!(" Expected {}", pretty_arr(self.pats.clone())));
match self.instead.clone() {
Some(x) => err.push_str(format!(", got {}", x)),
None => ()
};
match self.user_msg.clone() {
Some(x) => err.push_str(format!(": {}", x)),
None => ()
};
err
}
}
pub struct ParseContext<'self, T> {
grammar: HashMap<&'self str, Pattern<'self, T>>,
variables: HashMap<~str, Token<T>>,
}
impl<'self, T> ParseContext<'self, T> {
pub fn new() -> ParseContext<'self, T> {
ParseContext {grammar: HashMap::new(), variables: HashMap::new()}
}
pub fn rule(&mut self, name: &'self str, rule: ~Pattern<'self, T>) {
self.grammar.insert(name, *rule);
}
}
impl<'self, T:'static+Clone+TokenCreator> Parser<T> for Nonterminal<'self, T> {
fn clone_as_owned_trait(&self) -> ~Parser<T> {
~self.clone() as ~Parser<T>
}
fn parse<'a,'b>(&mut self, text: &str, position: uint) -> Result<Token<T>, SyntaxError> {
let tok: &fn(uint, uint) -> Result<Token<T>, SyntaxError> = |start, end| {
Ok(Token {value: TokenCreator::raw(text.slice(start, end).to_owned()), line: LineInfo::new(text, start+position, end+position)})
};
let seq: &fn(~[Token<T>], uint, uint) -> Result<Token<T>, SyntaxError> = |children, start, end| {
Ok(Token {value: TokenCreator::sequence(children), line: LineInfo::new(text, start+position, end+position)})
};
let err = |name, instead, start:uint, end:uint, is_malformed| {
Err(SyntaxError {pats: ~[name], instead: instead, user_msg: None, line: LineInfo::new(text, start+position, end+position), is_malformed: is_malformed})
};
match *self.rule {
Rule(name) => Nonterminal::new(self.ctx, name).parse(text, position),
Literal(s) => {
if text.len() >= s.len() && text.slice_chars(0, s.char_len()) == s {
tok(0, s.len())
} else {
err(s.to_owned(), None, 0, s.len(), false)
}
}
Range(x, y) => {
if text.char_len() < 1 {
return err(format!("Character between {:c} and {:c}", x, y), Some(~"EOF"), 0, 1, false)
}
let CharRange{ch, next} = text.char_range_at(0);
if ch <= y && ch >= x {
tok(0, next)
} else {
err(format!("Character between {:c} and {:c}", x, y), None, 0, 1, false)
}
}
Chars(n) => {
if text.char_len() >= n {
tok(0, text.slice_chars(0, n).len())
} else {
err(format!("{:u} characters", n), Some(~"EOF"), 0, n, false)
}
}
Set(ref arr) => {
if text.char_len() < 1 {
return err(format!("One of {:?}", from_chars(*arr)), Some(~"EOF"), 0, 1, false)
}
let CharRange{ch, next} = text.char_range_at(0);
for elem in arr.iter() {
if *elem == ch {
return tok(0, next)
}
}
err(format!("One of {:?}", from_chars(*arr)), None, 0, 1, false)
}
More(ref p) => {
let mut acc = 0;
let mut res = ~[];
while acc <= text.len() {
match Nonterminal {ctx: self.ctx, rule: *p}.parse(text.slice_from(acc), acc + position) {
Ok(x) => {
acc = x.line.endslice - position;
res.push(x);
}
Err(e) => if e.is_malformed {
return Err(e)
} else {
break
}
}
}
seq(res, 0, acc)
}
MoreThan(n, ref p) => {
let mut acc = 0;
let mut res = ~[];
for _ in range(0, n) {
match Nonterminal {ctx: self.ctx, rule: *p}.parse(text.slice_from(acc), acc + position) {
Ok(x) => {
acc = x.line.endslice - position;
res.push(x);
}
Err(x) => return Err(x)
}
}
while acc <= text.len() {
match Nonterminal {ctx: self.ctx, rule: *p}.parse(text.slice_from(acc), acc + position) {
Ok(x) => {
acc = x.line.endslice - position;
res.push(x);
}
Err(e) => if e.is_malformed {
return Err(e)
} else {
break
}
}
}
seq(res, 0, acc)
}
Exactly(n, ref p) => {
let mut acc = 0;
let mut res = ~[];
for _ in range(0, n) {
match Nonterminal {ctx: self.ctx, rule: *p}.parse(text.slice_from(acc), acc + position) {
Ok(x) => {
acc = x.line.endslice - position;
res.push(x);
}
Err(x) => return Err(x)
}
}
seq(res, 0, acc)
}
LessThan(n, ref p) => {
let mut acc = 0;
let mut res = ~[];
for _ in range(0, n) {
match Nonterminal {ctx: self.ctx, rule: *p}.parse(text.slice_from(acc), acc + position) {
Ok(x) => {
acc = x.line.endslice - position;
res.push(x);
}
Err(e) => if e.is_malformed {
return Err(e)
}
}
}
seq(res, 0, acc)
}
Seq(ref arr) => {
let mut acc = 0;
let mut res = ~[];
for elem in arr.iter() {
match Nonterminal {ctx: self.ctx, rule: elem}.parse(text.slice_from(acc), position + acc) {
Ok(x) => {
acc = x.line.endslice - position;
res.push(x);
}
Err(x) => if res.len() < 1 {
return Err(x)
} else {
return Err(SyntaxError {pats: x.pats.clone(), instead: x.instead.clone(), user_msg: x.user_msg.clone(), line: x.line.clone(), is_malformed: true})
}
}
}
seq(res, 0, acc)
}
Or(ref arr) => {
let mut malformed = None;
for elem in arr.iter() {
match Nonterminal {ctx: self.ctx, rule: elem}.parse(text, position) {
Ok(x) => return Ok(x),
Err(x) => if x.is_malformed && malformed.is_none() {
malformed = Some(x)
},
}
}
match malformed {
Some(e) => Err(e),
None => err(self.rule.to_str(), None, 0, text.len(), false)
}
}
Diff(ref a, ref b) => match Nonterminal {ctx: self.ctx, rule: *b}.parse(text, position) {
Ok(_) => err(format!("Not {:?}",b), None, 0, text.len(), false),
Err(_) => Nonterminal {ctx: self.ctx, rule: *a}.parse(text, position)
},
And(ref p) => match Nonterminal {ctx: self.ctx, rule: *p}.parse(text, position) {
Ok(x) => Ok(Token {value: x.value, line: LineInfo::new(text, position, position)}),
Err(x) => Err(x)
},
Always(ref v) => Ok(Token {value: v.clone(), line: LineInfo::new(text, position, position)}),
Match(ref p) => p.parse(text, position),
Build(ref p, ref f) => match Nonterminal {ctx: self.ctx, rule: *p}.parse(text, position) {
Ok(x) => match (*f)(text.slice(x.line.startslice-position, x.line.endslice-position).to_owned()) {
Ok(v) => {
Ok(Token {value: v, line: x.line})
}
Err(s) => {
Err(SyntaxError {pats: ~[format!("{:?}", p)], instead: None, user_msg: Some(s), line: x.line, is_malformed: true})
}
},
Err(x) => Err(x)
},
Map(ref p, ref f) => match Nonterminal {ctx: self.ctx, rule: *p}.parse(text, position) {
Ok(x) => match (*f)(x.value.clone()) {
Ok(v) => Ok(Token {value: v, line: x.line}),
Err(s) => {
Err(SyntaxError {pats: ~[format!("{:?}", p)], instead: None, user_msg: Some(s), line: x.line, is_malformed: true})
}
},
Err(x) => Err(x)
},
//_ => fail!("NYI")
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment