Created
August 12, 2015 02:02
-
-
Save m4rw3r/a91f75ce06378cc5a190 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mod parser { | |
use std::fmt::Debug; | |
#[derive(Debug)] | |
pub enum Error<T> { | |
DidNotSatisfy(T) | |
} | |
#[derive(Debug)] | |
enum State<T, E> { | |
Ok(T), | |
Err(E), | |
Incomplete | |
} | |
use self::State::*; | |
impl<T, E> State<T, E> { | |
fn map<F, U>(self, f: F) -> State<U, E> | |
where F: Fn(T) -> U { | |
match self { | |
Ok(t) => Ok(f(t)), | |
Err(t) => Err(t), | |
Incomplete => Incomplete | |
} | |
} | |
fn ok(&self) -> bool { | |
match *self { | |
Ok(_) => true, | |
_ => false, | |
} | |
} | |
} | |
#[derive(Debug)] | |
pub struct Parser<'a, I: 'a + Copy, T, E>(&'a [I], State<T, E>); | |
impl<'a, I: Copy> Parser<'a, I, (), Error<I>> { | |
pub fn new(i: &'a [I]) -> Self { | |
Parser(i, Ok(())) | |
} | |
} | |
// Functor | |
impl<'a, I: 'a + Copy, T, E> Parser<'a, I, T, E> { | |
pub fn map<F, U>(self, f: F) -> Parser<'a, I, U, E> | |
where F: Fn(T) -> U { | |
Parser(self.0, self.1.map(f)) | |
} | |
} | |
// Monad | |
impl<'a, I: 'a + Copy, T, E> Parser<'a, I, T, E> { | |
pub fn bind<F, U: Copy>(self, f: F) -> Parser<'a, I, U, E> | |
where F: Fn(T, Parser<'a, I, (), E>) -> Parser<'a, I, U, E> { | |
match self.1 { | |
Ok(s) => { | |
let r = f(s, Parser(self.0, Ok(()))); | |
Parser(if r.1.ok() { r.0 } else { self.0 }, r.1) | |
}, | |
Err(e) => Parser(self.0, Err(e)), | |
Incomplete => Parser(self.0, Incomplete) | |
} | |
} | |
pub fn ret<U>(self, v: U) -> Parser<'a, I, U, E> { | |
Parser(self.0, Ok(v)) | |
} | |
pub fn err<F, U>(self, e: F) -> Parser<'a, I, U, F> { | |
Parser(self.0, Err(e)) | |
} | |
} | |
// Getters | |
impl<'a, I: 'a + Copy, T, E> Parser<'a, I, T, E> { | |
/// Ensures that at least ``n`` items are present and returns a slice of | |
/// items from the current input. | |
fn ensure(self, n: usize) -> Parser<'a, I, &'a [I], Error<I>> { | |
if self.0.len() >= n { | |
Parser(self.0, Ok(&self.0[..n])) | |
} else { | |
Parser(self.0, Incomplete) | |
} | |
} | |
pub fn peek(self) -> Parser<'a, I, I, Error<I>> { | |
if self.0.len() > 0 { | |
Parser(self.0, Ok(self.0[0])) | |
} else { | |
Parser(self.0, Incomplete) | |
} | |
} | |
pub fn get(self) -> Parser<'a, I, I, Error<I>> { | |
if self.0.len() > 0 { | |
let (d, r) = self.0.split_at(1); | |
Parser(r, Ok(d[0])) | |
} else { | |
Parser(self.0, Incomplete) | |
} | |
} | |
fn advance(self, n: usize) -> Parser<'a, I, T, E> { | |
Parser(&self.0[n..], self.1) | |
} | |
pub fn satisfy<F>(self, f: F) -> Parser<'a, I, I, Error<I>> | |
where F: Fn(I) -> bool { | |
self.peek().bind(|v, p| | |
if f(v) { | |
p.advance(1) | |
.ret(v) | |
} else { | |
p.err(Error::DidNotSatisfy(v)) | |
} | |
) | |
} | |
} | |
// Combinators | |
impl<'a, I: 'a + Copy + Debug, T: Debug, E: Debug> Parser<'a, I, T, E> { | |
pub fn or<F>(self, f: F) -> Self | |
where F: Fn(Parser<'a, I, (), E>) -> Parser<'a, I, T, E> { | |
match self.1 { | |
Ok(_) => self, | |
Err(_) => f(Parser(self.0, Ok(()))), | |
Incomplete => self, | |
} | |
} | |
} | |
} | |
use parser::Parser; | |
fn ascii<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
p.satisfy(|c| c < 128).map(|c| c as u32) | |
} | |
fn trailing<'a>(a: u32, p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
p.satisfy(|c| c & 0b11000000 == 0b10000000).map(|c| (a << 6) + (c & 0b00111111) as u32) | |
} | |
fn twobyte<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
p.satisfy(|c| (c & 0b11100000) == 0b11000000).map(|c| (c & 0b000111111) as u32) | |
.bind(trailing) | |
} | |
fn threebyte<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
p.satisfy(|c| c & 0b11110000 == 0b1110000).map(|c| (c & 0b000111111) as u32) | |
.bind(trailing) | |
.bind(trailing) | |
} | |
fn parse_utf8<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
ascii(p) | |
.or(twobyte) | |
.or(threebyte) | |
} | |
/* | |
Type inferrence problems: | |
fn parse_utf8_inline<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
let ascii = |p| | |
p.satisfy(|c| c < 128) | |
.map(|c| c as u32); | |
let tail = |a, p| | |
p.satisfy(|c| c & 0b11000000 == 0b10000000) | |
.map(|c| (a << 6) + (c & 0b00111111) as u32); | |
let twobyte = |p| | |
p.satisfy(|c| (c & 0b11100000) == 0b11000000) | |
.map(|c| (c & 0b000111111) as u32) | |
.bind(trailing); | |
let threebyte = |p| | |
p.satisfy(|c| c & 0b11110000 == 0b1110000) | |
.map(|c| (c & 0b000111111) as u32) | |
.bind(trailing) | |
.bind(trailing); | |
ascii(p) | |
.or(twobyte) | |
.or(threebyte) | |
} | |
*/ | |
fn parse_utf8_inline<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> { | |
let tail = |a: u32, p: Parser<'a, u8, (), parser::Error<u8>>| | |
p.satisfy(|c| c & 0b11000000 == 0b10000000) | |
.map(|c| (a << 6) + (c & 0b00111111) as u32); | |
p.satisfy(|c| c < 128) | |
.map(|c| c as u32) | |
.or(|p| // twobyte | |
p.satisfy(|c| (c & 0b11100000) == 0b11000000) | |
.map(|c| (c & 0b000111111) as u32) | |
.bind(trailing)) | |
.or(|p| // threebyte | |
p.satisfy(|c| c & 0b11110000 == 0b1110000) | |
.map(|c| (c & 0b000111111) as u32) | |
.bind(trailing) | |
.bind(trailing)) | |
} | |
fn main() { | |
let data = "ä-123".as_bytes(); | |
let p = Parser::new(data); | |
parse_utf8(p).map(|c| println!("{:x}", c)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment