Skip to content

Instantly share code, notes, and snippets.

@m4rw3r
Created August 12, 2015 02:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save m4rw3r/a91f75ce06378cc5a190 to your computer and use it in GitHub Desktop.
Save m4rw3r/a91f75ce06378cc5a190 to your computer and use it in GitHub Desktop.
mod parser {
use std::fmt::Debug;
#[derive(Debug)]
pub enum Error<T> {
DidNotSatisfy(T)
}
#[derive(Debug)]
enum State<T, E> {
Ok(T),
Err(E),
Incomplete
}
use self::State::*;
impl<T, E> State<T, E> {
fn map<F, U>(self, f: F) -> State<U, E>
where F: Fn(T) -> U {
match self {
Ok(t) => Ok(f(t)),
Err(t) => Err(t),
Incomplete => Incomplete
}
}
fn ok(&self) -> bool {
match *self {
Ok(_) => true,
_ => false,
}
}
}
#[derive(Debug)]
pub struct Parser<'a, I: 'a + Copy, T, E>(&'a [I], State<T, E>);
impl<'a, I: Copy> Parser<'a, I, (), Error<I>> {
pub fn new(i: &'a [I]) -> Self {
Parser(i, Ok(()))
}
}
// Functor
impl<'a, I: 'a + Copy, T, E> Parser<'a, I, T, E> {
pub fn map<F, U>(self, f: F) -> Parser<'a, I, U, E>
where F: Fn(T) -> U {
Parser(self.0, self.1.map(f))
}
}
// Monad
impl<'a, I: 'a + Copy, T, E> Parser<'a, I, T, E> {
pub fn bind<F, U: Copy>(self, f: F) -> Parser<'a, I, U, E>
where F: Fn(T, Parser<'a, I, (), E>) -> Parser<'a, I, U, E> {
match self.1 {
Ok(s) => {
let r = f(s, Parser(self.0, Ok(())));
Parser(if r.1.ok() { r.0 } else { self.0 }, r.1)
},
Err(e) => Parser(self.0, Err(e)),
Incomplete => Parser(self.0, Incomplete)
}
}
pub fn ret<U>(self, v: U) -> Parser<'a, I, U, E> {
Parser(self.0, Ok(v))
}
pub fn err<F, U>(self, e: F) -> Parser<'a, I, U, F> {
Parser(self.0, Err(e))
}
}
// Getters
impl<'a, I: 'a + Copy, T, E> Parser<'a, I, T, E> {
/// Ensures that at least ``n`` items are present and returns a slice of
/// items from the current input.
fn ensure(self, n: usize) -> Parser<'a, I, &'a [I], Error<I>> {
if self.0.len() >= n {
Parser(self.0, Ok(&self.0[..n]))
} else {
Parser(self.0, Incomplete)
}
}
pub fn peek(self) -> Parser<'a, I, I, Error<I>> {
if self.0.len() > 0 {
Parser(self.0, Ok(self.0[0]))
} else {
Parser(self.0, Incomplete)
}
}
pub fn get(self) -> Parser<'a, I, I, Error<I>> {
if self.0.len() > 0 {
let (d, r) = self.0.split_at(1);
Parser(r, Ok(d[0]))
} else {
Parser(self.0, Incomplete)
}
}
fn advance(self, n: usize) -> Parser<'a, I, T, E> {
Parser(&self.0[n..], self.1)
}
pub fn satisfy<F>(self, f: F) -> Parser<'a, I, I, Error<I>>
where F: Fn(I) -> bool {
self.peek().bind(|v, p|
if f(v) {
p.advance(1)
.ret(v)
} else {
p.err(Error::DidNotSatisfy(v))
}
)
}
}
// Combinators
impl<'a, I: 'a + Copy + Debug, T: Debug, E: Debug> Parser<'a, I, T, E> {
pub fn or<F>(self, f: F) -> Self
where F: Fn(Parser<'a, I, (), E>) -> Parser<'a, I, T, E> {
match self.1 {
Ok(_) => self,
Err(_) => f(Parser(self.0, Ok(()))),
Incomplete => self,
}
}
}
}
use parser::Parser;
fn ascii<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
p.satisfy(|c| c < 128).map(|c| c as u32)
}
fn trailing<'a>(a: u32, p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
p.satisfy(|c| c & 0b11000000 == 0b10000000).map(|c| (a << 6) + (c & 0b00111111) as u32)
}
fn twobyte<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
p.satisfy(|c| (c & 0b11100000) == 0b11000000).map(|c| (c & 0b000111111) as u32)
.bind(trailing)
}
fn threebyte<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
p.satisfy(|c| c & 0b11110000 == 0b1110000).map(|c| (c & 0b000111111) as u32)
.bind(trailing)
.bind(trailing)
}
fn parse_utf8<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
ascii(p)
.or(twobyte)
.or(threebyte)
}
/*
Type inferrence problems:
fn parse_utf8_inline<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
let ascii = |p|
p.satisfy(|c| c < 128)
.map(|c| c as u32);
let tail = |a, p|
p.satisfy(|c| c & 0b11000000 == 0b10000000)
.map(|c| (a << 6) + (c & 0b00111111) as u32);
let twobyte = |p|
p.satisfy(|c| (c & 0b11100000) == 0b11000000)
.map(|c| (c & 0b000111111) as u32)
.bind(trailing);
let threebyte = |p|
p.satisfy(|c| c & 0b11110000 == 0b1110000)
.map(|c| (c & 0b000111111) as u32)
.bind(trailing)
.bind(trailing);
ascii(p)
.or(twobyte)
.or(threebyte)
}
*/
fn parse_utf8_inline<'a>(p: Parser<'a, u8, (), parser::Error<u8>>) -> Parser<'a, u8, u32, parser::Error<u8>> {
let tail = |a: u32, p: Parser<'a, u8, (), parser::Error<u8>>|
p.satisfy(|c| c & 0b11000000 == 0b10000000)
.map(|c| (a << 6) + (c & 0b00111111) as u32);
p.satisfy(|c| c < 128)
.map(|c| c as u32)
.or(|p| // twobyte
p.satisfy(|c| (c & 0b11100000) == 0b11000000)
.map(|c| (c & 0b000111111) as u32)
.bind(trailing))
.or(|p| // threebyte
p.satisfy(|c| c & 0b11110000 == 0b1110000)
.map(|c| (c & 0b000111111) as u32)
.bind(trailing)
.bind(trailing))
}
fn main() {
let data = "ä-123".as_bytes();
let p = Parser::new(data);
parse_utf8(p).map(|c| println!("{:x}", c));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment