Skip to content

Instantly share code, notes, and snippets.

@ploki
Created September 12, 2024 22:42
Parser Monad Combinators in rust for a json parser
/// Parser Monad Combinators
/// https://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf
//ploki@blackmilk.fr
use std::env;
use std::fs;
use std::ops::{Add, Deref, Shr};
use std::sync::Arc;
#[derive(Clone)]
struct Interpretation<A>(A, String);
#[derive(Clone)]
struct VecInterpretation<A> {
result: Vec<Interpretation<A>>,
}
#[derive(Clone)]
struct Parser<'a, A>
where
A: Clone + 'a,
{
parse: Arc<dyn Fn(String) -> VecInterpretation<A> + 'a>,
}
struct TParser<'a, A, B>
where
A: Clone + 'a,
B: Clone + 'a,
{
t_parse: Arc<dyn Fn(A) -> Parser<'a, B> + 'a>,
}
impl<A> Add for VecInterpretation<A>
where
A: Clone,
{
type Output = VecInterpretation<A>;
fn add(self, other: VecInterpretation<A>) -> VecInterpretation<A> {
let mut result = self.result.clone();
result.extend(other.result.clone());
VecInterpretation { result }
}
}
impl<'a, A> Add for Parser<'a, A>
where
A: Clone + 'a,
{
type Output = Parser<'a, A>;
fn add(self, other: Parser<'a, A>) -> Parser<'a, A> {
Parser::new(move |input: String| self(input.clone()) + other(input))
}
}
impl<'a, A> Parser<'a, A>
where
A: Clone + 'a,
{
fn new<F>(f: F) -> Self
where
F: Fn(String) -> VecInterpretation<A> + 'a,
{
Parser { parse: Arc::new(f) }
}
}
// Parser<A> elements are callable with this
impl<'a, A> Deref for Parser<'a, A>
where
A: Clone + 'a,
{
type Target = dyn Fn(String) -> VecInterpretation<A> + 'a;
fn deref(&self) -> &Self::Target {
&*self.parse
}
}
impl<'a, A, B> TParser<'a, A, B>
where
A: Clone + 'a,
B: Clone + 'a,
{
fn new<F>(f: F) -> Self
where
F: Fn(A) -> Parser<'a, B> + 'a,
{
TParser {
t_parse: Arc::new(f),
}
}
}
// Make TParsers callable
impl<'a, A, B> Deref for TParser<'a, A, B>
where
A: Clone + 'a,
B: Clone + 'a,
{
type Target = dyn Fn(A) -> Parser<'a, B> + 'a;
fn deref(&self) -> &Self::Target {
&*self.t_parse
}
}
// monadic bind. then(parser, tparser)
fn then<'a, A, B>(p: Parser<'a, A>, rhs: TParser<'a, A, B>) -> Parser<'a, B>
where
A: Clone + 'a,
B: Clone + 'a,
{
Parser::new(move |input: String| {
let mut res = VecInterpretation { result: vec![] };
for Interpretation(v, unconsumed) in p(input).result {
res = res + rhs(v)(unconsumed);
}
res
})
}
// syntactic sugar for monadic bind
// parser >> tparser
impl<'a, A, B, F> Shr<F> for Parser<'a, A>
where
A: Clone + 'a,
B: Clone + 'a,
F: Fn(A) -> Parser<'a, B> + 'a,
{
type Output = Parser<'a, B>;
fn shr(self, f: F) -> Parser<'a, B> {
then(self, TParser::new(f))
}
}
// pure function of the monad
fn success<'a, A>(a: A) -> Parser<'a, A>
where
A: Clone + 'a,
{
Parser::new(move |input: String| VecInterpretation {
result: vec![Interpretation(a.clone(), input)],
})
}
// discards input and gives nothing
fn zero<'a, A>() -> Parser<'a, A>
where
A: Clone + 'a,
{
Parser::new(|_| VecInterpretation { result: vec![] })
}
// get an item from the input
fn item<'a>() -> Parser<'a, char> {
Parser::new(|input: String| {
if let Some(c) = input.chars().next() {
VecInterpretation {
result: vec![Interpretation(c, input.chars().skip(1).collect())],
}
} else {
zero()(input)
}
})
}
fn satisfies<'a, P>(p: P) -> Parser<'a, char>
where
P: Fn(char) -> bool + Clone + 'a,
{
item() >> move |x| if p(x) { success(x) } else { zero() }
}
fn character<'a>(x: char) -> Parser<'a, char> {
satisfies(move |c| c == x)
}
fn digit<'a>() -> Parser<'a, char> {
satisfies(|c| c.is_ascii_digit())
}
#[allow(dead_code)]
fn lowercase<'a>() -> Parser<'a, char> {
satisfies(|c| c.is_lowercase())
}
#[allow(dead_code)]
fn uppercase<'a>() -> Parser<'a, char> {
satisfies(|c| c.is_uppercase())
}
#[allow(dead_code)]
fn alphanumeric<'a>() -> Parser<'a, char> {
satisfies(|c| c.is_alphanumeric())
}
#[allow(dead_code)]
fn letter<'a>() -> Parser<'a, char> {
lowercase() + uppercase()
}
fn blank<'a>() -> Parser<'a, ()> {
many(satisfies(|c| c.is_whitespace())) >> |_| success(())
}
fn many<'a, A>(p: Parser<'a, A>) -> Parser<'a, Vec<A>>
where
A: Clone + 'a,
{
many1(p) + success(vec![])
}
fn many1<'a, A>(p: Parser<'a, A>) -> Parser<'a, Vec<A>>
where
A: Clone + 'a,
{
p.clone() >> move |x: A|
many(p.clone()) >> move |xs: Vec<A>| {
let mut xxs: Vec<A> = vec![x.clone()];
xxs.extend(xs.clone());
success(xxs)
}
}
fn many_string(p: Parser<char>) -> Parser<String> {
many1_string(p) + success("".to_string())
}
fn many1_string(p: Parser<char>) -> Parser<String> {
p.clone() >> move |x: char|
many(p.clone()) >> move |xs: Vec<char>| {
let mut s: String = xs.into_iter().collect();
s.insert(0, x);
success(s)
}
}
fn sequence1<'a, A, B>(p: Parser<'a, A>, sep: Parser<'a, B>) -> Parser<'a, Vec<A>>
where
A: Clone + 'a,
B: Clone + 'a,
{
p.clone() >> move |x: A|
many(sep.clone() >> {
let p = p.clone();
move |_| p.clone() >> move |y: A| success(y)
}) >> move |xs: Vec<A>| {
let mut xxs: Vec<A> = vec![x.clone()];
xxs.extend(xs.clone());
success(xxs)
}
}
fn sequence<'a, A, B>(p: Parser<'a, A>, sep: Parser<'a, B>) -> Parser<'a, Vec<A>>
where
A: Clone + 'a,
B: Clone + 'a,
{
sequence1(p, sep) + success(vec![])
}
fn block<'a, Opening, Content, Closing>(
open: Parser<'a, Opening>,
p: Parser<'a, Content>,
close: Parser<'a, Closing>,
) -> Parser<'a, Content>
where
Opening: Clone + 'a,
Content: Clone + 'a,
Closing: Clone + 'a,
{
open >> move |_| {
let close = close.clone();
p.clone() >> move |x: Content| close.clone() >> move |_| success(x.clone())
}
}
#[allow(dead_code)]
fn natural<'a>() -> Parser<'a, i64> {
many1_string(digit())
>> move |xs: String| {
let n: i64 = xs.parse().unwrap();
success(n)
}
}
#[allow(dead_code)]
fn integer<'a>() -> Parser<'a, i64> {
natural() + (character('-') >> move |_| natural() >> move |y: i64| success(-y))
}
fn word<'a>(s: String) -> Parser<'a, ()> {
Parser::new(
move |input: String|
if input.starts_with(&s.clone()) {
VecInterpretation {
result: vec![Interpretation((), input[s.len()..].to_string())],
}
} else {
zero()(input)
}
)
}
// JSON Value Types
#[derive(Debug, Clone)]
#[allow(dead_code)]
enum JsonValue {
String(String),
Number(f64),
Object(Vec<(String, JsonValue)>),
Array(Vec<JsonValue>),
Bool(bool),
Null,
}
// Parsers of JSON Value Types
fn json_null<'a>() -> Parser<'a, JsonValue> {
word("null".to_string()) >> |_| success(JsonValue::Null)
}
fn json_bool<'a>() -> Parser<'a, JsonValue> {
(word("false".to_string()) >> |_| success(JsonValue::Bool(false)))
+ (word("true".to_string()) >> |_| success(JsonValue::Bool(true)))
}
fn json_number<'a>() -> Parser<'a, JsonValue> {
(character('-') + success('+')) >> move |minus_sign: char|
many1_string(digit()) >> move |int_part: String| {
let integer_part = |minus_sign, int_part| {
if minus_sign == '-' {
format!("-{}", int_part)
} else {
int_part
}
};
((character('.') >> move |_|
(many1_string(digit()) >> move |frac_part: String|
success(format!(".{}", frac_part))
) + success(String::new())
) + success(String::new())) >> move |frac: String| {
let int_part = int_part.clone();
let frac = frac.clone();
( (character('e') + character('E')) >> move |_| {
(character('+') + character('-') + success('+')) >> move |sign: char|
many1_string(digit()) >> move |exp_part: String|
success(format!("e{}{}", sign, exp_part))
} + success(String::new())) >> move |exponent: String| {
let number_str = format!("{}{}{}", integer_part(minus_sign,
int_part.clone()),
frac.clone(), exponent);
match number_str.parse::<f64>() {
Ok(number) => success(JsonValue::Number(number)),
Err(_) => zero(),
}
}
} + (match format!("{}{}",
integer_part(minus_sign,
int_part.clone()),
frac.clone()).parse::<f64>() {
Ok(number) => success(JsonValue::Number(number)),
Err(_) => zero(),
})
}
}
/// decode \uXXXX
fn hex4<'a>() -> Parser<'a, char> {
let hex_digit = || satisfies(|c| c.is_ascii_hexdigit());
hex_digit() >> move |d1|
hex_digit() >> move |d2|
hex_digit() >> move |d3|
hex_digit() >> move |d4|
u32::from_str_radix(&format!("{}{}{}{}", d1, d2, d3, d4), 16)
.ok()
.and_then(std::char::from_u32)
.map_or_else(zero, success)
}
fn json_string<'a>() -> Parser<'a, JsonValue> {
// Parser to handle escape sequences
let escape = character('\\') >> |_|
satisfies(|c| "\"\\/bfnrtu".contains(c)) >> move |esc| {
let escaped_char = match esc {
'"' => '"',
'\\' => '\\',
'/' => '/',
'b' => '\x08',
'f' => '\x0C',
'n' => '\n',
'r' => '\r',
't' => '\t',
'u' => return hex4(),
_ => return zero(),
};
success(escaped_char)
};
// Parser to handle a regular character or an escape sequence
let char_or_escape = escape + satisfies(|c| c != '"' && c != '\\');
block(character('"'), many_string(char_or_escape), character('"'))
>> |s: String| success(JsonValue::String(s))
}
fn json_array<'a>() -> Parser<'a, JsonValue> {
block(
character('['),
sequence(json_value().clone(),
character(',')),
character(']'),
) >> |values: Vec<JsonValue>|
success(JsonValue::Array(values))
}
fn json_object<'a>() -> Parser<'a, JsonValue> {
let key_value_pair = json_string()
>> move |json_key| match json_key {
JsonValue::String(key) => {
blank() >> move |_|
character(':') >> {
let key = key.clone();
move |_| json_value() >> {
let key = key.clone();
move |value| success((key.clone(), value))
}
}
}
_ => zero(),
};
block(
character('{') >> |_| blank(),
sequence(key_value_pair, character(',') >> |_| blank()),
character('}'),
) >> |pairs: Vec<(String, JsonValue)>|
success(JsonValue::Object(pairs))
}
fn json_value<'a>() -> Parser<'a, JsonValue> {
blank() >> |_|
( json_null()
+ json_bool()
+ json_number()
+ json_string()
+ json_array()
+ json_object()) >> |v|
blank() >> move |_|
success(v.clone())
}
fn main() {
let json_parse = json_value();
let json_payload = if let Some(filename) = env::args().nth(1) {
fs::read_to_string(filename)
} else {
Ok(r###"{
"Hello," : "Wolrd!",
"number": -123.45,
"number2": -123.45e85,
"number3": -123e85,
"number4": -123.E85,
"bool": true,
"array": [1 , 2, 3, "m\"eh\u0111 31\u00f8C", false],
"null": null,
"baz": {
"a": true,
"b": false
},
"plop": {},
"gr8k": []
}"###.to_string())
};
match json_payload {
Ok(payload) => {
let rs = json_parse(payload);
for Interpretation(c, s) in rs.result {
println!("{:?} -> input remainder \"{}\"", c, s);
}
}
Err(err) => println!("Err: {}", err),
}
println!("END");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment