Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
An idea for how to handle whitespace in Rust's `nom` parser-combinator library.
/// A module for parsing whitespace. Takes into account comments too.
///
/// # Module Outline
/// - mod space
///   - fn comment
///   - mod allowed
///     - fn here
///     - fn after
///     - fn before
///     - fn around
///   - mod required
///     - fn here
///     - fn after
///     - fn before
///     - fn around
///
/// The module structure allows semantic whitespace handling while constructing parsers.
///
/// # Example
///
/// ```rust,ignore
/// let lisp_sexp = delimited(
///     char('('),
///     space::allowed::around(
///         separated_nonempty_list(space::required::here, some_parser),
///     ),
///     char(')'),
/// );
///
/// let rust_fn_definition = preceded(
///     space::required::after(tag("fn")),
///     tuple((
///         space::allowed::after(ident),
///         delimited(char('('), space::allowed::around(param_list), char(')')),
///         preceded(
///             space::allowed::around(tag("->")),
///             delimited(char('{'), space::allowed::around(block_interior), char('}')),
///         ),
///     )),
/// );
///
/// let source_file = terminated(
///     space::allowed::around(separated_list(
///         space::allowed::here,
///         alt((impl_block, fn_definition, trait_definition, type_definition)),
///     )),
///     eof,
/// );
/// ```
mod space {
use nom::{
branch::alt,
bytes::complete::{tag, take_till},
character::complete::{multispace0, multispace1},
combinator::recognize,
error::ParseError,
multi::many1,
sequence::{delimited, preceded, terminated},
IResult,
};
/// A comment starts with `//` and continues till the end of the line, or
/// end of input, whichever comes first. Note: this parser explicitly does
/// NOT consume the '\n' character at the end of lines.
pub fn comment<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E>
where
E: ParseError<&'i str>
{
let (i, _) = tag("//")(i)?;
let (i, content) = take_till(|ch| ch == '\n')(i)?;
// Strip off the first space if it has one.
if content.starts_with(' ') {
Ok((i, &content[1..]))
} else {
Ok((i, content))
}
}
pub mod allowed {
    use super::*;

    /// Optional whitespace: it may appear at this position, but need not.
    ///
    /// Tries `required::here` first and, when that fails, falls back to
    /// `multispace0`.
    pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E>
    where
        E: ParseError<&'i str>,
    {
        let one_or_more = super::required::here;
        alt((one_or_more, multispace0))(i)
    }

    /// Wraps `parser` so that optional whitespace is skipped both before
    /// **and** after it; only the output of `parser` is returned.
    pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E>
    where
        E: ParseError<&'i str>,
        P: Fn(&'i str) -> IResult<&'i str, T, E>,
    {
        // Leading whitespace, then the parser followed by trailing whitespace.
        move |input: &'i str| preceded(here, terminated(&parser, here))(input)
    }

    /// Wraps `parser` so that optional whitespace is skipped after it; only
    /// the output of `parser` is returned.
    pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E>
    where
        E: ParseError<&'i str>,
        P: Fn(&'i str) -> IResult<&'i str, T, E>,
    {
        move |input: &'i str| {
            let (rest, value) = parser(input)?;
            let (rest, _) = here::<E>(rest)?;
            Ok((rest, value))
        }
    }

    /// Wraps `parser` so that optional whitespace is skipped before it; only
    /// the output of `parser` is returned.
    pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E>
    where
        E: ParseError<&'i str>,
        P: Fn(&'i str) -> IResult<&'i str, T, E>,
    {
        move |input: &'i str| {
            let (rest, _) = here::<E>(input)?;
            parser(rest)
        }
    }
}
pub mod required {
    use super::*;

    /// Whitespace is required here.
    ///
    /// Matches one or more consecutive pieces, each being either a run of
    /// whitespace (`multispace1`) or a `//` comment (`comment`), and returns
    /// the whole matched span as one slice. Fails when the input starts with
    /// neither.
    pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E>
    where
        E: ParseError<&'i str>,
    {
        recognize(many1(alt((multispace1, comment))))(i)
    }

    /// Requires whitespace before **and** after the captured parser.
    ///
    /// Unlike `allowed::around`, the returned parser fails when whitespace is
    /// missing on either side. Only the output of `parser` is returned.
    pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E>
    where
        E: ParseError<&'i str>,
        P: Fn(&'i str) -> IResult<&'i str, T, E>,
    {
        move |i: &'i str| delimited(here, &parser, here)(i)
    }

    /// Requires whitespace after the captured parser.
    ///
    /// Unlike `allowed::after`, the returned parser fails when no whitespace
    /// follows. Only the output of `parser` is returned.
    pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E>
    where
        E: ParseError<&'i str>,
        P: Fn(&'i str) -> IResult<&'i str, T, E>,
    {
        move |i: &'i str| terminated(&parser, here)(i)
    }

    /// Requires whitespace before the captured parser.
    ///
    /// Unlike `allowed::before`, the returned parser fails when no whitespace
    /// precedes. Only the output of `parser` is returned.
    pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E>
    where
        E: ParseError<&'i str>,
        P: Fn(&'i str) -> IResult<&'i str, T, E>,
    {
        move |i: &'i str| preceded(here, &parser)(i)
    }
}
}
@eignnx

This comment has been minimized.

Copy link
Owner Author

@eignnx eignnx commented Apr 9, 2020

Oops! Looks like all of the closures need to be `move` closures so that they capture `parser` by value, and then internally borrow `parser` from the closure. Updated.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.