-
-
Save eignnx/3c8444b8e2f4d8ce10fcd97815f29d2e to your computer and use it in GitHub Desktop.
/// A module for parsing whitespace. Takes into account comments too. | |
/// | |
/// # Module Outline | |
/// - mod space | |
/// - fn comment | |
/// - mod allowed | |
/// - fn here | |
/// - fn after | |
/// - fn before | |
/// - fn around | |
/// - mod required | |
/// - fn here | |
/// - fn after | |
/// - fn before | |
/// - fn around | |
/// | |
/// The module structure allows semantic whitespace handling while constructing parsers. | |
/// | |
/// # Example | |
/// | |
/// ```rust | |
/// let lisp_sexp = delimited( | |
/// char('('), | |
/// space::allowed::around( | |
/// separated_nonempty_list(space::required::here, some_parser), | |
/// ), | |
/// char(')'), | |
/// ); | |
/// | |
/// let rust_fn_definition = preceded( | |
/// space::required::after(tag("fn")), | |
/// tuple(( | |
/// space::allowed::after(ident), | |
/// delimited(char('('), space::allowed::around(param_list), char(')')), | |
/// preceded( | |
/// space::allowed::around(tag("->")), | |
/// delimited(char('{'), space::allowed::around(block_interior), char('}')), | |
/// ), | |
/// )), | |
/// ); | |
/// | |
/// let source_file = terminated( | |
/// space::allowed::around(separated_list( | |
/// space::allowed::here, | |
/// alt((impl_block, fn_definition, trait_definition, type_definition)), | |
/// )), | |
/// eof, | |
/// ); | |
/// ``` | |
mod space { | |
use nom::{ | |
branch::alt, | |
bytes::complete::{tag, take_till}, | |
character::complete::{multispace0, multispace1}, | |
combinator::recognize, | |
error::ParseError, | |
multi::many1, | |
sequence::{delimited, preceded, terminated}, | |
IResult, | |
}; | |
/// A comment starts with `//` and continues till the end of the line, or | |
/// end of input, whichever comes first. Note: this parser explicitly does | |
/// NOT consume the '\n' character at the end of lines. | |
pub fn comment<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
where | |
E: ParseError<&'i str> | |
{ | |
let (i, _) = tag("//")(i)?; | |
let (i, content) = take_till(|ch| ch == '\n')(i)?; | |
// Strip off the first space if it has one. | |
if content.starts_with(' ') { | |
Ok((i, &content[1..])) | |
} else { | |
Ok((i, content)) | |
} | |
} | |
pub mod allowed { | |
use super::*; | |
/// Whitespace is allowed here, but not required. | |
pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
where | |
E: ParseError<&'i str> | |
{ | |
alt((super::required::here, multispace0))(i) | |
} | |
/// Has potentially-empty whitespace before **and** after the captured parser. | |
pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
where | |
E: ParseError<&'i str>, | |
P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
{ | |
move |i: &'i str| delimited(here, &parser, here)(i) | |
} | |
/// Has potentially-empty whitespace after the captured parser. | |
pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
where | |
E: ParseError<&'i str>, | |
P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
{ | |
move |i: &'i str| terminated(&parser, here)(i) | |
} | |
/// Has potentially-empty whitespace before the captured parser. | |
pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
where | |
E: ParseError<&'i str>, | |
P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
{ | |
move |i: &'i str| preceded(here, &parser)(i) | |
} | |
} | |
pub mod required { | |
use super::*; | |
/// Whitespace is required here. | |
pub fn here<'i, E>(i: &'i str) -> IResult<&'i str, &'i str, E> | |
where | |
E: ParseError<&'i str> | |
{ | |
recognize(many1(alt((multispace1, comment))))(i) | |
} | |
/// Has potentially-empty whitespace before **and** after the captured parser. | |
pub fn around<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
where | |
E: ParseError<&'i str>, | |
P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
{ | |
move |i: &'i str| delimited(here, &parser, here)(i) | |
} | |
/// Has potentially-empty whitespace after the captured parser. | |
pub fn after<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
where | |
E: ParseError<&'i str>, | |
P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
{ | |
move |i: &'i str| terminated(&parser, here)(i) | |
} | |
/// Has potentially-empty whitespace before the captured parser. | |
pub fn before<'i, T, E, P>(parser: P) -> impl Fn(&'i str) -> IResult<&'i str, T, E> | |
where | |
E: ParseError<&'i str>, | |
P: Fn(&'i str) -> IResult<&'i str, T, E>, | |
{ | |
move |i: &'i str| preceded(here, &parser)(i) | |
} | |
} | |
} |
I don't have a very deep understanding of Rust and I have a question about the parameter types. In your implementation, `Fn(_) -> _` is required, but sometimes (e.g., with `nom::combinator::recognize`) we only have `FnMut(_) -> _`. How should we handle this kind of scenario?
Currently, I have a function like this, which does not pass the compiler check:
/// Recognizes an identifier: a leading run of letters or a single `_`, followed by any
/// number of alphanumeric runs or `_`, then skips optional trailing whitespace/comments
/// via `space::allowed::after`.
///
/// NOTE(review): this is the snippet the compiler rejects (error quoted below):
/// `recognize` hands back an `impl FnMut`, while `space::allowed::after` is bounded on `Fn`.
/// NOTE(review): the signature promises a `String`, but the body appears to yield the
/// recognized `&str` slice; confirm whether a conversion is missing.
pub fn parse_identifier(input: &str) -> IResult<&str, String> {
space::allowed::after(recognize(pair(
alt((alpha1, tag("_"))),
many0(alt((alphanumeric1, tag("_")))),
)))(input)
}
// expected a `Fn<(&str,)>` closure, found `impl FnMut<(&str,)>`
// the trait `Fn<(&str,)>` is not implemented for `impl FnMut<(&str,)>`
@Shuumatsu Wow, didn't know anybody was using this code haha! Um, I'm not sure, but you could try changing all of the `Fn(_) -> _` types in this gist to `FnMut(_) -> _` types. That might solve the problem. I think in nom version 6 they basically made the same change in their codebase (allowing `FnMut` types as parsers), so that's what makes me think it might work here.
When I get a chance I'll try this change out in my codebase and, if it works, I'll update the gist. ✌️
Oops! Looks like all of the closures need to be `move` closures to capture `parser` by value, and then internally borrow `parser` from the closure. Updated.