RomanHargrave/string.rs

## string.rs
use crate::lang::Atom;

use nom::character::complete::{
  char,
  multispace1,
};

use nom::sequence::{delimited, preceded, terminated};

use nom::combinator::{
  map,
  map_res,
  map_opt,
  verify,
  value
};

use nom::branch::alt;

use nom::bytes::complete::{
  is_not,
  take_while_m_n
};

use nom::error::{
  FromExternalError,
  ParseError
};

use nom::IResult;

use nom::multi::fold_many0;

/// Builds a parser that consumes exactly one character and yields a fixed value in its place.
///
/// This is shorthand for wrapping [nom::character::complete::char] in
/// [nom::combinator::value]. The invocation
/// ```ignore
/// char_val!('a' => '\u{07}');
/// ```
///
/// expands to
/// ```ignore
/// nom::combinator::value('\u{07}', nom::character::complete::char('a'));
/// ```
///
/// Both snippets are marked `ignore`: neither pins the parser's input and error types, so they
/// cannot be type-checked as stand-alone documentation tests.
#[macro_export]
macro_rules! char_val {
  ($from:literal => $to:expr) => {
    nom::combinator::value(
      $to,
      nom::character::complete::char($from)
    )
  };
}

// Much of this is derived from the escaped string example packaged with Nom, in part because my
// brain was not working at the time. The basic idea here is to break the body of a string into
// fragments based on whether the parser that consumes a given span of the string body produces
// many, one, or no characters (String, Char, and Void respectively) and then recombine them
// appropriately. Effectively, this will take an input string with escapes and produce an owned
// string representing the logical value of the input string body.

/// R7RS §6.7 hexadecimal escape sequence parser, invoked after the escape character has already
/// been consumed.
///
/// The parser consumes text matching the expression `x[0-9A-Fa-f]{1,8}` and yields the character
/// whose scalar value is the parsed number. It does NOT consume the trailing `;` used by string
/// and identifier escapes; callers that need the terminator must match it themselves.
///
/// Fails when the leading `x` or the digits are missing, or when the number is not a value that
/// [char::from_u32] accepts.
pub fn hex_scalar_seq<'s, E>(i: &'s str) -> IResult<&'s str, char, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  // `x` followed by one to eight hexadecimal digits; only the digits are handed on.
  let hex_digits = preceded(
    char('x'),
    take_while_m_n(1, 8, |c: char| c.is_ascii_hexdigit()),
  );

  // Interpret the digits as a base-16, 32-bit integer.
  let scalar_value = map_res(hex_digits, |digits| u32::from_str_radix(digits, 16));

  // Turn the integer into a character, failing the parse when no such character exists.
  let mut scalar = map_opt(scalar_value, char::from_u32);

  scalar(i)
}

/// The character that introduces every escape sequence inside a string body: a single backslash.
const ESCAPE_LEADER: char = '\\';

/// Parse a single-character escape sequence, i.e. any escape other than escaped whitespace.
///
/// The input must start with the escape leader (`\`). What follows selects the character that is
/// produced: a `x<hex digits>;` scalar escape, or one of the mnemonic escapes listed below.
/// Anything else after the leader is an error.
fn escaped_char<'s, E>(i: &'s str) -> IResult<&'s str, char, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  // Everything that may follow the escape leader. Branches are tried top to bottom, so it may
  // pay to reorder them later from most to least common.
  //
  // These are the escape sequences defined by the spec. Do not remove or alter.
  let escape_body = alt((
    terminated(hex_scalar_seq, char(';')), // x…; scalar
    char_val!('a'  => '\x07'), // Alarm
    char_val!('b'  => '\x08'), // Backspace
    char_val!('t'  => '\t'),   // Tab
    char_val!('n'  => '\n'),   // Linefeed
    char_val!('r'  => '\r'),   // Return
    char_val!('"'  => '"'),
    char_val!('\\' => '\\'),
    char_val!('|'  => '|'),
  ));

  // The leader itself is matched and discarded; only the selected character is returned.
  let mut escape = preceded(char(ESCAPE_LEADER), escape_body);

  escape(i)
}

/// Parse escaped whitespace: a backslash followed by at least one whitespace character.
///
/// The backslash is discarded and the matched run of whitespace is returned. A backslash that is
/// not followed by whitespace is an error.
fn escaped_whitespace<'s, E>(i: &'s str) -> IResult<&'s str, &'s str, E>
  where
    E: ParseError<&'s str>
{
  // `multispace1` requires one or more whitespace characters after the leader.
  let mut skipped = preceded(char(ESCAPE_LEADER), multispace1);

  skipped(i)
}

/// Parse a run of ordinary string text, given the string delimiter.
///
/// The returned parser consumes as much input as it can up to, but not including, the first
/// escape leader (`\`) or `delim`. It fails when that run would be empty.
fn normal_text<'s, E>(
  delim: char
) -> impl FnMut(&'s str) -> IResult<&'s str, &'s str, E>
  where
    E: ParseError<&'s str>
{
  move |i| {
    // The two characters that end a run of ordinary text.
    let stoppers = [ESCAPE_LEADER, delim];

    let mut run = verify(
      // Take everything up to the first stopper.
      is_not(stoppers.as_slice()),
      // Turn a zero-length match into an error.
      |s: &str| !s.is_empty()
    );

    run(i)
  }
}

/// One piece of a string body, as produced by a single step of the fragment parser.
#[derive(Clone, Debug)]
enum Fragment<'s> {
  /// A span of input text containing no special sequences, borrowed from the input.
  String(&'s str),

  /// The single character that an escape sequence stands for.
  Char(char),

  /// Input that was consumed but contributes nothing to the resulting string.
  Void,
}

/// Produces a parser that, for the given delimiter, consumes the first applicable span of a
/// string body and reports it as the matching [Fragment] variant (String, Char, or Void),
/// returning the remaining input alongside it.
fn string_fragment<'s, E>(
  delim: char
) -> impl FnMut(&'s str) -> IResult<&'s str, Fragment<'s>, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  move |i| {
    // A run of plain text becomes a String fragment.
    let text = map(normal_text(delim), Fragment::String);
    // A character escape becomes a Char fragment.
    let escape = map(escaped_char, Fragment::Char);
    // Escaped whitespace is consumed but produces nothing, hence Void.
    let skipped = value(Fragment::Void, escaped_whitespace);

    // alt() tries its branches in order. Plain text is expected to be the most frequent case and
    // goes first, then character escapes, and finally whitespace escapes.
    let mut fragment = alt((text, escape, skipped));

    fragment(i)
  }
}

/// Consume a block of text that may contain escape sequences, stopping at the first position
/// where no fragment can be parsed. Escape sequences are resolved at parse time, so the result is
/// an owned string holding the transformed input.
fn string_body<'s, E>(
  delim: char
) -> impl FnMut(&'s str) -> IResult<&'s str, String, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  move |i| {
    let mut body = fold_many0(
      // Keep applying the fragment parser until it fails.
      string_fragment(delim),
      // The accumulator starts out as an empty string.
      String::new,
      // Called once per parsed fragment with the accumulator and the fragment. The accumulator is
      // taken by value and mutated in place, so no new string is built per fragment (the buffer
      // may still have to grow as text is appended).
      |mut acc: String, frag| {
        match frag {
          // Plain text from normal_text: append the whole slice.
          Fragment::String(text) => acc.push_str(text),
          // A character escape: append the one character it stands for.
          Fragment::Char(c) => acc.push(c),
          // Void fragments leave the accumulator untouched.
          Fragment::Void => {}
        }
        acc
      }
    );

    body(i)
  }
}

/// Parse a complete string enclosed in a pair of `delimiter` characters.
///
/// The returned parser yields the owned, escape-resolved body; both delimiters are consumed and
/// discarded. It fails when either delimiter is missing.
pub fn delimited_string<'s, E>(
  delimiter: char
) -> impl FnMut(&'s str) -> IResult<&'s str, String, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  move |i| {
    // opening delimiter, then the body with its escapes resolved, then the closing delimiter
    let mut enclosed = delimited(
      char(delimiter),
      string_body(delimiter),
      char(delimiter)
    );

    enclosed(i)
  }
}

/// Consume a span of text delimited by double quotes and emit an [Atom::String] owning its
/// computed value, or an error if the input is not a valid string.
pub fn string<'s, E>(i: &'s str) -> IResult<&'s str, Atom<'s>, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  // Parse the quoted text, then move the computed body into the Atom::String variant. Errors are
  // passed through untouched.
  delimited_string('"')(i)
    .map(|(rest, body)| (rest, Atom::String(body)))
}

/// Consume a string delimited by vertical lines (`|`), which is used for complex identifiers.
///
/// The resolved text is emitted as an [Atom::String].
// NOTE(review): identifiers and string literals end up in the same Atom variant here; confirm
// that this is intended.
pub fn long_identifier<'s, E>(i: &'s str) -> IResult<&'s str, Atom<'s>, E>
  where
    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
{
  delimited_string('|')(i)
    .map(|(rest, body)| (rest, Atom::String(body)))
}

#[cfg(test)]
mod test {
  use super::*;
  use nom::error::ErrorKind;

  /// Error type requested from every parser under test.
  type WantError<'s> = (&'s str, ErrorKind);

  /// Test that the hexadecimal scalar parser accepts one to eight hex digits in either case,
  /// leaves the `;` terminator alone, and rejects numbers that are not valid characters.
  #[test]
  fn parses_scalar_seq() {
    let parse =
      hex_scalar_seq::<WantError>;

    assert_eq!(parse("xAE"), Ok(("", '\u{AE}')),
               "Parses one-byte scalar");

    assert_eq!(parse("xae"), Ok(("", '\u{AE}')),
               "Parser is not case-sensitive");

    assert_eq!(parse("xAe"), Ok(("", '\u{AE}')),
               "Parser is not case-sensitive");

    // we do not want the parser to consume ; as it is not applicable to its use in the character
    // literal parser
    assert_eq!(parse("xAE;"), Ok((";", '\u{AE}')),
               "Parser should not parse string/ident hex scalar terminator");

    parse("x")
      .expect_err("Parser should not parse hex scalar seq with 0 bits");

    parse("AE")
      .expect_err("Parser should not parse hex scalar seq without leading x");

    assert_eq!(parse("x000000AE"), Ok(("", '\u{AE}')),
               "Parser should parse hex scalar seq up to 32 bits");

    assert_eq!(parse("x00000000AE"), Ok(("AE", '\0')),
               "Parser should not consume more than 32-bits worth of hex chars");

    // numbers that fit in 32 bits but are not valid characters must be rejected
    parse("xFFFFFFFF")
      .expect_err("Parser should not parse a value beyond the range of char");

    parse("xD800")
      .expect_err("Parser should not parse a surrogate code point");
  }

  /// Test that the whitespace escape parser consumes any quantity of whitespace and halts at the
  /// first non-whitespace character.
  #[test]
  fn parses_escaped_whitespace() {
    let parse =
      escaped_whitespace::<WantError>;

    assert_eq!(parse(r"\ "), Ok(("", " ")),
               "Parser should consume whitespace");

    assert_eq!(parse(r"\   "), Ok(("", "   ")),
               "Parser should consume any amount of contiguous whitespace");

    assert_eq!(parse("\\ \n "), Ok(("", " \n ")),
               "Parser should consume any whitespace, including linefeed");

    assert_eq!(parse(r"\ a "), Ok(("a ", " ")),
               "Parser should halt where contiguous whitespace breaks");

    // the whitespace following `a` is not contiguous with the escaped run, so it must be left in
    // the remainder
    assert_eq!(parse(r"\  a "), Ok(("a ", "  ")),
               "Parser should halt where contiguous whitespace breaks");

    parse(r"\a")
      .expect_err("Parser should not accept an escape leader without whitespace");
  }

  /// Test that the normal text parser consumes any quantity of normal text and stops at the
  /// first backslash or quote. Also test that the parser produces an error when asked to parse
  /// text beginning with an unexpected special character.
  #[test]
  fn parses_normal_text() {
    let mut parse =
      normal_text::<WantError>('"');

    // the parser should entirely consume an input containing no escape leader nor delimiter
    assert_eq!(parse("one two 3."), Ok(("", "one two 3.")),
               "Parser consumes normal text");

    // the parser should stop at its configured delimiter
    assert_eq!(parse("literally.\""), Ok(("\"", "literally.")),
               "Parser does not consume past delimiter");

    assert_eq!(normal_text::<WantError>('|')("pop|tarts"), Ok(("|tarts", "pop")),
               "Parser does not consume past delimiter");

    // the parser should always stop at \
    assert_eq!(parse(r"literally.\"), Ok((r"\", "literally.")),
               "Parser does not consume escape leader");

    // the parser should always stop at \
    assert_eq!(parse(r"pop\tarts"), Ok((r"\tarts", "pop")),
               "Parser does not consume escape leader");

    // e.g. unless some other character that might otherwise be a delimiter (|) is this parser's
    // configured delimiter, it should consume it.
    assert_eq!(parse("pop|tarts"), Ok(("", "pop|tarts")),
               "Parser consumes other delimiters unless told otherwise");

    // the following two cases will not return Ok() because the input begins with a character the
    // parser stops at, which leaves it with nothing to consume

    parse("\\Strawberry Pop Tarts may be a cheap and inexpensive source of incendiary devices.")
      .expect_err("Consumed text beginning with backslash");

    parse("\"Strawberry Pop Tarts may be a cheap and inexpensive source of incendiary devices.")
      .expect_err("Consumed text beginning with quote");
  }

  /// Asserts, for each `input => output, message,` triple, that the character escape parser
  /// consumes all of `input` and produces `output`.
  macro_rules! expect_char_escape {
    ($($input:literal => $output:literal, $msg:literal,)+) => {
      $( assert_eq!(
        escaped_char::<WantError>($input),
        Ok(("", $output)),
        "{}: unexpected result for input {}", $msg, $input
      ); )+
    }
  }

  /// Test that the character escape sequence parser calls complex escape sequence parsers as
  /// needed and returns the expected character for a given escape sequence as defined in R7RS §6.7.
  #[test]
  fn parses_char_escape() {
    expect_char_escape!(
      r"\xAE;" => '\u{AE}', "Should parse hexadecimal scalar escape seq",
      r"\a"    => '\u{07}', "Should parse alarm escape seq",
      r"\b"    => '\u{08}', "Should parse backspace escape seq",
      r"\t"    => '\u{09}', "Should parse tab escape seq",
      r"\n"    => '\u{0A}', "Should parse linefeed escape seq",
      r"\r"    => '\u{0D}', "Should parse carriage return escape seq",
      "\\\""   => '\u{22}', "Should parse double quote escape seq",
      r"\\"    => '\u{5C}', "Should parse backslash escape seq",
      r"\|"    => '\u{7C}', "Should parse vertical line escape seq",
    );

    // should not parse undefined character escape. if adding new escapes, ensure that this
    // remains undefined (e.g. update it *here* to something not in the escape parser). the escape
    // leader must be present, otherwise the parser fails before it ever looks at the `z`.
    escaped_char::<WantError>(r"\z")
      .expect_err("Should not parse undefined escape sequence");

    // inside strings and identifiers the hexadecimal escape must be closed by ;
    escaped_char::<WantError>(r"\xAE")
      .expect_err("Should not parse hexadecimal scalar escape seq without terminator");

    escaped_char::<WantError>("a")
      .expect_err("Should not parse escape sequence without escape leader");
  }

  // TODO assert_matches! stabilization would be very, very welcome
  /// Asserts that `$left` matches the pattern `$right`, panicking with `$fail_msg`, the expected
  /// pattern, and the actual value otherwise.
  macro_rules! expect_match {
    ($left:expr, $right:pat_param, $fail_msg:expr) => {
      match $left {
        $right => (),
        thing => panic!(
          "{}: expected {} but got {:?} instead",
          $fail_msg, stringify!($right), thing
        )
      }
    }
  }

  /// Test that the fragment parser emits the correct fragment variants for a given input.
  #[test]
  fn emits_fragment_variants() {
    let mut parse =
      string_fragment::<WantError>('"');

    expect_match!(parse("The Pop Tarts ... flames 10-18 inches"), Ok((_, Fragment::String(_))),
      "Did not produce String fragment for normal text");

    expect_match!(parse(r"\xAE;"), Ok((_, Fragment::Char(_))),
      "Did not produce Char fragment for character escape");

    expect_match!(parse(r"\ "), Ok((_, Fragment::Void)),
      "Did not produce Void fragment for escaped whitespace");

    expect_match!(parse("\\ \n "), Ok((_, Fragment::Void)),
      "Did not produce Void fragment for escaped whitespace containing linefeed");
  }

  /// Test that the string body parser consumes and recombines the input string as expected.
  #[test]
  fn parses_string_body() {
    let mut parse =
      string_body::<WantError>('"');

    assert_eq!(parse(r"\x1FAD0; Pop Tarts"), Ok(("", String::from("\u{1FAD0} Pop Tarts"))),
               "Failed to transform char fragment followed by string fragment");

    assert_eq!(parse(r"one\ two"), Ok(("", String::from("onetwo"))),
               "Failed to transform string fragments separated by void fragment");

    assert_eq!(parse(""), Ok(("", String::from(""))),
               "Failed to transform empty input to empty string");

    assert_eq!(parse(r"\  "), Ok(("", String::from(""))),
               "Failed to transform single void fragment to empty string");

    assert_eq!(parse(r"\  \n18-10 inches"), Ok(("", String::from("\n18-10 inches"))),
               "Failed to transform [Void,Char,String] sequence");

    // the body ends where the delimiter begins; the delimiter itself is left for the caller
    assert_eq!(parse("Pop\" Tarts"), Ok(("\" Tarts", String::from("Pop"))),
               "Failed to stop at unescaped delimiter");

    // non-printable chars that may appear in strings unescaped

    // yes, the newlines are supposed to be there
    assert_eq!(parse("\nStrawberry\n"), Ok(("", String::from("\nStrawberry\n"))),
               "Failed to transform String fragment containing newlines");

    assert_eq!(parse("\t"), Ok(("", String::from("\t"))),
               "Failed to transform String fragment containing tab");
  }

  /// Test that delimited strings are correctly parsed, including where broken by newlines, etc…
  /// Also test that escaped string delimiters are treated nicely. Note that most tests concerned
  /// with the body parser are in [parses_string_body].
  #[test]
  fn parses_delimited_string() {
    let parse =
      string::<WantError>;

    assert_eq!(
      parse("\"Strawberry Pop Tarts\""),
      Ok(("", Atom::String(String::from("Strawberry Pop Tarts")))),
      "Failed to parse valid delimited string"
    );

    assert_eq!(
      parse("\"\\\"flames 18-10 inches\""),
      Ok(("", Atom::String(String::from("\"flames 18-10 inches")))),
      "Failed to parse valid delimited string with single escaped delimiter"
    );

    assert_eq!(
      parse("\"\\\"flames 18-10 inches\\\" in height\""),
      Ok(("", Atom::String(String::from("\"flames 18-10 inches\" in height")))),
      "Failed to parse valid delimited string with balanced escaped delimiters"
    );

    assert_eq!(
      parse("\"incendiary devices.\n Toasters\""),
      Ok(("", Atom::String(String::from("incendiary devices.\n Toasters")))),
      "Failed to parse valid delimited string containing unescaped newline"
    );

    parse("Pop Tarts may be … incendiary devices")
      .expect_err("Did not yield error when parsing un-delimited string");

    parse("\"Pop Tarts may be … incendiary devices")
      .expect_err("Did not yield error when parsing string without closing delimiter");
  }

  /// Test that a |-delimited identifier is parsed and its delimiters are dropped.
  #[test]
  fn parses_delimited_identifier() {
    let parse =
      long_identifier::<WantError>;

    assert_eq!(
      parse("|pop tarts|"),
      Ok(("", Atom::String(String::from("pop tarts")))),
      "Failed to parse delimited identifier"
    )
  }
}
	use crate::lang::Atom;

	use nom::character::complete::{
	char,
	multispace1,
	};

	use nom::sequence::{delimited, preceded, terminated};

	use nom::combinator::{
	map,
	map_res,
	map_opt,
	verify,
	value
	};

	use nom::branch::alt;

	use nom::bytes::complete::{
	is_not,
	take_while_m_n
	};

	use nom::error::{
	FromExternalError,
	ParseError
	};

	use nom::IResult;

	use nom::multi::fold_many0;

	/// Builds a parser that consumes exactly one character and yields a fixed value in its place.
	///
	/// This is shorthand for wrapping [nom::character::complete::char] in
	/// [nom::combinator::value]. The invocation
	/// ```ignore
	/// char_val!('a' => '\u{07}');
	/// ```
	///
	/// expands to
	/// ```ignore
	/// nom::combinator::value('\u{07}', nom::character::complete::char('a'));
	/// ```
	///
	/// Both snippets are marked `ignore`: neither pins the parser's input and error types, so they
	/// cannot be type-checked as stand-alone documentation tests.
	#[macro_export]
	macro_rules! char_val {
	  ($from:literal => $to:expr) => {
	    nom::combinator::value(
	      $to,
	      nom::character::complete::char($from)
	    )
	  };
	}

	// Much of this is derived from the escaped string example packaged with Nom, in part because my
	// brain was not working at the time. The basic idea here is to break the body of a string into
	// fragments based on whether the parser that consumes a given span of the string body produces
	// many, one, or no characters (String, Char, and Void respectively) and then recombine them
	// appropriately. Effectively, this will take an input string with escapes and produce an owned
	// string representing the logical value of the input string body.

	/// R7RS §6.7 hexadecimal escape sequence parser, invoked after the escape character has already
	/// been consumed.
	///
	/// The parser consumes text matching the expression `x[0-9A-Fa-f]{1,8}` and yields the character
	/// whose scalar value is the parsed number. It does NOT consume the trailing `;` used by string
	/// and identifier escapes; callers that need the terminator must match it themselves.
	///
	/// Fails when the leading `x` or the digits are missing, or when the number is not a value that
	/// [char::from_u32] accepts.
	pub fn hex_scalar_seq<'s, E>(i: &'s str) -> IResult<&'s str, char, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  map_opt(
	    map_res(
	      // consume `x` and the digits, returning only the digits
	      preceded(
	        char('x'),
	        take_while_m_n(1, 8, |c: char| c.is_ascii_hexdigit()),
	      ),
	      // convert hexadecimal sequence to 32-bit integer
	      |r| u32::from_str_radix(r, 16)
	    ),
	    // convert integer to character, failing the parse when no such character exists
	    char::from_u32
	  )(i)
	}

	/// The character that introduces every escape sequence inside a string body: a single backslash.
	const ESCAPE_LEADER: char = '\\';

	/// Parse a single-character escape sequence, i.e. any escape other than escaped whitespace.
	///
	/// The input must start with the escape leader (`\`). What follows selects the character that is
	/// produced: a `x<hex digits>;` scalar escape, or one of the mnemonic escapes listed below.
	/// Anything else after the leader is an error.
	fn escaped_char<'s, E>(i: &'s str) -> IResult<&'s str, char, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  // Expect a sequence starting with \
	  preceded(
	    char(ESCAPE_LEADER),
	    // Immediately following \, test for acceptable values. Branches are tested in order, so it
	    // may pay to shuffle this around later such that branches are explored in order of most
	    // common first to least common last.
	    alt((
	      // Escape sequences defined by spec. Do not remove or alter.
	      terminated(hex_scalar_seq, char(';')), // x…; scalar
	      char_val!('a'  => '\u{07}'), // Alarm
	      char_val!('b'  => '\u{08}'), // Backspace
	      char_val!('t'  => '\u{09}'), // Tab
	      char_val!('n'  => '\u{0A}'), // Linefeed
	      char_val!('r'  => '\u{0D}'), // Return
	      char_val!('"'  => '"'),
	      char_val!('\\' => '\\'),
	      char_val!('|'  => '|'),
	    ))
	  )(i)
	}

	/// Parse escaped whitespace: a backslash followed by at least one whitespace character.
	///
	/// The backslash is discarded and the matched run of whitespace is returned. A backslash that is
	/// not followed by whitespace is an error.
	fn escaped_whitespace<'s, E>(i: &'s str) -> IResult<&'s str, &'s str, E>
	  where
	    E: ParseError<&'s str>
	{
	  // `multispace1` requires one or more whitespace characters after the leader.
	  let mut skipped = preceded(char(ESCAPE_LEADER), multispace1);

	  skipped(i)
	}

	/// Parse a run of ordinary string text, given the string delimiter.
	///
	/// The returned parser consumes as much input as it can up to, but not including, the first
	/// escape leader (`\`) or `delim`. It fails when that run would be empty.
	fn normal_text<'s, E>(
	  delim: char
	) -> impl FnMut(&'s str) -> IResult<&'s str, &'s str, E>
	  where
	    E: ParseError<&'s str>
	{
	  move |i|
	    verify(
	      // Take as many characters as possible until the delimiter or \ is reached.
	      is_not([ESCAPE_LEADER, delim].as_slice()),
	      // Convert the result to an error if the above parser consumed 0 characters.
	      |s: &str| !s.is_empty()
	    )(i)
	}

	/// One piece of a string body, as produced by a single step of the fragment parser.
	#[derive(Clone, Debug)]
	enum Fragment<'s> {
	  /// A span of input text containing no special sequences, borrowed from the input.
	  String(&'s str),

	  /// The single character that an escape sequence stands for.
	  Char(char),

	  /// Input that was consumed but contributes nothing to the resulting string.
	  Void,
	}

	/// Produces a parser that, for the given delimiter, consumes the first applicable span of a
	/// string body and reports it as the matching [Fragment] variant (String, Char, or Void),
	/// returning the remaining input alongside it.
	fn string_fragment<'s, E>(
	  delim: char
	) -> impl FnMut(&'s str) -> IResult<&'s str, Fragment<'s>, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  move |i|
	    // alt() tests in order. normal_text should occur more frequently than other branches so should
	    // be tested first (preferred), followed by regular character escapes, finally whitespace escapes.
	    alt((
	      // Parsers producing a string of characters have their result placed into the String variant
	      map(normal_text(delim), Fragment::String),
	      // Likewise, any single character parser results are placed into the Char variant
	      map(escaped_char, Fragment::Char),
	      // Any parser that "skips" portions of the input string produces the Void variant
	      value(Fragment::Void, escaped_whitespace),
	    ))(i)
	}

	/// Consume a block of text that may contain escape sequences, stopping at the first position
	/// where no fragment can be parsed. Escape sequences are resolved at parse time, so the result is
	/// an owned string holding the transformed input.
	fn string_body<'s, E>(
	  delim: char
	) -> impl FnMut(&'s str) -> IResult<&'s str, String, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  move |i|
	    fold_many0(
	      // Apply the string_fragment parser until it fails
	      string_fragment(delim),
	      // The accumulator starts out as an empty string.
	      String::new,
	      // Called once per parsed fragment with the accumulator and the fragment. The accumulator is
	      // taken by value and mutated in place, so no new string is built per fragment (the buffer
	      // may still have to grow as text is appended).
	      |mut str, frag|
	        match frag {
	          // When a string fragment is encountered, append the entire string to this string. This
	          // is the case applicable to the normal_text parser.
	          Fragment::String(s) => {
	            str.push_str(s);
	            str
	          }
	          // When a character fragment is encountered, append the character. Applicable to
	          // character escapes.
	          Fragment::Char(c) => {
	            str.push(c);
	            str
	          }
	          // The void fragment will not result in any changes to the "accumulator" string
	          Fragment::Void => str
	        }
	    )(i)
	}

	/// Parse a complete string enclosed in a pair of `delimiter` characters.
	///
	/// The returned parser yields the owned, escape-resolved body; both delimiters are consumed and
	/// discarded. It fails when either delimiter is missing.
	pub fn delimited_string<'s, E>(
	  delimiter: char
	) -> impl FnMut(&'s str) -> IResult<&'s str, String, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  move |i|
	    // Parse the string and yield an owned String
	    delimited(
	      // Expect to see the opening delimiter
	      char(delimiter),
	      // Followed by the string body and any escaped chars, etc...
	      string_body(delimiter),
	      // Expect the closing delimiter
	      char(delimiter)
	    )(i)
	}

	/// Consume a span of text delimited by double quotes and emit an [Atom::String] owning its
	/// computed value, or an error if the input is not a valid string.
	pub fn string<'s, E>(i: &'s str) -> IResult<&'s str, Atom<'s>, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  // Parse the quoted text, then move the computed body into the Atom::String variant. Errors are
	  // passed through untouched.
	  delimited_string('"')(i)
	    .map(|(rest, body)| (rest, Atom::String(body)))
	}

	/// Consume a string delimited by vertical lines (`|`), which is used for complex identifiers.
	///
	/// The resolved text is emitted as an [Atom::String].
	// NOTE(review): identifiers and string literals end up in the same Atom variant here; confirm
	// that this is intended.
	pub fn long_identifier<'s, E>(i: &'s str) -> IResult<&'s str, Atom<'s>, E>
	  where
	    E: ParseError<&'s str> + FromExternalError<&'s str, std::num::ParseIntError>
	{
	  map(
	    delimited_string('|'),
	    Atom::String
	  )(i)
	}

	#[cfg(test)]
	mod test {
	  use super::*;
	  use nom::error::ErrorKind;

	  /// Error type requested from every parser under test.
	  type WantError<'s> = (&'s str, ErrorKind);

	  /// Test that the hexadecimal scalar parser accepts one to eight hex digits in either case,
	  /// leaves the `;` terminator alone, and rejects numbers that are not valid characters.
	  #[test]
	  fn parses_scalar_seq() {
	    let parse =
	      hex_scalar_seq::<WantError>;

	    assert_eq!(parse("xAE"), Ok(("", '\u{AE}')),
	               "Parses one-byte scalar");

	    assert_eq!(parse("xae"), Ok(("", '\u{AE}')),
	               "Parser is not case-sensitive");

	    assert_eq!(parse("xAe"), Ok(("", '\u{AE}')),
	               "Parser is not case-sensitive");

	    // we do not want the parser to consume ; as it is not applicable to its use in the character
	    // literal parser
	    assert_eq!(parse("xAE;"), Ok((";", '\u{AE}')),
	               "Parser should not parse string/ident hex scalar terminator");

	    parse("x")
	      .expect_err("Parser should not parse hex scalar seq with 0 bits");

	    parse("AE")
	      .expect_err("Parser should not parse hex scalar seq without leading x");

	    assert_eq!(parse("x000000AE"), Ok(("", '\u{AE}')),
	               "Parser should parse hex scalar seq up to 32 bits");

	    assert_eq!(parse("x00000000AE"), Ok(("AE", '\0')),
	               "Parser should not consume more than 32-bits worth of hex chars");

	    // numbers that fit in 32 bits but are not valid characters must be rejected
	    parse("xFFFFFFFF")
	      .expect_err("Parser should not parse a value beyond the range of char");

	    parse("xD800")
	      .expect_err("Parser should not parse a surrogate code point");
	  }

	  /// Test that the whitespace escape parser consumes any quantity of whitespace and halts at the
	  /// first non-whitespace character.
	  #[test]
	  fn parses_escaped_whitespace() {
	    let parse =
	      escaped_whitespace::<WantError>;

	    assert_eq!(parse(r"\ "), Ok(("", " ")),
	               "Parser should consume whitespace");

	    assert_eq!(parse(r"\   "), Ok(("", "   ")),
	               "Parser should consume any amount of contiguous whitespace");

	    assert_eq!(parse("\\ \n "), Ok(("", " \n ")),
	               "Parser should consume any whitespace, including linefeed");

	    assert_eq!(parse(r"\ a "), Ok(("a ", " ")),
	               "Parser should halt where contiguous whitespace breaks");

	    // the whitespace following `a` is not contiguous with the escaped run, so it must be left in
	    // the remainder
	    assert_eq!(parse(r"\  a "), Ok(("a ", "  ")),
	               "Parser should halt where contiguous whitespace breaks");

	    parse(r"\a")
	      .expect_err("Parser should not accept an escape leader without whitespace");
	  }

	  /// Test that the normal text parser consumes any quantity of normal text and stops at the
	  /// first backslash or quote. Also test that the parser produces an error when asked to parse
	  /// text beginning with an unexpected special character.
	  #[test]
	  fn parses_normal_text() {
	    let mut parse =
	      normal_text::<WantError>('"');

	    // the parser should entirely consume an input containing no escape leader nor delimiter
	    assert_eq!(parse("one two 3."), Ok(("", "one two 3.")),
	               "Parser consumes normal text");

	    // the parser should stop at its configured delimiter
	    assert_eq!(parse("literally.\""), Ok(("\"", "literally.")),
	               "Parser does not consume past delimiter");

	    assert_eq!(normal_text::<WantError>('|')("pop|tarts"), Ok(("|tarts", "pop")),
	               "Parser does not consume past delimiter");

	    // the parser should always stop at \
	    assert_eq!(parse(r"literally.\"), Ok((r"\", "literally.")),
	               "Parser does not consume escape leader");

	    // the parser should always stop at \
	    assert_eq!(parse(r"pop\tarts"), Ok((r"\tarts", "pop")),
	               "Parser does not consume escape leader");

	    // e.g. unless some other character that might otherwise be a delimiter (|) is this parser's
	    // configured delimiter, it should consume it.
	    assert_eq!(parse("pop|tarts"), Ok(("", "pop|tarts")),
	               "Parser consumes other delimiters unless told otherwise");

	    // the following two cases will not return Ok() because the input begins with a character the
	    // parser stops at, which leaves it with nothing to consume

	    parse("\\Strawberry Pop Tarts may be a cheap and inexpensive source of incendiary devices.")
	      .expect_err("Consumed text beginning with backslash");

	    parse("\"Strawberry Pop Tarts may be a cheap and inexpensive source of incendiary devices.")
	      .expect_err("Consumed text beginning with quote");
	  }

	  /// Asserts, for each `input => output, message,` triple, that the character escape parser
	  /// consumes all of `input` and produces `output`.
	  macro_rules! expect_char_escape {
	    ($($input:literal => $output:literal, $msg:literal,)+) => {
	      $( assert_eq!(
	        escaped_char::<WantError>($input),
	        Ok(("", $output)),
	        "{}: unexpected result for input {}", $msg, $input
	      ); )+
	    }
	  }

	  /// Test that the character escape sequence parser calls complex escape sequence parsers as
	  /// needed and returns the expected character for a given escape sequence as defined in R7RS §6.7.
	  #[test]
	  fn parses_char_escape() {
	    expect_char_escape!(
	      r"\xAE;" => '\u{AE}', "Should parse hexadecimal scalar escape seq",
	      r"\a"    => '\u{07}', "Should parse alarm escape seq",
	      r"\b"    => '\u{08}', "Should parse backspace escape seq",
	      r"\t"    => '\u{09}', "Should parse tab escape seq",
	      r"\n"    => '\u{0A}', "Should parse linefeed escape seq",
	      r"\r"    => '\u{0D}', "Should parse carriage return escape seq",
	      "\\\""   => '\u{22}', "Should parse double quote escape seq",
	      r"\\"    => '\u{5C}', "Should parse backslash escape seq",
	      r"\|"    => '\u{7C}', "Should parse vertical line escape seq",
	    );

	    // should not parse undefined character escape. if adding new escapes, ensure that this
	    // remains undefined (e.g. update it *here* to something not in the escape parser). the escape
	    // leader must be present, otherwise the parser fails before it ever looks at the `z`.
	    escaped_char::<WantError>(r"\z")
	      .expect_err("Should not parse undefined escape sequence");

	    // inside strings and identifiers the hexadecimal escape must be closed by ;
	    escaped_char::<WantError>(r"\xAE")
	      .expect_err("Should not parse hexadecimal scalar escape seq without terminator");

	    escaped_char::<WantError>("a")
	      .expect_err("Should not parse escape sequence without escape leader");
	  }

	  // TODO assert_matches! stabilization would be very, very welcome
	  /// Asserts that `$left` matches the pattern `$right`, panicking with `$fail_msg`, the expected
	  /// pattern, and the actual value otherwise.
	  macro_rules! expect_match {
	    ($left:expr, $right:pat_param, $fail_msg:expr) => {
	      match $left {
	        $right => (),
	        thing => panic!(
	          "{}: expected {} but got {:?} instead",
	          $fail_msg, stringify!($right), thing
	        )
	      }
	    }
	  }

	  /// Test that the fragment parser emits the correct fragment variants for a given input.
	  #[test]
	  fn emits_fragment_variants() {
	    let mut parse =
	      string_fragment::<WantError>('"');

	    expect_match!(parse("The Pop Tarts ... flames 10-18 inches"), Ok((_, Fragment::String(_))),
	      "Did not produce String fragment for normal text");

	    expect_match!(parse(r"\xAE;"), Ok((_, Fragment::Char(_))),
	      "Did not produce Char fragment for character escape");

	    expect_match!(parse(r"\ "), Ok((_, Fragment::Void)),
	      "Did not produce Void fragment for escaped whitespace");

	    expect_match!(parse("\\ \n "), Ok((_, Fragment::Void)),
	      "Did not produce Void fragment for escaped whitespace containing linefeed");
	  }

	  /// Test that the string body parser consumes and recombines the input string as expected.
	  #[test]
	  fn parses_string_body() {
	    let mut parse =
	      string_body::<WantError>('"');

	    assert_eq!(parse(r"\x1FAD0; Pop Tarts"), Ok(("", String::from("\u{1FAD0} Pop Tarts"))),
	               "Failed to transform char fragment followed by string fragment");

	    assert_eq!(parse(r"one\ two"), Ok(("", String::from("onetwo"))),
	               "Failed to transform string fragments separated by void fragment");

	    assert_eq!(parse(""), Ok(("", String::from(""))),
	               "Failed to transform empty input to empty string");

	    assert_eq!(parse(r"\  "), Ok(("", String::from(""))),
	               "Failed to transform single void fragment to empty string");

	    assert_eq!(parse(r"\  \n18-10 inches"), Ok(("", String::from("\n18-10 inches"))),
	               "Failed to transform [Void,Char,String] sequence");

	    // the body ends where the delimiter begins; the delimiter itself is left for the caller
	    assert_eq!(parse("Pop\" Tarts"), Ok(("\" Tarts", String::from("Pop"))),
	               "Failed to stop at unescaped delimiter");

	    // non-printable chars that may appear in strings unescaped

	    // yes, the newlines are supposed to be there
	    assert_eq!(parse("\nStrawberry\n"), Ok(("", String::from("\nStrawberry\n"))),
	               "Failed to transform String fragment containing newlines");

	    assert_eq!(parse("\t"), Ok(("", String::from("\t"))),
	               "Failed to transform String fragment containing tab");
	  }

	  /// Test that delimited strings are correctly parsed, including where broken by newlines, etc…
	  /// Also test that escaped string delimiters are treated nicely. Note that most tests concerned
	  /// with the body parser are in [parses_string_body].
	  #[test]
	  fn parses_delimited_string() {
	    let parse =
	      string::<WantError>;

	    assert_eq!(
	      parse("\"Strawberry Pop Tarts\""),
	      Ok(("", Atom::String(String::from("Strawberry Pop Tarts")))),
	      "Failed to parse valid delimited string"
	    );

	    assert_eq!(
	      parse("\"\\\"flames 18-10 inches\""),
	      Ok(("", Atom::String(String::from("\"flames 18-10 inches")))),
	      "Failed to parse valid delimited string with single escaped delimiter"
	    );

	    assert_eq!(
	      parse("\"\\\"flames 18-10 inches\\\" in height\""),
	      Ok(("", Atom::String(String::from("\"flames 18-10 inches\" in height")))),
	      "Failed to parse valid delimited string with balanced escaped delimiters"
	    );

	    assert_eq!(
	      parse("\"incendiary devices.\n Toasters\""),
	      Ok(("", Atom::String(String::from("incendiary devices.\n Toasters")))),
	      "Failed to parse valid delimited string containing unescaped newline"
	    );

	    parse("Pop Tarts may be … incendiary devices")
	      .expect_err("Did not yield error when parsing un-delimited string");

	    parse("\"Pop Tarts may be … incendiary devices")
	      .expect_err("Did not yield error when parsing string without closing delimiter");
	  }

	  /// Test that a |-delimited identifier is parsed and its delimiters are dropped.
	  #[test]
	  fn parses_delimited_identifier() {
	    let parse =
	      long_identifier::<WantError>;

	    assert_eq!(
	      parse("|pop tarts|"),
	      Ok(("", Atom::String(String::from("pop tarts")))),
	      "Failed to parse delimited identifier"
	    )
	  }
	}