elfsternberg/start_of_line.rs

## start_of_line.rs
use nom::{
    bytes::complete::{tag, take_while},
    combinator::recognize,
    sequence::preceded,
    IResult,
};

/// Skips any run of `\n` at the front of `input`, leaving the parser positioned on the first
/// character of a line.
///
/// This parser cannot see what came before `input`, so the very start of the input is treated
/// as the start of a line: input with no leading `\n` succeeds without consuming anything.
///
/// # Errors
///
/// Returns a recoverable `nom::Err::Error` carrying `ErrorKind::Eof` when `input` is empty,
/// because there is no content left that could begin a line.
fn is_beginning_of_line(input: &str) -> IResult<&str, ()> {
    if input.is_empty() {
        // Building a stock nom error by hand; the pattern is taken from Daniel Imfeld's
        // write-up (https://imfeld.dev/writing/parsing_with_nom).
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Eof,
        )));
    }

    // `recognize` hands back the exact span its inner parser consumed (here, the run of line
    // feeds), which is what you would keep if you wanted to count the blank lines skipped. This
    // parser only reports position, so the span is dropped.
    //
    // No further check is needed afterwards: either nothing was consumed and we are still where
    // the input started, or one or more `\n` were consumed and we stopped at column 0 of the
    // next line. Both count as "beginning of line", so empty input is the only failure case.
    let (remaining, _skipped_newlines) = recognize(take_while(|c: char| c == '\n'))(input)?;
    Ok((remaining, ()))
}

/// Example use: matches the literal `BEGIN` once any leading line feeds have been skipped.
///
/// The line-start check contributes no output of its own; `preceded` drops it and returns only
/// the matched `BEGIN`, with the rest of the input as the remainder.
fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
    let mut begin_on_fresh_line = preceded(is_beginning_of_line, tag("BEGIN"));
    begin_on_fresh_line(input)
}

fn main() use nom::{
    bytes::complete::{tag, take_while},
    combinator::recognize,
    sequence::preceded,
    IResult,
};

/// Skips any run of `\n` at the front of `input`, leaving the parser positioned on the first
/// character of a line.
///
/// This parser cannot see what came before `input`, so the very start of the input is treated
/// as the start of a line: input with no leading `\n` succeeds without consuming anything.
///
/// # Errors
///
/// Returns a recoverable `nom::Err::Error` carrying `ErrorKind::Eof` when `input` is empty,
/// because there is no content left that could begin a line.
fn is_beginning_of_line(input: &str) -> IResult<&str, ()> {
    if input.is_empty() {
        // Building a stock nom error by hand; the pattern is taken from Daniel Imfeld's
        // write-up (https://imfeld.dev/writing/parsing_with_nom).
        return Err(nom::Err::Error(nom::error::Error::new(
            input,
            nom::error::ErrorKind::Eof,
        )));
    }

    // `recognize` hands back the exact span its inner parser consumed (here, the run of line
    // feeds), which is what you would keep if you wanted to count the blank lines skipped. This
    // parser only reports position, so the span is dropped.
    //
    // No further check is needed afterwards: either nothing was consumed and we are still where
    // the input started, or one or more `\n` were consumed and we stopped at column 0 of the
    // next line. Both count as "beginning of line", so empty input is the only failure case.
    let (remaining, _skipped_newlines) = recognize(take_while(|c: char| c == '\n'))(input)?;
    Ok((remaining, ()))
}

/// Example use: matches the literal `BEGIN` once any leading line feeds have been skipped.
///
/// The line-start check contributes no output of its own; `preceded` drops it and returns only
/// the matched `BEGIN`, with the rest of the input as the remainder.
fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
    let mut begin_on_fresh_line = preceded(is_beginning_of_line, tag("BEGIN"));
    begin_on_fresh_line(input)
}


/// Runs the demo parser over a sample string and prints either the match or the parse error.
fn main() {
    let sample = "\nBEGINThis is where your input will be next.";
    // `pattern_at_beginning_of_line` yields only what its second parser matched; the line feeds
    // skipped by the line-start check are discarded by `preceded`.
    let report = match pattern_at_beginning_of_line(sample) {
        Ok((rest, found)) => format!("Matched: '{}', Remaining: '{}'", found, rest),
        Err(failure) => format!("Error: {:?}", failure),
    };
    println!("{}", report);
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the recoverable nom error a parser is expected to report at `input`.
    fn recoverable(
        input: &str,
        code: nom::error::ErrorKind,
    ) -> nom::Err<nom::error::Error<&str>> {
        nom::Err::Error(nom::error::Error::new(input, code))
    }

    #[test]
    fn predicate_empty_in_not_sol() {
        // Empty input has no line left to begin, and is rejected with `Eof`.
        assert_eq!(
            is_beginning_of_line(""),
            Err(recoverable("", nom::error::ErrorKind::Eof))
        );
    }

    #[test]
    fn predicate_sol() {
        // A lone line feed is consumed, leaving nothing behind.
        assert_eq!(is_beginning_of_line("\n"), Ok(("", ())));
    }

    #[test]
    fn predicate_not_sol() {
        // Despite the name, this is a success case: with no leading line feed the start of the
        // input itself counts as the start of a line, and nothing is consumed.
        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", ())));
    }

    #[test]
    fn predicate_more_than_sol() {
        // The line feed is skipped and the content after it is left untouched.
        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", ())));
    }

    #[test]
    fn sample_test() {
        assert_eq!(
            pattern_at_beginning_of_line("BEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_multiple_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_space_leading_return() {
        // A space is not a line feed, so nothing is skipped and `BEGIN` is looked for at the
        // space, where it cannot match.
        let input = " \nBEGIN: the rest";
        assert_eq!(
            pattern_at_beginning_of_line(input),
            Err(recoverable(input, nom::error::ErrorKind::Tag))
        );
    }
}


    let input = "\nBEGINThis is where your input will be next";
    match pattern_at_beginning_of_line(input) {
        Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining),
        Err(err) => println!("Error: {:?}", err),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the recoverable nom error a parser is expected to report at `input`.
    fn recoverable(
        input: &str,
        code: nom::error::ErrorKind,
    ) -> nom::Err<nom::error::Error<&str>> {
        nom::Err::Error(nom::error::Error::new(input, code))
    }

    #[test]
    fn predicate_empty_in_not_sol() {
        // Empty input has no line left to begin, and is rejected with `Eof`.
        assert_eq!(
            is_beginning_of_line(""),
            Err(recoverable("", nom::error::ErrorKind::Eof))
        );
    }

    #[test]
    fn predicate_sol() {
        // A lone line feed is consumed, leaving nothing behind.
        assert_eq!(is_beginning_of_line("\n"), Ok(("", ())));
    }

    #[test]
    fn predicate_not_sol() {
        // Despite the name, this is a success case: with no leading line feed the start of the
        // input itself counts as the start of a line, and nothing is consumed.
        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", ())));
    }

    #[test]
    fn predicate_more_than_sol() {
        // The line feed is skipped and the content after it is left untouched.
        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", ())));
    }

    #[test]
    fn sample_test() {
        assert_eq!(
            pattern_at_beginning_of_line("BEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_multiple_leading_return() {
        assert_eq!(
            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
            Ok((": the rest", "BEGIN"))
        );
    }

    #[test]
    fn with_space_leading_return() {
        // A space is not a line feed, so nothing is skipped and `BEGIN` is looked for at the
        // space, where it cannot match.
        let input = " \nBEGIN: the rest";
        assert_eq!(
            pattern_at_beginning_of_line(input),
            Err(recoverable(input, nom::error::ErrorKind::Tag))
        );
    }
}
	use nom::{
	bytes::complete::{tag, take_while},
	combinator::recognize,
	sequence::preceded,
	IResult,
	};

	/// Skips any run of `\n` at the front of `input`, leaving the parser positioned on the first
	/// character of a line.
	///
	/// This parser cannot see what came before `input`, so the very start of the input is treated
	/// as the start of a line: input with no leading `\n` succeeds without consuming anything.
	///
	/// # Errors
	///
	/// Returns a recoverable `nom::Err::Error` carrying `ErrorKind::Eof` when `input` is empty,
	/// because there is no content left that could begin a line.
	fn is_beginning_of_line(input: &str) -> IResult<&str, ()> {
	    if input.is_empty() {
	        // Building a stock nom error by hand; the pattern is taken from Daniel Imfeld's
	        // write-up (https://imfeld.dev/writing/parsing_with_nom).
	        return Err(nom::Err::Error(nom::error::Error::new(
	            input,
	            nom::error::ErrorKind::Eof,
	        )));
	    }

	    // `recognize` hands back the exact span its inner parser consumed (here, the run of line
	    // feeds), which is what you would keep if you wanted to count the blank lines skipped.
	    // This parser only reports position, so the span is dropped.
	    //
	    // No further check is needed afterwards: either nothing was consumed and we are still
	    // where the input started, or one or more `\n` were consumed and we stopped at column 0
	    // of the next line. Both count as "beginning of line", so empty input is the only
	    // failure case.
	    let (remaining, _skipped_newlines) = recognize(take_while(|c: char| c == '\n'))(input)?;
	    Ok((remaining, ()))
	}

	/// Example use: matches the literal `BEGIN` once any leading line feeds have been skipped.
	///
	/// The line-start check contributes no output of its own; `preceded` drops it and returns
	/// only the matched `BEGIN`, with the rest of the input as the remainder.
	fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
	    let mut begin_on_fresh_line = preceded(is_beginning_of_line, tag("BEGIN"));
	    begin_on_fresh_line(input)
	}

	fn main() use nom::{
	bytes::complete::{tag, take_while},
	combinator::recognize,
	sequence::preceded,
	IResult,
	};

	/// Skips any run of `\n` at the front of `input`, leaving the parser positioned on the first
	/// character of a line.
	///
	/// This parser cannot see what came before `input`, so the very start of the input is treated
	/// as the start of a line: input with no leading `\n` succeeds without consuming anything.
	///
	/// # Errors
	///
	/// Returns a recoverable `nom::Err::Error` carrying `ErrorKind::Eof` when `input` is empty,
	/// because there is no content left that could begin a line.
	fn is_beginning_of_line(input: &str) -> IResult<&str, ()> {
	    if input.is_empty() {
	        // Building a stock nom error by hand; the pattern is taken from Daniel Imfeld's
	        // write-up (https://imfeld.dev/writing/parsing_with_nom).
	        return Err(nom::Err::Error(nom::error::Error::new(
	            input,
	            nom::error::ErrorKind::Eof,
	        )));
	    }

	    // `recognize` hands back the exact span its inner parser consumed (here, the run of line
	    // feeds), which is what you would keep if you wanted to count the blank lines skipped.
	    // This parser only reports position, so the span is dropped.
	    //
	    // No further check is needed afterwards: either nothing was consumed and we are still
	    // where the input started, or one or more `\n` were consumed and we stopped at column 0
	    // of the next line. Both count as "beginning of line", so empty input is the only
	    // failure case.
	    let (remaining, _skipped_newlines) = recognize(take_while(|c: char| c == '\n'))(input)?;
	    Ok((remaining, ()))
	}

	/// Example use: matches the literal `BEGIN` once any leading line feeds have been skipped.
	///
	/// The line-start check contributes no output of its own; `preceded` drops it and returns
	/// only the matched `BEGIN`, with the rest of the input as the remainder.
	fn pattern_at_beginning_of_line(input: &str) -> IResult<&str, &str> {
	    let mut begin_on_fresh_line = preceded(is_beginning_of_line, tag("BEGIN"));
	    begin_on_fresh_line(input)
	}


	/// Runs the demo parser over a sample string and prints either the match or the parse error.
	fn main() {
	    let sample = "\nBEGINThis is where your input will be next.";
	    // `pattern_at_beginning_of_line` yields only what its second parser matched; the line
	    // feeds skipped by the line-start check are discarded by `preceded`.
	    let report = match pattern_at_beginning_of_line(sample) {
	        Ok((rest, found)) => format!("Matched: '{}', Remaining: '{}'", found, rest),
	        Err(failure) => format!("Error: {:?}", failure),
	    };
	    println!("{}", report);
	}

	#[cfg(test)]
	mod tests {
	    use super::*;

	    /// Builds the recoverable nom error a parser is expected to report at `input`.
	    fn recoverable(
	        input: &str,
	        code: nom::error::ErrorKind,
	    ) -> nom::Err<nom::error::Error<&str>> {
	        nom::Err::Error(nom::error::Error::new(input, code))
	    }

	    #[test]
	    fn predicate_empty_in_not_sol() {
	        // Empty input has no line left to begin, and is rejected with `Eof`.
	        assert_eq!(
	            is_beginning_of_line(""),
	            Err(recoverable("", nom::error::ErrorKind::Eof))
	        );
	    }

	    #[test]
	    fn predicate_sol() {
	        // A lone line feed is consumed, leaving nothing behind.
	        assert_eq!(is_beginning_of_line("\n"), Ok(("", ())));
	    }

	    #[test]
	    fn predicate_not_sol() {
	        // Despite the name, this is a success case: with no leading line feed the start of
	        // the input itself counts as the start of a line, and nothing is consumed.
	        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", ())));
	    }

	    #[test]
	    fn predicate_more_than_sol() {
	        // The line feed is skipped and the content after it is left untouched.
	        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", ())));
	    }

	    #[test]
	    fn sample_test() {
	        assert_eq!(
	            pattern_at_beginning_of_line("BEGIN: the rest"),
	            Ok((": the rest", "BEGIN"))
	        );
	    }

	    #[test]
	    fn with_leading_return() {
	        assert_eq!(
	            pattern_at_beginning_of_line("\nBEGIN: the rest"),
	            Ok((": the rest", "BEGIN"))
	        );
	    }

	    #[test]
	    fn with_multiple_leading_return() {
	        assert_eq!(
	            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
	            Ok((": the rest", "BEGIN"))
	        );
	    }

	    #[test]
	    fn with_space_leading_return() {
	        // A space is not a line feed, so nothing is skipped and `BEGIN` is looked for at the
	        // space, where it cannot match.
	        let input = " \nBEGIN: the rest";
	        assert_eq!(
	            pattern_at_beginning_of_line(input),
	            Err(recoverable(input, nom::error::ErrorKind::Tag))
	        );
	    }
	}



	let input = "\nBEGINThis is where your input will be next";
	match pattern_at_beginning_of_line(input) {
	Ok((remaining, matched)) => println!("Matched: '{}', Remaining: '{}'", matched, remaining),
	Err(err) => println!("Error: {:?}", err),
	}
	}

	#[cfg(test)]
	mod tests {
	    use super::*;

	    /// Builds the recoverable nom error a parser is expected to report at `input`.
	    fn recoverable(
	        input: &str,
	        code: nom::error::ErrorKind,
	    ) -> nom::Err<nom::error::Error<&str>> {
	        nom::Err::Error(nom::error::Error::new(input, code))
	    }

	    #[test]
	    fn predicate_empty_in_not_sol() {
	        // Empty input has no line left to begin, and is rejected with `Eof`.
	        assert_eq!(
	            is_beginning_of_line(""),
	            Err(recoverable("", nom::error::ErrorKind::Eof))
	        );
	    }

	    #[test]
	    fn predicate_sol() {
	        // A lone line feed is consumed, leaving nothing behind.
	        assert_eq!(is_beginning_of_line("\n"), Ok(("", ())));
	    }

	    #[test]
	    fn predicate_not_sol() {
	        // Despite the name, this is a success case: with no leading line feed the start of
	        // the input itself counts as the start of a line, and nothing is consumed.
	        assert_eq!(is_beginning_of_line("TEST"), Ok(("TEST", ())));
	    }

	    #[test]
	    fn predicate_more_than_sol() {
	        // The line feed is skipped and the content after it is left untouched.
	        assert_eq!(is_beginning_of_line("\nTEST"), Ok(("TEST", ())));
	    }

	    #[test]
	    fn sample_test() {
	        assert_eq!(
	            pattern_at_beginning_of_line("BEGIN: the rest"),
	            Ok((": the rest", "BEGIN"))
	        );
	    }

	    #[test]
	    fn with_leading_return() {
	        assert_eq!(
	            pattern_at_beginning_of_line("\nBEGIN: the rest"),
	            Ok((": the rest", "BEGIN"))
	        );
	    }

	    #[test]
	    fn with_multiple_leading_return() {
	        assert_eq!(
	            pattern_at_beginning_of_line("\n\n\nBEGIN: the rest"),
	            Ok((": the rest", "BEGIN"))
	        );
	    }

	    #[test]
	    fn with_space_leading_return() {
	        // A space is not a line feed, so nothing is skipped and `BEGIN` is looked for at the
	        // space, where it cannot match.
	        let input = " \nBEGIN: the rest";
	        assert_eq!(
	            pattern_at_beginning_of_line(input),
	            Err(recoverable(input, nom::error::ErrorKind::Tag))
	        );
	    }
	}