Skip to content

Instantly share code, notes, and snippets.

@anirudhb
Last active March 1, 2020 04:07
Show Gist options
  • Save anirudhb/377dfc5f6514eb2ce3d8146acaa8003b to your computer and use it in GitHub Desktop.
Save anirudhb/377dfc5f6514eb2ce3d8146acaa8003b to your computer and use it in GitHub Desktop.
Format string parser in Rust!
//! Just for prototyping. I'll move this to lib later I promise!
// Pulls in IResult
use nom::IResult;
// Pulls in some useful pre-defined parsers.
use nom::{
bytes::complete::{tag, take_while, take_while_m_n},
character::{
complete::{alpha1, digit1},
is_alphabetic, is_digit,
},
sequence::tuple,
};
// Explanation:
// Takes a string to parse
// and spits out two new strings: the part that it parsed, and the part
// left over. If it fails it will return an Err as opposed to an Ok.
// ^ That applies to all parsers that return something with 'IResult'
//
// 'tag' is a pre-defined parser that simply parses a constant string.
// Note: the first '&str' in 'IResult<&str, &str>' is the remaining
// input, the other type can be anything else!
// 'tuple' chains together multiple parsers at once.
// 'alpha1' matches one or more letters.
// 'digit1' matches one or more digits.
// 'is_alphabetic' checks if a byte (u8) is an ASCII letter.
// 'is_digit' checks if a byte (u8) is an ASCII digit.
/// Consumes a literal `{` from the front of the input.
///
/// On success yields the remaining input and the matched `"{"`.
fn open_brace(i: &str) -> IResult<&str, &str>
{
    let brace = tag("{");
    brace(i)
}
/// Consumes a literal `}` from the front of the input.
///
/// On success yields the remaining input and the matched `"}"`.
fn close_brace(i: &str) -> IResult<&str, &str>
{
    let brace = tag("}");
    brace(i)
}
/// Consumes a literal `:` from the front of the input.
///
/// On success yields the remaining input and the matched `":"`.
fn colon(i: &str) -> IResult<&str, &str>
{
    let separator = tag(":");
    separator(i)
}
/// Reports whether `c` is the underscore character `_`.
fn is_underscore(c: char) -> bool
{
    matches!(c, '_')
}
/// Checks whether `c` may appear inside an identifier: an ASCII letter,
/// an ASCII digit, or `_`.
///
/// The check is done on the `char` itself rather than on `c as u8`.
/// That cast keeps only the low byte of the code point, so a non-ASCII
/// character such as U+0131 (low byte 0x31, i.e. '1') would otherwise
/// be accepted as if it were a digit.
fn is_ident_char(c: char) -> bool
{
    c.is_ascii_alphanumeric() || c == '_'
}
/// Checks whether `c` is a valid first character of an identifier:
/// an ASCII letter or `_`.
///
/// The check is done on the `char` itself rather than on `c as u8`.
/// That cast keeps only the low byte of the code point, so a non-ASCII
/// character such as U+0161 (low byte 0x61, i.e. 'a') would otherwise
/// be accepted as if it were a letter.
fn is_ident_begin_char(c: char) -> bool
{
    c.is_ascii_alphabetic() || c == '_'
}
/// Parses an identifier:
///   * exactly one character accepted by `is_ident_begin_char`, then
///   * zero or more characters accepted by `is_ident_char`.
///
/// Returns the remaining input and the identifier as an owned `String`,
/// because the two matched pieces are glued back together.
fn ident(i: &str) -> IResult<&str, String>
{
    // First character: must be a valid identifier start.
    let (after_head, head) = take_while_m_n(1, 1, is_ident_begin_char)(i)?;
    // Everything that follows, for as long as it is an identifier character.
    let (remaining, tail) = take_while(is_ident_char)(after_head)?;

    // Join both pieces into one owned string.
    let mut full = String::with_capacity(head.len() + tail.len());
    full.push_str(head);
    full.push_str(tail);
    Ok((remaining, full))
}
/// This parses a full format string!
/// See, it returns a FormatString not a &str!
fn format_string(i: &str) -> IResult<&str, FormatString>
{
// P.S. '_' discards the value in that position
let (remaining, (_, name, _, bytes, format_specifier, _)) = tuple(
(open_brace, ident, colon, digit1, alpha1, close_brace),
)(i)?;
// Now we have to parse the number-string into a number
// We panic if it fails since it should never fail
// (that is what 'unwrap' does)
let bytes_num = bytes.parse::<u64>().unwrap();
// Now we create our struct and respond with success!
// 'to_owned' makes a copy of a string.
let fs = FormatString {
name, /* to_owned not necessary since 'ident' already returns
* String */
bytes: bytes_num,
format_specifier: format_specifier.to_owned(),
};
return Ok((remaining, fs));
}
/// Represents one format string (e.g. '{size:4u}' -> 32-bit positive
/// number called 'size').
///
/// The derive below generates trait implementations for us:
/// `Debug` makes it possible to print the content of the struct,
/// `Clone` allows copying it, and `PartialEq`/`Eq` allow comparing two
/// values with `==` (handy for asserting on parse results).
#[derive(Debug, Clone, PartialEq, Eq)]
struct FormatString
{
    /// The identifier before the ':'.
    name: String,
    /// The decimal number after the ':' (presumably a size in bytes).
    bytes: u64,
    /// The letters that follow the number.
    format_specifier: String,
}
// The boxed `dyn Error` in the return type is a catch-all for any kind
// of error.
fn main() -> std::result::Result<(), Box<dyn std::error::Error>>
{
    // Read one line of input from stdin. The '?' makes main return
    // early with the error if reading fails.
    let mut line = String::new();
    std::io::stdin().read_line(&mut line)?;

    // Try to parse the line, dropping the unparsed remainder. The
    // outcome is reported either way, so a parse failure is printed
    // here rather than returned from main.
    match format_string(&line).map(|(_rest, parsed)| parsed)
    {
        Err(problem) =>
        {
            println!("Aw it failed to parse :(");
            println!("{:#?}", problem);
        },
        Ok(parsed) =>
        {
            println!("yayyyyy it parsed!");
            println!("{:#?}", parsed);
        },
    }

    // The parse error (if any) was handled above, so main succeeds.
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment