Skip to content

Instantly share code, notes, and snippets.

@kelvinmo
Last active February 2, 2021 11:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kelvinmo/a349d30e8854b24813eea1ff3040a8c6 to your computer and use it in GitHub Desktop.
Save kelvinmo/a349d30e8854b24813eea1ff3040a8c6 to your computer and use it in GitHub Desktop.
Loose-tight text format
//! The loose-tight text format (lttxt)
//!
//! Contains helpers to parse and write to files using this format.
//!
//! # Overview
//!
//! The *loose-tight text format* is a loose, semi-structured text file format
//! for simple uses such as configuration files. The format is defined in the
//! [specification](#specification) below.
//!
//! # Reading and writing
//!
//! To read, use a reader that can return the input as individual lines (such as
//! [`std::io::BufReader`]). Call the [`get_tokens`] function to parse each line
//! into a set of tokens.
//!
//! To write, use the [`ltwriteln`] macro to convert a line of tokens into a string.
//!
//! # Specification
//!
//! * An lttxt file is divided into *lines*, which are further divided into string *tokens*.
//! * Lines are delimited in the same way as [`str::lines`], i.e. either a newline
//! (`\n`) or a carriage return with a line feed (`\r\n`).
//! * Tokens are delimited by a SPACE character (unless the space character is in
//! a quoted token).
//! * Comments are denoted by the hash (`#`) character (unless it appears in a quoted
//! token). The hash character and all subsequent characters are ignored until the
//! end of the line.
//! * Tokens can be *quoted* or *unquoted*.
//! * Quoted tokens are surrounded by quotation marks `"`. They MAY contain special
//! characters (defined below). Apart from the SPACE and hash characters, special
//! characters MUST be escaped when appearing within a quoted token. In addition,
//! the backslash character `\` MUST be escaped as `\\`.
//! * Unquoted tokens are not surrounded by quotation marks. They MUST NOT contain
//! special characters.
//! * *Special characters* are as follows, with the escape sequence in parentheses:
//!   - SPACE
//!   - hash
//!   - tab (`\t`)
//!   - newline (`\n`)
//!   - carriage return (`\r`)
//!   - quotation marks (`\"`)
//! * When writing an lttxt file, quoted tokens SHOULD only be used if the token contains
//! special characters. Otherwise, unquoted tokens SHOULD be used.
//!
//! [`get_tokens`]: ./fn.get_tokens.html
//! [`std::io::BufReader`]: https://doc.rust-lang.org/nightly/std/io/struct.BufReader.html
//! [`str::lines`]: https://doc.rust-lang.org/nightly/std/primitive.str.html#method.lines
//! [`ltwriteln`]: ./macro.ltwriteln!.html
use std::fmt;
/// The type returned when the input does not conform to the expected format.
///
/// Use the `Debug` implementation to generate detailed information. Where a
/// variant carries a `usize`, it is the position in the line at which the
/// problem was detected.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// A closing quotation mark in a quoted token is followed by something other
    /// than a space, a hash or the end of the line. The position is that of the
    /// character following the quotation mark.
    UnescapedQuote(usize),
    /// A quotation mark is encountered when parsing an unquoted token. The
    /// position is that of the quotation mark.
    UnexpectedQuote(usize),
    /// End of line is encountered when parsing a quoted token.
    UnmatchedQuote,
    /// An unknown escape sequence is encountered. Holds the character that
    /// followed the backslash and the position of the backslash.
    UnexpectedEscapeSequence(char, usize),
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ParseError::UnescapedQuote(i) => write!(f, "Unescaped quote in quoted token at position {}", i),
ParseError::UnexpectedQuote(i) => write!(f, "Unexpected quote in unquoted token at position {}", i),
ParseError::UnmatchedQuote => write!(f, "End of line without closing quote"),
ParseError::UnexpectedEscapeSequence(c, i) => write!(f, "Unexpected escape sequence '\\{}' at position {}", c, i)
}
}
}
impl std::error::Error for ParseError {
fn description(&self) -> &str {
match *self {
ParseError::UnescapedQuote(_) => "Unescaped quote in quoted token",
ParseError::UnexpectedQuote(_) => "Unexpected quote in unquoted token",
ParseError::UnmatchedQuote => "End of line without closing quote",
ParseError::UnexpectedEscapeSequence(_, _) => "Unexpected escape sequence"
}
}
fn cause(&self) -> Option<&dyn std::error::Error> {
None
}
}
/// An iterator over tokens in a line.
///
/// This is created by calling [`get_tokens`]. See the documentation for [`get_tokens`]
/// for further details.
///
/// [`get_tokens`]: ./fn.get_tokens.html
#[derive(Debug, Clone)]
pub struct Tokens<'a> {
    /// The line being tokenized.
    s: &'a str,
    /// Byte offset into `s` at which the next token search starts. It is set to
    /// `s.len()` once the line is exhausted or an error has been reported.
    pos: usize,
}
impl Iterator for Tokens<'_> {
    type Item = Result<String, ParseError>;

    /// Parses and returns the next token of the line.
    ///
    /// Returns `None` once the line (or its non-comment part) is exhausted.
    /// After an `Err` has been returned the iterator is finished and every
    /// later call returns `None`. Positions carried by errors are byte offsets
    /// into the line.
    fn next(&mut self) -> Option<Self::Item> {
        let line_end = self.s.len();
        if self.pos >= line_end {
            return None;
        }

        let start = self.pos;
        let mut token = String::new();
        // `char_indices` yields byte offsets, which is what `pos` and string
        // slicing need; a plain character count would be wrong (and could
        // panic) as soon as the line contains a multi-byte character.
        let mut chars = self.s[start..].char_indices();
        let mut in_token = false;
        let mut in_quote = false;

        while let Some((i, c)) = chars.next() {
            match c {
                // Inside quotes a hash is ordinary text.
                '#' if in_quote => token.push(c),
                // Otherwise it starts a comment that runs to the end of the line.
                '#' => {
                    self.pos = line_end;
                    return if in_token { Some(Ok(token)) } else { None };
                }
                // Closing quote: it must be followed by a separator, a comment
                // or the end of the line.
                '"' if in_quote => {
                    return match chars.next() {
                        Some((j, ' ')) => {
                            // Resume just after the separating space.
                            self.pos = start + j + 1;
                            Some(Ok(token))
                        }
                        None | Some((_, '#')) => {
                            // Nothing but (at most) a comment remains.
                            self.pos = line_end;
                            Some(Ok(token))
                        }
                        Some((j, _)) => {
                            self.pos = line_end;
                            Some(Err(ParseError::UnescapedQuote(start + j)))
                        }
                    };
                }
                // Quote in the middle of an unquoted token.
                '"' if in_token => {
                    self.pos = line_end;
                    return Some(Err(ParseError::UnexpectedQuote(start + i)));
                }
                // Opening quote.
                '"' => {
                    in_token = true;
                    in_quote = true;
                }
                // Escape sequence inside a quoted token.
                '\\' if in_quote => {
                    // A backslash that ends the line is kept as itself; the
                    // loop then finishes and reports the unmatched quote.
                    let escaped = chars.next().map_or(c, |(_, n)| n);
                    match escaped {
                        '\\' | '"' => token.push(escaped),
                        't' => token.push('\t'),
                        'n' => token.push('\n'),
                        'r' => token.push('\r'),
                        unexpected => {
                            self.pos = line_end;
                            return Some(Err(ParseError::UnexpectedEscapeSequence(
                                unexpected,
                                start + i,
                            )));
                        }
                    }
                }
                ' ' if in_quote => token.push(c),
                // A space ends an unquoted token.
                ' ' if in_token => {
                    self.pos = start + i + 1;
                    return Some(Ok(token));
                }
                // Leading or repeated spaces between tokens are skipped.
                ' ' => {}
                // Any other character, including a backslash outside quotes,
                // is literal token text.
                _ => {
                    in_token = true;
                    token.push(c);
                }
            }
        }

        // End of line reached: make every later call return `None`.
        self.pos = line_end;
        if in_quote {
            Some(Err(ParseError::UnmatchedQuote))
        } else if in_token {
            Some(Ok(token))
        } else {
            None
        }
    }
}
/// Creates a token iterator over a single line of an lttxt file.
///
/// Each item produced by the returned iterator is a
/// [`Result`]`<`[`String`]`, `[`ParseError`]`>`: the next token on success, or a
/// [`ParseError`] if the next token could not be parsed.
///
/// [`Result`]: https://doc.rust-lang.org/nightly/std/result/enum.Result.html
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`ParseError`]: ./enum.ParseError.html
pub fn get_tokens(s: &str) -> Tokens<'_> {
    // Parsing is lazy: start at the beginning of the line and let the iterator
    // do the work.
    Tokens { s, pos: 0 }
}
/// Macro for printing an lttxt formatted line to the standard output.
///
/// Takes zero or more comma-separated `&str` tokens; a trailing comma is
/// accepted but not required. Each token is passed through `quote_token` and
/// the results are joined with a single space.
///
/// See [`ltwriteln!`] for the variant that writes to a buffer.
///
/// [`ltwriteln!`]: ./macro.ltwriteln!.html
#[macro_export]
macro_rules! ltprintln {
    ($($arg:expr),* $(,)?) => {
        println!(
            "{}",
            vec![$($arg),*]
                .into_iter()
                // `$crate` keeps the path valid when the macro is used from another crate.
                .map(|token| $crate::lttxt::quote_token(token))
                .collect::<Vec<String>>()
                .join(" ")
        )
    };
}
/// Macro for formatting a set of arguments into an lttxt formatted line.
///
/// The first argument is the output buffer; it is handed to `writeln!`, so a
/// matching `Write` trait (`std::fmt::Write` or `std::io::Write`) must be in
/// scope at the call site. The remaining arguments are the `&str` tokens of
/// the line, separated by commas; a trailing comma is accepted but not required.
/// Each token is passed through `quote_token` and the results are joined with
/// a single space. The macro evaluates to the result of `writeln!`.
#[macro_export]
macro_rules! ltwriteln {
    ($dst:expr $(, $arg:expr)* $(,)?) => {
        writeln!(
            $dst,
            "{}",
            vec![$($arg),*]
                .into_iter()
                // `$crate` keeps the path valid when the macro is used from another crate.
                .map(|token| $crate::lttxt::quote_token(token))
                .collect::<Vec<String>>()
                .join(" ")
        )
    };
}
/// Returns a quoted token if the string contains a special character, or an
/// unquoted token otherwise.
///
/// Special characters are the space, hash, tab, newline, carriage return and
/// quotation mark. When the token is quoted, tabs, newlines, carriage returns,
/// quotation marks and backslashes are written as escape sequences (`\t`, `\n`,
/// `\r`, `\"`, `\\`). An unquoted token is returned unchanged, so a backslash in
/// it stays a single literal backslash.
///
/// The empty string is written as `""`, because an empty unquoted token would
/// disappear from the line.
pub fn quote_token(s: &str) -> String {
    const SPECIAL: [char; 6] = [' ', '#', '\t', '\n', '\r', '"'];

    let needs_quotes = s.is_empty() || s.contains(&SPECIAL[..]);
    if !needs_quotes {
        return s.to_owned();
    }

    // Two extra bytes for the surrounding quotation marks; escapes may grow it further.
    let mut result = String::with_capacity(s.len() + 2);
    result.push('"');
    for c in s.chars() {
        match c {
            '\t' => result.push_str("\\t"),
            '\n' => result.push_str("\\n"),
            '\r' => result.push_str("\\r"),
            '"' => result.push_str("\\\""),
            // Inside quotes a backslash introduces an escape sequence, so a
            // literal one must itself be escaped.
            '\\' => result.push_str("\\\\"),
            other => result.push(other),
        }
    }
    result.push('"');
    result
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment