Skip to content

Instantly share code, notes, and snippets.

@Marwes
Forked from boxofrox/cargo_test.txt
Created March 15, 2018 17:50
Show Gist options
  • Save Marwes/e9700c536c14f49044dd01352919d0ae to your computer and use it in GitHub Desktop.
Save Marwes/e9700c536c14f49044dd01352919d0ae to your computer and use it in GitHub Desktop.
simple shell parser in Rust using combine crate (not working)
$ cargo test
Compiling csvmap v0.1.0 (file:///home/charetjc/files/development/rust/tinker/csvmap)
Finished dev [unoptimized + debuginfo] target(s) in 2.34 secs
Running target/debug/deps/csvmap-cdc7458d4bf17d5e
running 5 tests
test tests::can_parse ... ok
test tests::can_parse_single_quoted_shell_arg ... ok
test tests::can_parse_double_quoted_shell_arg ... ok
test tests::can_parse_any_shell_arg ... FAILED
failures:
---- tests::can_parse_any_shell_arg stdout ----
thread 'tests::can_parse_any_shell_arg' panicked at 'assertion failed: `(left == right)`
left: `Ok(("", "abc"))`,
right: `Ok(("abc", ""))`', src/main.rs:335:13
failures:
tests::can_parse_any_shell_arg
test result: FAILED. 3 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
error: test failed, to rerun pass '--bin csvmap'
#[macro_use]
extern crate combine;
use combine::{any, between, many, parser, satisfy, sep_by, token, ParseResult, Parser};
use combine::Stream;
/// A single unit of text parsed from inside a quoted shell argument.
///
/// Used to selectively evaluate certain escape sequences, and leave other
/// sequences as they are.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Quoted {
    /// A character to emit as-is: either a regular character or the result of
    /// an evaluated escape sequence.
    Char(char),
    /// An escape sequence that was left unevaluated. Holds the character that
    /// followed the control character (assumed to be `\`).
    Escape(char),
}

/// Owning iterator over the characters that a `Quoted` expands to.
#[derive(Debug, Clone)]
struct OwnedQuotedIter {
    /// Number of characters yielded so far.
    count: usize,
    /// The value being expanded.
    quoted: Quoted,
}

// Implemented as the standard trait rather than an inherent `into_iter`, so a
// `Quoted` can be used directly with `for` loops and adaptors such as
// `Iterator::flatten`. Existing `quoted.into_iter()` calls resolve to this
// method without any change at the call site.
impl IntoIterator for Quoted {
    type Item = char;
    type IntoIter = OwnedQuotedIter;

    /// Consumes the value and returns an iterator over its expansion.
    fn into_iter(self) -> OwnedQuotedIter {
        OwnedQuotedIter {
            count: 0,
            quoted: self,
        }
    }
}

impl Iterator for OwnedQuotedIter {
    type Item = char;

    /// Yields one char for `Char` (regular chars and evaluated escapes), and
    /// two chars for `Escape`: the backslash followed by the escaped char.
    fn next(&mut self) -> Option<Self::Item> {
        let item = match (self.count, self.quoted) {
            (0, Quoted::Char(c)) => Some(c),
            (0, Quoted::Escape(_)) => Some('\\'),
            (1, Quoted::Escape(c)) => Some(c),
            _ => None,
        };
        // Advance only when something was yielded, so that polling an
        // exhausted iterator never keeps growing the counter.
        if item.is_some() {
            self.count += 1;
        }
        item
    }
}
// Collect everything between single quotes, but don't stop prematurely
// if escaped single quotes are found; collect them, too, without the control
// character.
fn parse_single_quoted_shell_arg<I>(input: I) -> ParseResult<String, I>
where
I: Stream<Item = char>,
{
fn escaped<I>(input: I) -> ParseResult<Quoted, I>
where
I: Stream<Item = char>,
{
use Quoted::*;
let (c, input) = satisfy(|c| c != '\'').parse_stream(input)?;
match c {
'\\' => input.combine(|input| {
any()
.map(|c| match c {
'\'' => Char('\''),
c => Escape(c),
})
.parse_stream(input)
}),
_ => Ok((Char(c), input)),
}
};
let inner = many::<Vec<_>, _>(parser(escaped))
.map(|v| v.into_iter().flat_map(|x| x.into_iter()).collect());
between(token('\''), token('\''), inner).parse_stream(input)
}
// Collect everything between double quotes, but don't stop prematurely
// if escaped double quotes are found; collect them, too, without the control
// character. Also evaluate escape sequences for { \\, \n, \r } into
// { \, newline, carriage-return }, respsectively.
fn parse_double_quoted_shell_arg<I>(input: I) -> ParseResult<String, I>
where
I: Stream<Item = char>,
{
fn escaped<I>(input: I) -> ParseResult<Quoted, I>
where
I: Stream<Item = char>,
{
use Quoted::*;
let (c, input) = satisfy(|c| c != '"').parse_stream(input)?;
match c {
'\\' => input.combine(|input| {
any()
.map(|c| match c {
'"' => Char('"'),
'\\' => Char('\\'),
'n' => Char('\n'),
'r' => Char('\r'),
c => Escape(c),
})
.parse_stream(input)
}),
_ => Ok((Char(c), input)),
}
};
let inner = many::<Vec<_>, _>(parser(escaped))
.map(|v| v.into_iter().flat_map(|x| x.into_iter()).collect());
between(token('\"'), token('\"'), inner).parse_stream(input)
}
/// Parses one shell argument in any of three forms: single-quoted,
/// double-quoted, or unquoted (a run of characters other than a space).
fn parse_any_shell_arg<I>(input: I) -> ParseResult<String, I>
where
    I: Stream<Item = char>,
{
    // The alternatives are chained with `or` in this order: single-quoted,
    // double-quoted, then unquoted.
    let mut any_arg = parser(parse_single_quoted_shell_arg)
        .or(parser(parse_double_quoted_shell_arg))
        .or(many::<String, _>(satisfy(|ch| ch != ' ')));

    // NOTE(review): known problem reported by the original author — this
    // chain does not parse "abc". Running only the unquoted parser can parse
    // "abc", but then single or double quoted arguments cannot be parsed.
    // The cause is not established here.
    any_arg.parse_stream(input)
}
/// Parses a whole shell command into its list of arguments, where arguments
/// are separated by a single space character.
fn parse_shell_command<I>(input: I) -> ParseResult<Vec<String>, I>
where
    I: Stream<Item = char>,
{
    let argument = parser(parse_any_shell_arg);
    let separator = token(' ');
    sep_by(argument, separator).parse_stream(input)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-quoted arguments: `\'` becomes `'`, every other backslash
    /// sequence is expected to come back unchanged.
    #[test]
    fn can_parse_single_quoted_shell_arg() {
        let table = vec![
            ("'abc'", Ok((String::from("abc"), ""))),
            ("'a\\nb\\\"c'", Ok((String::from("a\\nb\\\"c"), ""))),
            ("'abc\\''", Ok((String::from("abc'"), ""))),
        ];
        for (source, want) in table {
            let got = parser(parse_single_quoted_shell_arg).parse(source);
            assert_eq!(got, want);
        }
    }

    /// Double-quoted arguments: `\n` and `\"` are evaluated, while an
    /// unrecognised sequence such as `\'` is expected to come back unchanged.
    #[test]
    fn can_parse_double_quoted_shell_arg() {
        let table = vec![
            ("\"abc\"", Ok((String::from("abc"), ""))),
            ("\"a\\nb\\\"c\"", Ok((String::from("a\nb\"c"), ""))),
            ("\"abc\\'\"", Ok((String::from("abc\\'"), ""))),
        ];
        for (source, want) in table {
            let got = parser(parse_double_quoted_shell_arg).parse(source);
            assert_eq!(got, want);
        }
    }

    /// Sanity checks of the building blocks used for unquoted arguments.
    #[test]
    fn can_parse() {
        // Verify the unquoted parser works.
        let word = many::<String, _>(satisfy(|ch| ch != ' ')).parse("abc");
        assert_eq!(word, Ok((String::from("abc"), "")));

        // Check behavior of sep_by with unquoted parser.
        let words = sep_by(many::<String, _>(satisfy(|ch| ch != ' ')), token(' '))
            .parse("abc def");
        assert_eq!(
            words,
            Ok((vec![String::from("abc"), String::from("def")], ""))
        );
    }

    /// Every argument form (unquoted, double-quoted, single-quoted) is
    /// expected to produce the same text `abc` with no input left over.
    #[test]
    fn can_parse_any_shell_arg() {
        let table = vec![
            ("abc", Ok((String::from("abc"), ""))),
            ("\"abc\"", Ok((String::from("abc"), ""))),
            ("'abc'", Ok((String::from("abc"), ""))),
        ];
        for (source, want) in table {
            let got = parser(parse_any_shell_arg).parse(source);
            assert_eq!(got, want);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment