Skip to content

Instantly share code, notes, and snippets.

@boxofrox
Last active March 15, 2018 17:50
Show Gist options
  • Save boxofrox/8dc3d9b78fe2d59cfa1dc949b6a2ee22 to your computer and use it in GitHub Desktop.
Save boxofrox/8dc3d9b78fe2d59cfa1dc949b6a2ee22 to your computer and use it in GitHub Desktop.
simple shell parser in Rust using combine crate (not working)
$ cargo test
Compiling csvmap v0.1.0 (file:///home/charetjc/files/development/rust/tinker/csvmap)
Finished dev [unoptimized + debuginfo] target(s) in 2.34 secs
Running target/debug/deps/csvmap-cdc7458d4bf17d5e
running 5 tests
test tests::can_parse ... ok
test tests::can_parse_single_quoted_shell_arg ... ok
test tests::can_parse_double_quoted_shell_arg ... ok
test tests::can_parse_any_shell_arg ... FAILED
failures:
---- tests::can_parse_any_shell_arg stdout ----
thread 'tests::can_parse_any_shell_arg' panicked at 'assertion failed: `(left == right)`
left: `Ok(("", "abc"))`,
right: `Ok(("abc", ""))`', src/main.rs:335:13
failures:
tests::can_parse_any_shell_arg
test result: FAILED. 3 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
error: test failed, to rerun pass '--bin csvmap'
#[macro_use]
extern crate combine;
use combine::{any, between, many, parser, satisfy, sep_by, token, ParseResult, Parser};
use combine::primitives::Stream;
/// One logical character found inside a quoted shell argument.
///
/// Used to selectively evaluate certain escape sequences, and leave other
/// sequences as they are.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Quoted {
    /// A regular character, or an escape sequence that was evaluated down to
    /// a single character.
    Char(char),
    /// An escape sequence that was left unevaluated. Holds the character that
    /// followed the control character (assumed to be `\`).
    Escape(char),
}

/// Iterator over the characters a `Quoted` value expands to.
#[derive(Debug, Clone)]
struct OwnedQuotedIter {
    /// Number of characters emitted so far.
    count: usize,
    /// The value being expanded.
    quoted: Quoted,
}

// Implemented as the standard trait (rather than an inherent `into_iter`
// method) so `Quoted` works in `for` loops and as a `flat_map` item, while
// existing `quoted.into_iter()` calls keep resolving through the prelude.
impl IntoIterator for Quoted {
    type Item = char;
    type IntoIter = OwnedQuotedIter;

    fn into_iter(self) -> OwnedQuotedIter {
        OwnedQuotedIter {
            count: 0,
            quoted: self,
        }
    }
}

impl Iterator for OwnedQuotedIter {
    type Item = char;

    /// Yields one char for `Char` (regular chars and evaluated escapes), and
    /// two chars, `\` followed by the escaped char, for `Escape`.
    fn next(&mut self) -> Option<char> {
        let emitted = match (self.count, self.quoted) {
            (0, Quoted::Char(c)) => c,
            (0, Quoted::Escape(_)) => '\\',
            (1, Quoted::Escape(c)) => c,
            // Exhausted: leave the counter alone so repeated calls stay `None`
            // without growing `count` any further.
            _ => return None,
        };
        self.count += 1;
        Some(emitted)
    }
}
// Collect everything between single quotes, but don't stop prematurely
// if escaped single quotes are found; collect them, too, without the control
// character.
fn parse_single_quoted_shell_arg<I>(input: I) -> ParseResult<String, I>
where
I: Stream<Item = char>,
{
fn escaped<I>(input: I) -> ParseResult<Quoted, I>
where
I: Stream<Item = char>,
{
use Quoted::*;
let (c, input) = satisfy(|c| c != '\'').parse_stream(input)?;
match c {
'\\' => input.combine(|input| {
any()
.map(|c| match c {
'\'' => Char('\''),
c => Escape(c),
})
.parse_stream(input)
}),
_ => Ok((Char(c), input)),
}
};
let inner = many::<Vec<_>, _>(parser(escaped));
let mut iter = between(token('\''), token('\''), inner).iter(input);
let result = iter.by_ref()
.flat_map(|x| x) // Not sure where this Vec originated, but flatten it.
.flat_map(|x| x.into_iter()) // Flatten Vec from `inner` and iterate over Quoted.
.collect::<String>();
iter.into_result(result)
}
// Collect everything between double quotes, but don't stop prematurely
// if escaped double quotes are found; collect them, too, without the control
// character. Also evaluate escape sequences for { \\, \n, \r } into
// { \, newline, carriage-return }, respsectively.
fn parse_double_quoted_shell_arg<I>(input: I) -> ParseResult<String, I>
where
I: Stream<Item = char>,
{
fn escaped<I>(input: I) -> ParseResult<Quoted, I>
where
I: Stream<Item = char>,
{
use Quoted::*;
let (c, input) = satisfy(|c| c != '"').parse_stream(input)?;
match c {
'\\' => input.combine(|input| {
any()
.map(|c| match c {
'"' => Char('"'),
'\\' => Char('\\'),
'n' => Char('\n'),
'r' => Char('\r'),
c => Escape(c),
})
.parse_stream(input)
}),
_ => Ok((Char(c), input)),
}
};
let inner = many::<Vec<_>, _>(parser(escaped));
let mut iter = between(token('\"'), token('\"'), inner).iter(input);
let result = iter.by_ref()
.flat_map(|x| x) // Not sure where this Vec originated, but flatten it.
.flat_map(|x| x.into_iter()) // Flatten Vec from `inner` and iterate over Quoted.
.collect::<String>();
iter.into_result(result)
}
/// Parses one shell argument in any of the supported forms: single-quoted,
/// double-quoted, or a bare (unquoted) word.
///
/// The alternatives are tried in that order. `or` only moves on to the next
/// alternative when the previous one fails without consuming input, so a
/// quoted parser that reports success on input lacking its opening quote
/// prevents the bare-word parser from ever running (a bare `abc` then comes
/// back as an empty match).
fn parse_any_shell_arg<I>(input: I) -> ParseResult<String, I>
where
    I: Stream<Item = char>,
{
    // A bare word is every char up to, but not including, the next space.
    // `many` accepts zero chars, so this alternative can yield an empty string.
    let bare_word = many::<String, _>(satisfy(|c| c != ' '));
    let quoted =
        parser(parse_single_quoted_shell_arg).or(parser(parse_double_quoted_shell_arg));
    quoted.or(bare_word).parse_stream(input)
}
/// Parses a whole shell command line into its list of arguments.
///
/// Arguments are parsed with `parse_any_shell_arg` and must be separated by
/// exactly one space character each.
fn parse_shell_command<I>(input: I) -> ParseResult<Vec<String>, I>
where
    I: Stream<Item = char>,
{
    let argument = parser(parse_any_shell_arg);
    let separator = token(' ');
    sep_by(argument, separator).parse_stream(input)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Single quotes: only `\'` is evaluated; other escapes stay verbatim.
    #[test]
    fn can_parse_single_quoted_shell_arg() {
        assert_eq!(
            parser(parse_single_quoted_shell_arg).parse("'abc'"),
            Ok(("abc".to_owned(), ""))
        );
        assert_eq!(
            parser(parse_single_quoted_shell_arg).parse("'a\\nb\\\"c'"),
            Ok(("a\\nb\\\"c".to_owned(), ""))
        );
        assert_eq!(
            parser(parse_single_quoted_shell_arg).parse("'abc\\''"),
            Ok(("abc'".to_owned(), ""))
        );
    }

    // Double quotes: `\"` and `\n` are evaluated here; `\'` stays verbatim.
    #[test]
    fn can_parse_double_quoted_shell_arg() {
        assert_eq!(
            parser(parse_double_quoted_shell_arg).parse("\"abc\""),
            Ok(("abc".to_owned(), ""))
        );
        assert_eq!(
            parser(parse_double_quoted_shell_arg).parse("\"a\\nb\\\"c\""),
            Ok(("a\nb\"c".to_owned(), ""))
        );
        assert_eq!(
            parser(parse_double_quoted_shell_arg).parse("\"abc\\'\""),
            Ok(("abc\\'".to_owned(), ""))
        );
    }

    #[test]
    fn can_parse() {
        // Verify the unquoted parser works.
        assert_eq!(
            many::<String, _>(satisfy(|c| c != ' ')).parse("abc"),
            Ok(("abc".to_owned(), ""))
        );
        // Check behavior of sep_by with unquoted parser.
        assert_eq!(
            sep_by(many::<String, _>(satisfy(|c| c != ' ')), token(' ')).parse("abc def"),
            Ok((vec!["abc".to_owned(), "def".to_owned()], ""))
        );
    }

    // Every argument form must be accepted: bare, double-quoted, single-quoted.
    #[test]
    fn can_parse_any_shell_arg() {
        assert_eq!(
            parser(parse_any_shell_arg).parse("abc"),
            Ok(("abc".to_owned(), ""))
        );
        assert_eq!(
            parser(parse_any_shell_arg).parse("\"abc\""),
            Ok(("abc".to_owned(), ""))
        );
        assert_eq!(
            parser(parse_any_shell_arg).parse("'abc'"),
            Ok(("abc".to_owned(), ""))
        );
    }
}
@Marwes
Copy link

Marwes commented Mar 15, 2018

@Marwes
Copy link

Marwes commented Mar 15, 2018

The problem here is that using fn iter to parse makes "no parse" a successful outcome. So when the parser looks at the first token and sees " but expects ', it will fail with an "empty error" (which says that it failed without consuming input). When iter then sees that the parser failed without consuming input, it will just return None immediately.

@Marwes
Copy link

Marwes commented Mar 15, 2018

If you just remove the use of iter and do between(token('\"'), token('\"'), inner).parse_stream(input) directly it should work.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment