Last active
March 15, 2018 17:50
-
-
Save boxofrox/8dc3d9b78fe2d59cfa1dc949b6a2ee22 to your computer and use it in GitHub Desktop.
simple shell parser in Rust using combine crate (not working)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cargo test | |
Compiling csvmap v0.1.0 (file:///home/charetjc/files/development/rust/tinker/csvmap) | |
Finished dev [unoptimized + debuginfo] target(s) in 2.34 secs | |
Running target/debug/deps/csvmap-cdc7458d4bf17d5e | |
running 5 tests | |
test tests::can_parse ... ok | |
test tests::can_parse_single_quoted_shell_arg ... ok | |
test tests::can_parse_double_quoted_shell_arg ... ok | |
test tests::can_parse_any_shell_arg ... FAILED | |
failures: | |
---- tests::can_parse_any_shell_arg stdout ---- | |
thread 'tests::can_parse_any_shell_arg' panicked at 'assertion failed: `(left == right)` | |
left: `Ok(("", "abc"))`, | |
right: `Ok(("abc", ""))`', src/main.rs:335:13 | |
failures: | |
tests::can_parse_any_shell_arg | |
test result: FAILED. 3 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out | |
error: test failed, to rerun pass '--bin csvmap' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[macro_use] | |
extern crate combine; | |
use combine::{any, between, many, parser, satisfy, sep_by, token, ParseResult, Parser}; | |
use combine::primitives::Stream; | |
/// One character read from inside a quoted shell argument.
///
/// Used to selectively evaluate certain escape sequences, and leave other
/// sequences as they are.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Quoted {
    /// A regular character, or an escape sequence already evaluated to one.
    Char(char),
    /// An escape sequence left unevaluated; holds the character that followed
    /// the control character (assumed to be `\`).
    Escape(char),
}

/// Owning iterator over the characters a [`Quoted`] expands to.
#[derive(Debug, Clone)]
struct OwnedQuotedIter {
    /// Number of characters yielded so far.
    count: usize,
    quoted: Quoted,
}

// Implemented as the standard trait (rather than an inherent `into_iter`
// method) so `Quoted` also works with `for`, `flatten`, and friends.
// Existing `value.into_iter()` calls resolve to this impl unchanged.
impl IntoIterator for Quoted {
    type Item = char;
    type IntoIter = OwnedQuotedIter;

    fn into_iter(self) -> OwnedQuotedIter {
        OwnedQuotedIter {
            count: 0,
            quoted: self,
        }
    }
}

impl Iterator for OwnedQuotedIter {
    type Item = char;

    /// Yields one char for evaluated escapes or regular chars (`Char`), and
    /// two chars (`\` followed by the escaped char) for unevaluated escapes
    /// (`Escape`). Returns `None` on every call after that.
    fn next(&mut self) -> Option<Self::Item> {
        let item = match (self.count, &self.quoted) {
            (0, &Quoted::Char(c)) => Some(c),
            (0, &Quoted::Escape(_)) => Some('\\'),
            (1, &Quoted::Escape(c)) => Some(c),
            (_, _) => None,
        };
        // Only advance while items are produced, so the counter stays bounded
        // no matter how often an exhausted iterator is polled.
        if item.is_some() {
            self.count += 1;
        }
        item
    }
}
// Collect everything between single quotes, but don't stop prematurely | |
// if escaped single quotes are found; collect them, too, without the control | |
// character. | |
fn parse_single_quoted_shell_arg<I>(input: I) -> ParseResult<String, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
fn escaped<I>(input: I) -> ParseResult<Quoted, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
use Quoted::*; | |
let (c, input) = satisfy(|c| c != '\'').parse_stream(input)?; | |
match c { | |
'\\' => input.combine(|input| { | |
any() | |
.map(|c| match c { | |
'\'' => Char('\''), | |
c => Escape(c), | |
}) | |
.parse_stream(input) | |
}), | |
_ => Ok((Char(c), input)), | |
} | |
}; | |
let inner = many::<Vec<_>, _>(parser(escaped)); | |
let mut iter = between(token('\''), token('\''), inner).iter(input); | |
let result = iter.by_ref() | |
.flat_map(|x| x) // Not sure where this Vec originated, but flatten it. | |
.flat_map(|x| x.into_iter()) // Flatten Vec from `inner` and iterate over Quoted. | |
.collect::<String>(); | |
iter.into_result(result) | |
} | |
// Collect everything between double quotes, but don't stop prematurely | |
// if escaped double quotes are found; collect them, too, without the control | |
// character. Also evaluate escape sequences for { \\, \n, \r } into | |
// { \, newline, carriage-return }, respsectively. | |
fn parse_double_quoted_shell_arg<I>(input: I) -> ParseResult<String, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
fn escaped<I>(input: I) -> ParseResult<Quoted, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
use Quoted::*; | |
let (c, input) = satisfy(|c| c != '"').parse_stream(input)?; | |
match c { | |
'\\' => input.combine(|input| { | |
any() | |
.map(|c| match c { | |
'"' => Char('"'), | |
'\\' => Char('\\'), | |
'n' => Char('\n'), | |
'r' => Char('\r'), | |
c => Escape(c), | |
}) | |
.parse_stream(input) | |
}), | |
_ => Ok((Char(c), input)), | |
} | |
}; | |
let inner = many::<Vec<_>, _>(parser(escaped)); | |
let mut iter = between(token('\"'), token('\"'), inner).iter(input); | |
let result = iter.by_ref() | |
.flat_map(|x| x) // Not sure where this Vec originated, but flatten it. | |
.flat_map(|x| x.into_iter()) // Flatten Vec from `inner` and iterate over Quoted. | |
.collect::<String>(); | |
iter.into_result(result) | |
} | |
fn parse_any_shell_arg<I>(input: I) -> ParseResult<String, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
let single = parser(parse_single_quoted_shell_arg); | |
let double = parser(parse_double_quoted_shell_arg); | |
let unquoted = many::<String, _>(satisfy(|c| c != ' ')); | |
// Does not parse "abc". | |
single.or(double).or(unquoted).parse_stream(input) | |
// Can parse "abc", but cannot parse single or double quoted arguments. | |
//unquoted.parse_stream(input) | |
} | |
fn parse_shell_command<I>(input: I) -> ParseResult<Vec<String>, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
sep_by(parser(parse_any_shell_arg), token(' ')).parse_stream(input) | |
} | |
#[cfg(test)]
mod tests {
    use super::*;

    // Every table below pairs an input with the argument it should parse to;
    // in all cases the parser is expected to consume the entire input.

    #[test]
    fn can_parse_single_quoted_shell_arg() {
        let table = vec![
            ("'abc'", "abc"),
            // Escapes other than \' are kept verbatim.
            ("'a\\nb\\\"c'", "a\\nb\\\"c"),
            // An escaped single quote loses its backslash.
            ("'abc\\''", "abc'"),
        ];

        for (source, arg) in table {
            let outcome = parser(parse_single_quoted_shell_arg).parse(source);
            assert_eq!(outcome, Ok((arg.to_owned(), "")));
        }
    }

    #[test]
    fn can_parse_double_quoted_shell_arg() {
        let table = vec![
            ("\"abc\"", "abc"),
            // \n and \" are evaluated.
            ("\"a\\nb\\\"c\"", "a\nb\"c"),
            // An unknown escape (\') is kept verbatim.
            ("\"abc\\'\"", "abc\\'"),
        ];

        for (source, arg) in table {
            let outcome = parser(parse_double_quoted_shell_arg).parse(source);
            assert_eq!(outcome, Ok((arg.to_owned(), "")));
        }
    }

    #[test]
    fn can_parse() {
        // The unquoted parser on its own.
        let single_word = many::<String, _>(satisfy(|c| c != ' ')).parse("abc");
        assert_eq!(single_word, Ok(("abc".to_owned(), "")));

        // The same parser driven by `sep_by` with a single-space separator.
        let two_words = sep_by::<Vec<String>, _, _>(
            many::<String, _>(satisfy(|c| c != ' ')),
            token(' '),
        ).parse("abc def");
        assert_eq!(
            two_words,
            Ok((vec!["abc".to_owned(), "def".to_owned()], ""))
        );
    }

    #[test]
    fn can_parse_any_shell_arg() {
        // Unquoted, double-quoted, and single-quoted spellings of one argument.
        let table = vec![
            ("abc", "abc"),
            ("\"abc\"", "abc"),
            ("'abc'", "abc"),
        ];

        for (source, arg) in table {
            let outcome = parser(parse_any_shell_arg).parse(source);
            assert_eq!(outcome, Ok((arg.to_owned(), "")));
        }
    }
}
The problem here is that using `fn iter` to parse makes a failed parse a successful outcome. So when the parser looks at the first token and sees `"` but expects `'`, it will fail with an "empty error" (which says that it failed without consuming input). When `iter` then sees that the parser failed without consuming input, it will just return `None` immediately.
If you just remove the use of `iter` and call `between(token('\"'), token('\"'), inner).parse_stream(input)` directly, it should work.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The `Vec` comes from the use of `many` here.