-
-
Save Marwes/e9700c536c14f49044dd01352919d0ae to your computer and use it in GitHub Desktop.
simple shell parser in Rust using combine crate (not working)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ cargo test
Compiling csvmap v0.1.0 (file:///home/charetjc/files/development/rust/tinker/csvmap)
Finished dev [unoptimized + debuginfo] target(s) in 2.34 secs
Running target/debug/deps/csvmap-cdc7458d4bf17d5e
running 5 tests
test tests::can_parse ... ok
test tests::can_parse_single_quoted_shell_arg ... ok
test tests::can_parse_double_quoted_shell_arg ... ok
test tests::can_parse_any_shell_arg ... FAILED
failures:
---- tests::can_parse_any_shell_arg stdout ----
thread 'tests::can_parse_any_shell_arg' panicked at 'assertion failed: `(left == right)`
left: `Ok(("", "abc"))`,
right: `Ok(("abc", ""))`', src/main.rs:335:13
failures:
tests::can_parse_any_shell_arg
test result: FAILED. 3 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out
error: test failed, to rerun pass '--bin csvmap'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[macro_use] | |
extern crate combine; | |
use combine::{any, between, many, parser, satisfy, sep_by, token, ParseResult, Parser}; | |
use combine::Stream; | |
/// One logical character read from inside a quoted shell argument.
///
/// Used to selectively evaluate certain escape sequences while leaving
/// other sequences exactly as they were written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Quoted {
    /// A plain character, or an escape sequence already evaluated to one.
    Char(char),
    /// An escape sequence left unevaluated; expands to a backslash followed
    /// by this character (the control character is assumed to be `\`).
    Escape(char),
}

/// Owning iterator over the characters a [`Quoted`] expands to.
#[derive(Debug, Clone)]
struct OwnedQuotedIter {
    /// Number of characters yielded so far.
    count: usize,
    /// The value being expanded.
    quoted: Quoted,
}

// Implemented as the standard trait (rather than an inherent `into_iter`)
// so that `Quoted` works directly with `for`, `flatten`, `flat_map`, etc.
impl IntoIterator for Quoted {
    type Item = char;
    type IntoIter = OwnedQuotedIter;

    /// Starts expanding this value into its output characters.
    fn into_iter(self) -> OwnedQuotedIter {
        OwnedQuotedIter {
            count: 0,
            quoted: self,
        }
    }
}

impl Iterator for OwnedQuotedIter {
    type Item = char;

    /// Yields one char for `Char` (regular chars and evaluated escapes) and
    /// two chars, `\` then the escaped char, for `Escape`.
    fn next(&mut self) -> Option<char> {
        let item = match (self.count, self.quoted) {
            (0, Quoted::Char(c)) => Some(c),
            (0, Quoted::Escape(_)) => Some('\\'),
            (1, Quoted::Escape(c)) => Some(c),
            _ => None,
        };
        // Only advance while producing output, so the counter stays bounded
        // no matter how often an exhausted iterator is polled.
        if item.is_some() {
            self.count += 1;
        }
        item
    }

    /// Reports the exact number of characters still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let total = match self.quoted {
            Quoted::Char(_) => 1,
            Quoted::Escape(_) => 2,
        };
        let remaining = total - self.count.min(total);
        (remaining, Some(remaining))
    }
}
// Collect everything between single quotes, but don't stop prematurely | |
// if escaped single quotes are found; collect them, too, without the control | |
// character. | |
fn parse_single_quoted_shell_arg<I>(input: I) -> ParseResult<String, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
fn escaped<I>(input: I) -> ParseResult<Quoted, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
use Quoted::*; | |
let (c, input) = satisfy(|c| c != '\'').parse_stream(input)?; | |
match c { | |
'\\' => input.combine(|input| { | |
any() | |
.map(|c| match c { | |
'\'' => Char('\''), | |
c => Escape(c), | |
}) | |
.parse_stream(input) | |
}), | |
_ => Ok((Char(c), input)), | |
} | |
}; | |
let inner = many::<Vec<_>, _>(parser(escaped)) | |
.map(|v| v.into_iter().flat_map(|x| x.into_iter()).collect()); | |
between(token('\''), token('\''), inner).parse_stream(input) | |
} | |
// Collect everything between double quotes, but don't stop prematurely | |
// if escaped double quotes are found; collect them, too, without the control | |
// character. Also evaluate escape sequences for { \\, \n, \r } into | |
// { \, newline, carriage-return }, respsectively. | |
fn parse_double_quoted_shell_arg<I>(input: I) -> ParseResult<String, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
fn escaped<I>(input: I) -> ParseResult<Quoted, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
use Quoted::*; | |
let (c, input) = satisfy(|c| c != '"').parse_stream(input)?; | |
match c { | |
'\\' => input.combine(|input| { | |
any() | |
.map(|c| match c { | |
'"' => Char('"'), | |
'\\' => Char('\\'), | |
'n' => Char('\n'), | |
'r' => Char('\r'), | |
c => Escape(c), | |
}) | |
.parse_stream(input) | |
}), | |
_ => Ok((Char(c), input)), | |
} | |
}; | |
let inner = many::<Vec<_>, _>(parser(escaped)) | |
.map(|v| v.into_iter().flat_map(|x| x.into_iter()).collect()); | |
between(token('\"'), token('\"'), inner).parse_stream(input) | |
} | |
fn parse_any_shell_arg<I>(input: I) -> ParseResult<String, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
let single = parser(parse_single_quoted_shell_arg); | |
let double = parser(parse_double_quoted_shell_arg); | |
let unquoted = many::<String, _>(satisfy(|c| c != ' ')); | |
// Does not parse "abc". | |
single.or(double).or(unquoted).parse_stream(input) | |
// Can parse "abc", but cannot parse single or double quoted arguments. | |
//unquoted.parse_stream(input) | |
} | |
fn parse_shell_command<I>(input: I) -> ParseResult<Vec<String>, I> | |
where | |
I: Stream<Item = char>, | |
{ | |
sep_by(parser(parse_any_shell_arg), token(' ')).parse_stream(input) | |
} | |
#[cfg(test)]
mod tests {
    use super::*;

    /// Single quotes: `\'` becomes `'`; every other escape is kept verbatim.
    #[test]
    fn can_parse_single_quoted_shell_arg() {
        let table = vec![
            ("'abc'", Ok((String::from("abc"), ""))),
            ("'a\\nb\\\"c'", Ok((String::from("a\\nb\\\"c"), ""))),
            ("'abc\\''", Ok((String::from("abc'"), ""))),
        ];
        for (source, want) in table {
            let got = parser(parse_single_quoted_shell_arg).parse(source);
            assert_eq!(got, want);
        }
    }

    /// Double quotes: `\n` and `\"` are evaluated; `\'` is kept verbatim.
    #[test]
    fn can_parse_double_quoted_shell_arg() {
        let table = vec![
            ("\"abc\"", Ok((String::from("abc"), ""))),
            ("\"a\\nb\\\"c\"", Ok((String::from("a\nb\"c"), ""))),
            ("\"abc\\'\"", Ok((String::from("abc\\'"), ""))),
        ];
        for (source, want) in table {
            let got = parser(parse_double_quoted_shell_arg).parse(source);
            assert_eq!(got, want);
        }
    }

    /// Sanity checks on the building blocks used for unquoted arguments.
    #[test]
    fn can_parse() {
        // The unquoted parser on its own consumes a whole word.
        let word = many::<String, _>(satisfy(|ch| ch != ' ')).parse("abc");
        assert_eq!(word, Ok((String::from("abc"), "")));

        // `sep_by` combined with the unquoted parser splits on spaces.
        let words = sep_by(many::<String, _>(satisfy(|ch| ch != ' ')), token(' ')).parse("abc def");
        assert_eq!(
            words,
            Ok((vec![String::from("abc"), String::from("def")], ""))
        );
    }

    /// Each argument form must yield the same contents through the
    /// combined parser.
    #[test]
    fn can_parse_any_shell_arg() {
        let table = vec![
            ("abc", Ok((String::from("abc"), ""))),
            ("\"abc\"", Ok((String::from("abc"), ""))),
            ("'abc'", Ok((String::from("abc"), ""))),
        ];
        for (source, want) in table {
            let got = parser(parse_any_shell_arg).parse(source);
            assert_eq!(got, want);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment