-
-
Save cole-h/c045b48222821dcc79dc1626a7da8929 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/src/uu/tr/src/expand.rs b/src/uu/tr/src/expand.rs | |
index 3291d57a..c6d15705 100644 | |
--- a/src/uu/tr/src/expand.rs | |
+++ b/src/uu/tr/src/expand.rs | |
@@ -14,18 +14,50 @@ use std::cmp::min; | |
use std::iter::Peekable; | |
use std::ops::RangeInclusive; | |
+/// According to POSIX, an "octal sequence shall consist of a <backslash> | |
+/// followed by the longest sequence of one, two, or three-octal-digit | |
+/// characters (01234567)" ([source]), so limit an escape to a maximum of | |
+/// 4 chars: the leading backslash plus up to three octal digits. | |
+/// | |
+/// [source]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html | |
+const OCTAL_MAX_LEN: usize = 4; | |
+ | |
#[inline] | |
-fn unescape_char(c: char) -> char { | |
- match c { | |
- 'a' => 0x07u8 as char, | |
- 'b' => 0x08u8 as char, | |
- 'f' => 0x0cu8 as char, | |
- 'v' => 0x0bu8 as char, | |
- 'n' => '\n', | |
- 'r' => '\r', | |
- 't' => '\t', | |
- _ => c, | |
+fn unescape(s: &str) -> Option<(char, usize)> { | |
+ let mut res = None; | |
+ | |
+ for (i, ch) in s.chars().enumerate() { | |
+ if ch == '\\' && res.is_none() { | |
+ // Skip only the leading backslash; a second one is an escaped backslash handled below. | |
+ res = Some((ch, ch.len_utf8())); | |
+ | |
+ continue; | |
+ } else { | |
+ let c = match ch { | |
+ 'a' => '\u{0007}', | |
+ 'b' => '\u{0008}', | |
+ 'f' => '\u{000c}', | |
+ 'v' => '\u{000b}', | |
+ 'n' => '\n', | |
+ 'r' => '\r', | |
+ 't' => '\t', | |
+ '\'' => '\'', | |
+ '"' => '"', | |
+ '\\' => '\\', | |
+ o if o.is_digit(8) => { | |
+ let i = if o != '0' { i } else { i + 1 }; | |
+ let len = usize::min(OCTAL_MAX_LEN, s.len()); | |
+ | |
+ u8::from_str_radix(&s[i..len], 8).expect("octal") as char | |
+ } | |
+ _ => ch, | |
+ }; | |
+ | |
+ return Some((c, c.len_utf8() + 1)); | |
+ } | |
} | |
+ | |
+ res | |
} | |
struct Unescape<'a> { | |
@@ -49,11 +81,9 @@ impl<'a> Iterator for Unescape<'a> { | |
// is the next character an escape? | |
let (ret, idx) = match self.string.chars().next().unwrap() { | |
- '\\' if self.string.len() > 1 => { | |
- // yes---it's \ and it's not the last char in a string | |
- // we know that \ is 1 byte long so we can index into the string safely | |
- let c = self.string[1..].chars().next().unwrap(); | |
- (Some(unescape_char(c)), 1 + c.len_utf8()) | |
+ '\\' => { | |
+ let (c, len) = unescape(&self.string).expect("failed to unescape input"); | |
+ (Some(c), len) | |
} | |
c => (Some(c), c.len_utf8()), // not an escape char | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment