Skip to content

Instantly share code, notes, and snippets.

@cole-h

cole-h/1.diff Secret

Created March 15, 2021 00:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cole-h/c045b48222821dcc79dc1626a7da8929 to your computer and use it in GitHub Desktop.
Save cole-h/c045b48222821dcc79dc1626a7da8929 to your computer and use it in GitHub Desktop.
diff --git a/src/uu/tr/src/expand.rs b/src/uu/tr/src/expand.rs
index 3291d57a..c6d15705 100644
--- a/src/uu/tr/src/expand.rs
+++ b/src/uu/tr/src/expand.rs
@@ -14,18 +14,50 @@ use std::cmp::min;
use std::iter::Peekable;
use std::ops::RangeInclusive;
+/// According to POSIX, an "octal sequence shall consist of a <backslash>
+/// followed by the longest sequence of one, two, or three-octal-digit
+/// characters (01234567)" ([source]), so limit the whole escape to a maximum
+/// of 4 chars: the leading backslash plus up to three octal digits.
+///
+/// [source]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
+const OCTAL_MAX_LEN: usize = 4;
+
#[inline]
-fn unescape_char(c: char) -> char {
- match c {
- 'a' => 0x07u8 as char,
- 'b' => 0x08u8 as char,
- 'f' => 0x0cu8 as char,
- 'v' => 0x0bu8 as char,
- 'n' => '\n',
- 'r' => '\r',
- 't' => '\t',
- _ => c,
+fn unescape(s: &str) -> Option<(char, usize)> {
+ let mut res = None;
+
+ for (i, ch) in s.chars().enumerate() {
+ if ch == '\\' && res.is_none() {
+ // Skip only the leading backslash; a second one falls through and is returned literally.
+ res = Some((ch, ch.len_utf8()));
+
+ continue;
+ } else {
+ let c = match ch {
+ 'a' => '\u{0007}',
+ 'b' => '\u{0008}',
+ 'f' => '\u{000c}',
+ 'v' => '\u{000b}',
+ 'n' => '\n',
+ 'r' => '\r',
+ 't' => '\t',
+ '\'' => '\'',
+ '"' => '"',
+ '\\' => '\\',
+ o if o.is_digit(8) => {
+ let i = if o != '0' { i } else { i + 1 };
+ let len = usize::min(OCTAL_MAX_LEN, s.len());
+
+ u8::from_str_radix(&s[i..len], 8).expect("octal") as char
+ }
+ _ => ch,
+ };
+
+ return Some((c, c.len_utf8() + 1));
+ }
}
+
+ res
}
struct Unescape<'a> {
@@ -49,11 +81,9 @@ impl<'a> Iterator for Unescape<'a> {
// is the next character an escape?
let (ret, idx) = match self.string.chars().next().unwrap() {
- '\\' if self.string.len() > 1 => {
- // yes---it's \ and it's not the last char in a string
- // we know that \ is 1 byte long so we can index into the string safely
- let c = self.string[1..].chars().next().unwrap();
- (Some(unescape_char(c)), 1 + c.len_utf8())
+ '\\' => {
+ let (c, len) = unescape(&self.string).expect("failed to unescape input");
+ (Some(c), len)
}
c => (Some(c), c.len_utf8()), // not an escape char
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment