Skip to content

Instantly share code, notes, and snippets.

@mikeyhew
Last active September 22, 2019 16:53
Show Gist options
  • Save mikeyhew/334122cd0104ad3509388074be4351ba to your computer and use it in GitHub Desktop.
Save mikeyhew/334122cd0104ad3509388074be4351ba to your computer and use it in GitHub Desktop.
JsonStr, a Rust unsized string slice type that can include JSON escape sequences
// Author: Michael Hewson
// https://gist.github.com/mikeyhew/334122cd0104ad3509388074be4351ba
// released under the Unlicense <https://unlicense.org/>
use std::{
borrow::Cow,
str,
char,
iter,
};
/// An unsized string slice whose text may contain JSON escape sequences.
///
/// The wrapper has the same layout as `str` (`#[repr(transparent)]`); escapes
/// are only interpreted when the contents are iterated with [`JsonStr::chars`].
#[repr(transparent)]
struct JsonStr(str);

impl JsonStr {
    /// Returns an iterator over the characters of this string, wrapping the
    /// underlying `str` character iterator in a `Chars` adaptor.
    fn chars(&self) -> Chars<'_> {
        let raw = self.0.chars();
        Chars(raw)
    }

    /// Reinterprets `s` as a `JsonStr` without checking its escape sequences.
    ///
    /// This is a safe function because `JsonStr` doesn't rely on valid escape
    /// sequences for memory safety.
    fn from_str_unchecked(s: &str) -> &Self {
        let ptr = s as *const str as *const Self;
        // SAFETY: `JsonStr` is `#[repr(transparent)]` over `str`, so a pointer
        // to a valid `str` is also a valid pointer to a `JsonStr`, and the
        // returned reference keeps the lifetime of `s`.
        unsafe { &*ptr }
    }
}
/// Builds an owned `String` from the characters yielded by `JsonStr::chars`.
impl<'a> From<&'a JsonStr> for String {
    fn from(json_str: &'a JsonStr) -> Self {
        let mut decoded = String::new();
        decoded.extend(json_str.chars());
        decoded
    }
}
/// Converts to a `String` first, then hands that buffer over as a boxed `str`.
impl<'a> From<&'a JsonStr> for Box<str> {
    fn from(json_str: &'a JsonStr) -> Self {
        String::from(json_str).into_boxed_str()
    }
}
/// Borrows the underlying text when it contains no backslash, and otherwise
/// allocates a `String` through the `String` conversion.
impl<'a> From<&'a JsonStr> for Cow<'a, str> {
    fn from(json_str: &'a JsonStr) -> Self {
        let raw: &'a str = &json_str.0;
        match raw.find('\\') {
            // A backslash means at least one escape has to be processed.
            Some(_) => Cow::Owned(String::from(json_str)),
            None => Cow::Borrowed(raw),
        }
    }
}
/// Character iterator that wraps `str::Chars` and replaces backslash escape
/// sequences with the characters they stand for.
struct Chars<'a>(str::Chars<'a>);

impl<'a> Iterator for Chars<'a> {
    type Item = char;

    /// Yields the next character, decoding an escape sequence if one starts here.
    ///
    /// # Panics
    ///
    /// Panics with "lone backslash" when a backslash is the last character of
    /// the input, and whenever `parse_uXXXX_escape` panics on a `\u` escape.
    fn next(&mut self) -> Option<char> {
        let first = self.0.next()?;
        if first != '\\' {
            return Some(first);
        }

        let escaped = self.0.next().expect("lone backslash");
        let decoded = match escaped {
            'b' => '\u{8}',
            'f' => '\u{c}',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => parse_uXXXX_escape(&mut self.0),
            // `\"`, `\\` and `\/` stand for the character after the backslash,
            // and any unrecognised escape is passed through the same way.
            other => other,
        };
        Some(decoded)
    }
}
/// Decodes the remainder of a `\uXXXX` escape whose `\u` prefix has already
/// been consumed by the caller.
///
/// `chars` must be positioned at the four hex digits. When they encode a high
/// (leading) surrogate and are immediately followed by a second `\uXXXX`
/// escape that completes the pair, both escapes are consumed and the combined
/// character is returned. In every other case only the first escape is
/// consumed, and whatever follows is left untouched for the caller.
///
/// # Panics
///
/// Panics if the code unit is a surrogate that is not part of a valid pair,
/// or if `parse_u16` panics on an escape that has to be read.
#[allow(non_snake_case)] // the name spells out the `\uXXXX` syntax it parses
fn parse_uXXXX_escape(chars: &mut str::Chars<'_>) -> char {
    let first = parse_u16(chars);

    // Only a high surrogate can be the first half of a pair. Looking ahead for
    // any other code unit would parse the next escape twice and would panic on
    // a malformed following escape even though this character is valid.
    if (0xD800..=0xDBFF).contains(&first) {
        let mut lookahead = chars.clone();
        if lookahead.next() == Some('\\') && lookahead.next() == Some('u') {
            let second = parse_u16(&mut lookahead);
            let mut decoder = char::decode_utf16(iter::once(first).chain(iter::once(second)));
            if let (Some(Ok(c)), None) = (decoder.next(), decoder.next()) {
                // Advance the caller's iterator past the end of the surrogate pair.
                *chars = lookahead;
                return c;
            }
        }
    }

    char::decode_utf16(iter::once(first))
        .next()
        .expect("should at least get Some(Err(_))")
        .unwrap_or_else(|err| panic!("failed to decode \\uXXXX sequence: {}", err))
}
/// Reads exactly four hexadecimal digits from the front of `chars`, advances
/// the iterator past them, and returns their value.
///
/// # Panics
///
/// Panics if fewer than four bytes remain, or if the next four bytes are not
/// all ASCII hex digits.
fn parse_u16(chars: &mut str::Chars<'_>) -> u16 {
    let rest = chars.as_str();
    let head = rest
        .as_bytes()
        .get(..4)
        .unwrap_or_else(|| panic!("unexpected end of str: {}", rest));

    // `from_str_radix` on its own would also accept a leading `+` (so `+041`
    // would parse as 0x41); a `\uXXXX` escape needs four hex digits.
    if !head.iter().all(|b| b.is_ascii_hexdigit()) {
        panic!("invalid \\uXXXX escape: {:?}, expected four hex digits", rest);
    }

    // The four bytes are ASCII, so byte offset 4 is a character boundary.
    let (digits, tail) = rest.split_at(4);
    let ret = u16::from_str_radix(digits, 16).expect("four hex digits always fit in a u16");
    *chars = tail.chars();
    ret
}
/// Text containing escapes must convert to an owned, decoded `Cow`.
#[test]
fn test_cow() {
    // The input holds one real newline plus the escapes `\n` and `\u0065`.
    let json = JsonStr::from_str_unchecked("abc\ndef\\n\\u0065");
    let cow: Cow<'_, str> = Cow::from(json);
    if let Cow::Borrowed(_) = cow {
        panic!("cow should not be borrowed");
    }
    assert_eq!(cow, "abc\ndef\ne");
}
/// Two `\uXXXX` escapes forming a surrogate pair decode to a single character.
#[test]
fn test_surrogate_pair() {
    let decoded = String::from(JsonStr::from_str_unchecked("\\uD801\\uDC37"));
    // The same expected character, written literally and as a code point.
    for expected in &["𐐷", "\u{10437}"] {
        assert_eq!(decoded, *expected);
    }
}
/// Two consecutive non-surrogate `\uXXXX` escapes decode to two characters.
#[test]
fn test_non_surrogate() {
    let decoded = String::from(JsonStr::from_str_unchecked("\\u0433\\u0434"));
    // The same expected text, written literally and as code points.
    for expected in &["гд", "\u{0433}\u{0434}"] {
        assert_eq!(decoded, *expected);
    }
}
/// Every two-character escape decodes to its single-character meaning.
#[test]
fn test_simple_escapes() {
    // (escaped form, decoded form)
    let cases = [
        ("\\\"", "\""),
        ("\\\\", "\\"),
        ("\\/", "/"),
        ("\\b", "\u{8}"),
        ("\\f", "\u{c}"),
        ("\\n", "\n"),
        ("\\r", "\r"),
        ("\\t", "\t"),
    ];
    let escaped: String = cases.iter().map(|&(esc, _)| esc).collect();
    let expected: String = cases.iter().map(|&(_, raw)| raw).collect();

    let unescaped = String::from(JsonStr::from_str_unchecked(&escaped));
    assert_eq!(unescaped, expected);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment