Skip to content

Instantly share code, notes, and snippets.

@salvoravida
Created January 19, 2022 10:13
Show Gist options
  • Save salvoravida/e0d6cb47b87e34f854b6807b76005528 to your computer and use it in GitHub Desktop.
Save salvoravida/e0d6cb47b87e34f854b6807b76005528 to your computer and use it in GitHub Desktop.
Unescape Unicode (\u / \U) and UTF-8 hex (\x) escape sequences in a string
/**
 * Decodes runs of `\xHH` escapes by interpreting the bytes as UTF-8.
 *
 * Example (raw text): `\xe2\x98\x89 Mercury` -> `☉ Mercury`
 *
 * Consecutive escapes are decoded together so that multi-byte UTF-8
 * sequences are reassembled into a single character. A run whose bytes do
 * not form valid UTF-8 (for example a lone `\xff`) is left exactly as it was
 * instead of throwing.
 *
 * @param s - Text that may contain `\xHH` escapes.
 * @returns The text with every decodable run replaced. Non-string input is
 *   returned unchanged (runtime guard for untyped callers).
 */
function unescapeUtf8Hex(s: string): string {
  if (typeof s !== 'string') return s;
  return s.replace(/(?:\\x[\da-fA-F]{2})+/g, (run) => {
    try {
      // `\xHH` -> `%HH` lets decodeURIComponent do the UTF-8 decoding.
      return decodeURIComponent(run.replace(/\\x/g, '%'));
    } catch (e) {
      // decodeURIComponent throws URIError on malformed UTF-8; keep the
      // original text rather than failing the whole conversion.
      return run;
    }
  });
}
/**
 * Replaces textual Unicode escapes with the characters they denote.
 *
 * Recognised forms (the leading backslash may be single or doubled):
 *   - `\uXXXX`     — lowercase `u` followed by exactly 4 hex digits
 *   - `\UXXXXXXXX` — uppercase `U` followed by exactly 8 hex digits
 *
 * Example (raw text): `\\U0001F528 hello \U0001F528 hello` -> `🔨 hello 🔨 hello`
 *
 * The digit count is fixed so that hex-looking text after an escape is not
 * swallowed: `\u00e9cole` becomes `école`. Escapes that name a value above
 * U+10FFFF, or that do not have the required number of digits, are left
 * unchanged.
 *
 * @param s - Text that may contain `\u` / `\U` escapes.
 * @returns The text with every valid escape replaced. Non-string input is
 *   returned unchanged (runtime guard for untyped callers).
 */
function unescapeUnicode(s: string): string {
  if (typeof s !== 'string') return s;
  const maxCodePoint = 0x10ffff;
  return s.replace(
    /\\{1,2}(?:u([0-9A-Fa-f]{4})|U([0-9A-Fa-f]{8}))/g,
    (match: string, hex4?: string, hex8?: string) => {
      // Exactly one of the two groups participates in any given match.
      const codePoint = parseInt(hex4 ?? hex8 ?? '', 16);
      // String.fromCodePoint throws above U+10FFFF; keep such escapes as-is.
      return codePoint <= maxCodePoint ? String.fromCodePoint(codePoint) : match;
    },
  );
}
/**
 * Applies both unescape passes to a string: `\xHH` UTF-8 byte escapes are
 * decoded first, then `\u` / `\U` Unicode escapes.
 *
 * @param s - Text that may contain either kind of escape.
 * @returns The result of running `unescapeUtf8Hex` followed by `unescapeUnicode`.
 */
export function unescapeAll(s: string): string {
  const hexDecoded = unescapeUtf8Hex(s);
  const fullyDecoded = unescapeUnicode(hexDecoded);
  return fullyDecoded;
}
@salvoravida
Copy link
Author

// Jest suite for unescapeAll, the only function exercised below.
describe('unescapeAll', () => {
   test('should handle doubled escaped and escaped unicode code points', () => {
      // Raw text: \\U0001F528 hello \U0001F528 hello - \xe2\x98\x89 Mercury
      const s = '\\\\U0001F528 hello \\U0001F528 hello - \\xe2\\x98\\x89 Mercury';

      expect(unescapeAll(s)).toBe('🔨 hello 🔨 hello - ☉ Mercury');
   });

   test('should handle invalid escaped and escaped unicode code points', () => {
      // 0xE201F528 is above U+10FFFF, so the escapes must be left untouched.
      const s = '\\\\Ue201F528 hello \\Ue201F528 hello';

      expect(unescapeAll(s)).toBe(s);
   });

   test('should handle invalid strings', () => {
      // @ts-expect-error -- deliberately passing a non-string to exercise the runtime guard
      expect(unescapeAll(2)).toBe(2);
   });
});

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment