Skip to content

Instantly share code, notes, and snippets.

@neonexus
Last active September 25, 2019 20:06
Show Gist options
  • Save neonexus/376e01cb9e7477813ed556ab5974d508 to your computer and use it in GitHub Desktop.
Save neonexus/376e01cb9e7477813ed556ab5974d508 to your computer and use it in GitHub Desktop.
Encode / decode UTF8 array in JavaScript (original: https://gist.github.com/joni/3760795)
/**
 * Decodes a sequence of UTF-8 bytes into a JavaScript (UTF-16) string.
 *
 * Well-formed input decodes exactly as before. Malformed input is replaced
 * with U+FFFD instead of producing garbage or swallowing the bytes after it:
 *   - a stray continuation byte (0x80-0xBF) or an invalid lead byte
 *     (0xF8-0xFF) yields one U+FFFD and only that byte is consumed;
 *   - a multi-byte sequence that is cut short, or interrupted by a byte that
 *     is not a continuation byte, yields one U+FFFD; the interrupting byte is
 *     then decoded on its own;
 *   - a 4-byte sequence whose value lies outside U+10000..U+10FFFF yields one
 *     U+FFFD for the whole sequence.
 * Overlong 2-/3-byte forms and 3-byte encoded surrogates are still decoded
 * leniently.
 *
 * @param {ArrayLike<number>} data - Byte values to decode (array or typed array).
 * @returns {string} The decoded string.
 */
function fromUTF8Array(data) {
    const REPLACEMENT = '\uFFFD';
    let str = '';

    for (let i = 0; i < data.length; i++) {
        const value = data[i];

        if (value < 0x80) {
            str += String.fromCharCode(value);
            continue;
        }

        // The lead byte determines how many continuation bytes must follow.
        let trailing;
        if (value > 0xBF && value < 0xE0) {
            trailing = 1;
        } else if (value > 0xDF && value < 0xF0) {
            trailing = 2;
        } else if (value > 0xEF && value < 0xF8) {
            trailing = 3;
        } else {
            // Stray continuation byte or invalid lead byte.
            str += REPLACEMENT;
            continue;
        }

        // Count how many of the required continuation bytes (10xxxxxx) are present.
        let present = 0;
        while (
            present < trailing &&
            i + 1 + present < data.length &&
            (data[i + 1 + present] & 0xC0) === 0x80
        ) {
            present++;
        }

        if (present < trailing) {
            // Skip only the bytes that belonged to the broken sequence so the
            // byte that interrupted it is decoded on the next iteration.
            str += REPLACEMENT;
            i += present;
            continue;
        }

        // Payload bits of the lead byte: 0x1F, 0x0F or 0x07 for 1, 2 or 3 trailing bytes.
        let codePoint = value & (0x3F >> trailing);
        for (let k = 1; k <= trailing; k++) {
            codePoint = (codePoint << 6) | (data[i + k] & 0x3F);
        }
        i += trailing;

        if (trailing < 3) {
            str += String.fromCharCode(codePoint);
        } else if (codePoint < 0x10000 || codePoint > 0x10FFFF) {
            // Overlong or beyond the Unicode range: not representable as a surrogate pair.
            str += REPLACEMENT;
        } else {
            // Supplementary code point: split into a UTF-16 surrogate pair.
            const offset = codePoint - 0x10000;
            str += String.fromCharCode((offset >> 10) | 0xD800, (offset & 0x03FF) | 0xDC00);
        }
    }

    return str;
}
/**
 * Encodes a JavaScript (UTF-16) string as an array of UTF-8 byte values.
 *
 * The string is walked by code point, so a valid surrogate pair becomes one
 * 4-byte sequence. An unpaired surrogate (a lone low surrogate, or a high
 * surrogate not followed by a low one) cannot be represented in UTF-8; it is
 * encoded as U+FFFD (EF BF BD) and the code unit after it is encoded normally
 * rather than being merged into a bogus 4-byte sequence.
 *
 * @param {string} str - The string to encode.
 * @returns {number[]} The UTF-8 bytes, each in the range 0-255.
 */
function toUTF8Array(str) {
    const utf8 = [];

    // String iteration yields whole code points; lone surrogates come through
    // as single code units.
    for (const char of str) {
        let codePoint = char.codePointAt(0);

        if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
            // Unpaired surrogate: substitute the replacement character.
            codePoint = 0xfffd;
        }

        if (codePoint < 0x80) {
            utf8.push(codePoint);
        } else if (codePoint < 0x800) {
            utf8.push(
                0xc0 | (codePoint >> 6),
                0x80 | (codePoint & 0x3f),
            );
        } else if (codePoint < 0x10000) {
            utf8.push(
                0xe0 | (codePoint >> 12),
                0x80 | ((codePoint >> 6) & 0x3f),
                0x80 | (codePoint & 0x3f),
            );
        } else {
            utf8.push(
                0xf0 | (codePoint >> 18),
                0x80 | ((codePoint >> 12) & 0x3f),
                0x80 | ((codePoint >> 6) & 0x3f),
                0x80 | (codePoint & 0x3f),
            );
        }
    }

    return utf8;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment