Skip to content

Instantly share code, notes, and snippets.

@hasinhayder
Forked from joni/toUTF8Array.js
Created April 11, 2021 19:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hasinhayder/3ac5ceb98c2bfcaa11fd16cb1049450c to your computer and use it in GitHub Desktop.
Save hasinhayder/3ac5ceb98c2bfcaa11fd16cb1049450c to your computer and use it in GitHub Desktop.
toUTF8Array: Javascript function for encoding a string in UTF8.
function toUTF8Array(str) {
var utf8 = [];
for (var i=0; i < str.length; i++) {
var charcode = str.charCodeAt(i);
if (charcode < 0x80) utf8.push(charcode);
else if (charcode < 0x800) {
utf8.push(0xc0 | (charcode >> 6),
0x80 | (charcode & 0x3f));
}
else if (charcode < 0xd800 || charcode >= 0xe000) {
utf8.push(0xe0 | (charcode >> 12),
0x80 | ((charcode>>6) & 0x3f),
0x80 | (charcode & 0x3f));
}
// surrogate pair
else {
i++;
// UTF-16 encodes 0x10000-0x10FFFF by
// subtracting 0x10000 and splitting the
// 20 bits of 0x0-0xFFFFF into two halves
charcode = 0x10000 + (((charcode & 0x3ff)<<10)
| (str.charCodeAt(i) & 0x3ff))
utf8.push(0xf0 | (charcode >>18),
0x80 | ((charcode>>12) & 0x3f),
0x80 | ((charcode>>6) & 0x3f),
0x80 | (charcode & 0x3f));
}
}
return utf8;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment