Instantly share code, notes, and snippets.

Embed
What would you like to do?
JS Unicode String to Byte Array
/**
 * Encodes a JavaScript string as UTF-8.
 *
 * JavaScript strings are sequences of UTF-16 code units, so a character above
 * U+FFFF is stored as a surrogate pair. Each pair is combined into a single
 * code point before encoding so that it yields one 4-byte UTF-8 sequence
 * rather than two separate 3-byte sequences.
 *
 * An unpaired surrogate cannot be combined; it is encoded on its own as a
 * 3-byte sequence.
 *
 * @param {string} str - The string to encode.
 * @returns {string[]} One single-character string per UTF-8 byte, where each
 *     character's code (0x00-0xff) is the byte value. Use
 *     `result[k].charCodeAt(0)` to obtain the numeric byte.
 */
function stringToByteArray(str) {
  var b = [], i, unicode, low;
  for (i = 0; i < str.length; i++) {
    unicode = str.charCodeAt(i);
    // charCodeAt never returns more than 0xffff, so code points beyond the
    // BMP have to be rebuilt from their high/low surrogate halves.
    if (unicode >= 0xd800 && unicode <= 0xdbff && i + 1 < str.length) {
      low = str.charCodeAt(i + 1);
      if (low >= 0xdc00 && low <= 0xdfff) {
        unicode = ((unicode - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
        i++; // the low surrogate has been consumed as part of this code point
      }
    }
    // 0x00000000 - 0x0000007f -> 0xxxxxxx
    if (unicode <= 0x7f) {
      b.push(String.fromCharCode(unicode));
    // 0x00000080 - 0x000007ff -> 110xxxxx 10xxxxxx
    } else if (unicode <= 0x7ff) {
      b.push(String.fromCharCode((unicode >> 6) | 0xc0));
      b.push(String.fromCharCode((unicode & 0x3f) | 0x80));
    // 0x00000800 - 0x0000ffff -> 1110xxxx 10xxxxxx 10xxxxxx
    } else if (unicode <= 0xffff) {
      b.push(String.fromCharCode((unicode >> 12) | 0xe0));
      b.push(String.fromCharCode(((unicode >> 6) & 0x3f) | 0x80));
      b.push(String.fromCharCode((unicode & 0x3f) | 0x80));
    // 0x00010000 - 0x0010ffff -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    } else {
      b.push(String.fromCharCode((unicode >> 18) | 0xf0));
      b.push(String.fromCharCode(((unicode >> 12) & 0x3f) | 0x80));
      b.push(String.fromCharCode(((unicode >> 6) & 0x3f) | 0x80));
      b.push(String.fromCharCode((unicode & 0x3f) | 0x80));
    }
  }
  return b;
}
@savamura

This comment has been minimized.

savamura commented Jun 27, 2015

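The last else branch is incorrect: Unicode code points above 0xffff are represented in JS as two UTF-16 code units (a surrogate pair), so charCodeAt never returns a value greater than 0xffff and that branch is never reached. https://javascript.ru/String/charCodeAt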

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment