Created
June 13, 2012 10:10
-
-
Save volodymyr-mykhailyk/2923227 to your computer and use it in GitHub Desktop.
JS Unicode String to Byte Array
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Encodes a JavaScript (UTF-16) string as a sequence of UTF-8 bytes.
 *
 * Each byte is returned as a one-character string whose char code is the
 * byte value (0-255), so `b.join('')` yields a "binary string" and
 * `b[i].charCodeAt(0)` yields the numeric byte.
 *
 * Surrogate pairs (characters above U+FFFF) are combined into a single code
 * point and emitted as one 4-byte sequence. An unpaired surrogate is encoded
 * on its own as a 3-byte sequence, as in earlier versions of this function;
 * such output is not strictly valid UTF-8.
 *
 * @param {string} str - The string to encode.
 * @returns {string[]} One single-character string per UTF-8 byte.
 */
function stringToByteArray(str) {
  var b = [], i, unicode, next;
  for (i = 0; i < str.length; i++) {
    unicode = str.charCodeAt(i);
    // charCodeAt yields UTF-16 code units (never above 0xffff). A high
    // surrogate followed by a low surrogate together represent one code
    // point in 0x10000 - 0x10ffff; merge them and consume both units.
    if (unicode >= 0xd800 && unicode <= 0xdbff && i + 1 < str.length) {
      next = str.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        unicode = ((unicode - 0xd800) << 10) + (next - 0xdc00) + 0x10000;
        i++;
      }
    }
    // 0x00000000 - 0x0000007f -> 0xxxxxxx
    if (unicode <= 0x7f) {
      b.push(String.fromCharCode(unicode));
    // 0x00000080 - 0x000007ff -> 110xxxxx 10xxxxxx
    } else if (unicode <= 0x7ff) {
      b.push(String.fromCharCode((unicode >> 6) | 0xc0));
      b.push(String.fromCharCode((unicode & 0x3f) | 0x80));
    // 0x00000800 - 0x0000ffff -> 1110xxxx 10xxxxxx 10xxxxxx
    } else if (unicode <= 0xffff) {
      b.push(String.fromCharCode((unicode >> 12) | 0xe0));
      b.push(String.fromCharCode(((unicode >> 6) & 0x3f) | 0x80));
      b.push(String.fromCharCode((unicode & 0x3f) | 0x80));
    // 0x00010000 - 0x0010ffff -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    } else {
      b.push(String.fromCharCode((unicode >> 18) | 0xf0));
      b.push(String.fromCharCode(((unicode >> 12) & 0x3f) | 0x80));
      b.push(String.fromCharCode(((unicode >> 6) & 0x3f) | 0x80));
      b.push(String.fromCharCode((unicode & 0x3f) | 0x80));
    }
  }
  return b;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
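The last `else` branch is incorrect: it is never reached, because JS strings are UTF-16 and `charCodeAt` returns a single code unit (at most 0xffff). Unicode characters with a code point greater than 0xffff are stored as two code units (a surrogate pair), which must be combined before encoding. https://javascript.ru/String/charCodeAt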