Skip to content

Instantly share code, notes, and snippets.

@marcelitocs
Created June 30, 2017 03:00
Show Gist options
  • Save marcelitocs/f49ec3d84051567e48edbc4b0e414158 to your computer and use it in GitHub Desktop.
Save marcelitocs/f49ec3d84051567e48edbc4b0e414158 to your computer and use it in GitHub Desktop.
Converte String UTF8 para Array de Bytes (também desconverte)
/**
 * Encodes a JavaScript string as a list of UTF-8 byte values.
 *
 * A high surrogate immediately followed by a low surrogate is merged into a
 * single code point and written as four bytes. Any other 16-bit unit at or
 * above 0x800 -- including an unpaired surrogate -- is written as three bytes.
 *
 * @param {string} str String of 16-bit UTF-16 code units.
 * @return {!Array<number>} The encoded byte values, in order.
 */
function stringToUtf8ByteArray(str) {
  // TODO(user): Use native implementations if/when available
  var bytes = [];
  var length = str.length;
  var index = 0;

  while (index < length) {
    var unit = str.charCodeAt(index);
    index += 1;

    // One byte: 0xxxxxxx
    if (unit < 0x80) {
      bytes.push(unit);
      continue;
    }

    // Two bytes: 110xxxxx 10xxxxxx
    if (unit < 0x800) {
      bytes.push(0xC0 | (unit >> 6), 0x80 | (unit & 0x3F));
      continue;
    }

    // Four bytes, only when a high surrogate is directly followed by a low
    // one; `index` already points at the unit after the current one.
    if ((unit & 0xFC00) === 0xD800 && index < length) {
      var next = str.charCodeAt(index);
      if ((next & 0xFC00) === 0xDC00) {
        index += 1;
        var codePoint = 0x10000 + ((unit & 0x03FF) << 10) + (next & 0x03FF);
        bytes.push(
          0xF0 | (codePoint >> 18),
          0x80 | ((codePoint >> 12) & 0x3F),
          0x80 | ((codePoint >> 6) & 0x3F),
          0x80 | (codePoint & 0x3F)
        );
        continue;
      }
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    bytes.push(
      0xE0 | (unit >> 12),
      0x80 | ((unit >> 6) & 0x3F),
      0x80 | (unit & 0x3F)
    );
  }

  return bytes;
}
/**
 * Converts a UTF-8 byte array to a JavaScript (UTF-16) string.
 *
 * The lead byte selects the sequence length:
 *   0x00-0x7F  one byte
 *   0xC0-0xDF  two bytes
 *   0xE0-0xEF  three bytes
 *   0xF0-0xF7  four bytes, emitted as a surrogate pair
 * Any value matching none of those ranges (a stray continuation byte
 * 0x80-0xBF, or 0xF8-0xFF) cannot start a sequence; it is decoded as U+FFFD
 * and only that single byte is consumed, so the bytes that follow it are
 * still decoded normally.
 *
 * Continuation bytes are not validated: only their low six bits are used,
 * and a sequence cut short by the end of the input has its missing bytes
 * treated as zero bits.
 *
 * @param {Uint8Array|Array<number>} bytes UTF-8 byte array.
 * @return {string} 16-bit Unicode string.
 */
function utf8ByteArrayToString(bytes) {
  // TODO(user): Use native implementations if/when available
  var out = [];
  var pos = 0;
  var c1, c2, c3, c4, u;

  while (pos < bytes.length) {
    c1 = bytes[pos++];
    if (c1 < 0x80) {
      // 0xxxxxxx
      out.push(String.fromCharCode(c1));
    } else if (c1 >= 0xC0 && c1 < 0xE0) {
      // 110xxxxx 10xxxxxx
      c2 = bytes[pos++];
      out.push(String.fromCharCode((c1 & 31) << 6 | c2 & 63));
    } else if (c1 >= 0xE0 && c1 < 0xF0) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      c2 = bytes[pos++];
      c3 = bytes[pos++];
      out.push(
          String.fromCharCode((c1 & 15) << 12 | (c2 & 63) << 6 | c3 & 63));
    } else if (c1 >= 0xF0 && c1 < 0xF8) {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> surrogate pair.
      c2 = bytes[pos++];
      c3 = bytes[pos++];
      c4 = bytes[pos++];
      // Offset from U+10000, split into the high and low 10 bits.
      u = ((c1 & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) -
          0x10000;
      out.push(String.fromCharCode(0xD800 + (u >> 10)));
      out.push(String.fromCharCode(0xDC00 + (u & 1023)));
    } else {
      // Not a valid lead byte: emit the replacement character and resync on
      // the very next byte instead of swallowing the bytes that follow.
      out.push('\uFFFD');
    }
  }
  return out.join('');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment