Last active
December 28, 2022 23:56
-
-
Save yckart/4951636 to your computer and use it in GitHub Desktop.
Encode/Decode UTF8 with javascript. more at: https://gist.github.com/avenauche/4585741
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// http://kenany.me/blog/base64-js/ | |
var utf8 = { | |
encode: function(str) { return unescape( encodeURIComponent(str) ); }, | |
decode: function(str) { return decodeURIComponent( escape(str) ); } | |
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
/* Utf8 class: encode / decode between multi-byte Unicode characters and UTF-8 multiple */ | |
/* single-byte character encoding (c) Chris Veness 2002-2011 */ | |
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */ | |
/* changelog | |
* 2013-02-14: changed namespace to object literal | |
* 2010-09-29: fixed decode order of operation to avoid falsely recognising decoded string as 3-byte | |
* utf-8 charaacter | |
*/ | |
var UTF8 = { | |
/** | |
* Encode multi-byte Unicode string into utf-8 multiple single-byte characters | |
* (BMP / basic multilingual plane only) | |
* | |
* Chars in range U+0080 - U+07FF are encoded in 2 chars, U+0800 - U+FFFF in 3 chars | |
* | |
* @param {String} strUni Unicode string to be encoded as UTF-8 | |
* @returns {String} encoded string | |
*/ | |
encode: function (strUni) { | |
// use regular expressions & String.replace callback function for better efficiency | |
// than procedural approaches | |
var strUtf = strUni.replace( | |
/[\u0080-\u07ff]/g, // U+0080 - U+07FF => 2 bytes 110yyyyy, 10zzzzzz | |
function (c) { | |
var cc = c.charCodeAt(0); | |
return String.fromCharCode(0xc0 | cc >> 6, 0x80 | cc & 0x3f); | |
}); | |
strUtf = strUtf.replace( | |
/[\u0800-\uffff]/g, // U+0800 - U+FFFF => 3 bytes 1110xxxx, 10yyyyyy, 10zzzzzz | |
function (c) { | |
var cc = c.charCodeAt(0); | |
return String.fromCharCode(0xe0 | cc >> 12, 0x80 | cc >> 6 & 0x3F, 0x80 | cc & 0x3f); | |
}); | |
return strUtf; | |
}, | |
/** | |
* Decode utf-8 encoded string back into multi-byte Unicode characters | |
* | |
* @param {String} strUtf UTF-8 string to be decoded back to Unicode | |
* @returns {String} decoded string | |
*/ | |
decode: function (strUtf) { | |
// note: decode 3-byte chars first as decoded 2-byte strings could appear to be 3-byte char! | |
var strUni = strUtf.replace( | |
/[\u00e0-\u00ef][\u0080-\u00bf][\u0080-\u00bf]/g, // 3-byte chars | |
function (c) { // (note parentheses for precence) | |
var cc = ((c.charCodeAt(0) & 0x0f) << 12) | ((c.charCodeAt(1) & 0x3f) << 6) | (c.charCodeAt(2) & 0x3f); | |
return String.fromCharCode(cc); | |
}); | |
strUni = strUni.replace( | |
/[\u00c0-\u00df][\u0080-\u00bf]/g, // 2-byte chars | |
function (c) { // (note parentheses for precence) | |
var cc = (c.charCodeAt(0) & 0x1f) << 6 | c.charCodeAt(1) & 0x3f; | |
return String.fromCharCode(cc); | |
}); | |
return strUni; | |
} | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment