Created
July 21, 2014 05:45
-
-
Save Kreijstal/115c2fda07e59f712858 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Splits ONE Unicode value into the payload groups of its UTF-8 encoding
 * (the "char number range" layout of the UTF-8 RFC).
 *
 * The result is an intermediate form, handy for debugging; pass it to
 * UTF8ArrayToUTF8 to obtain the actual UTF-8 bytes.
 *
 * - Values below 0x80 are returned as a single-element array `[value]`.
 * - Larger values are returned as 6-bit groups, least significant group
 *   first. The array length equals the number of bytes of the final UTF-8
 *   sequence, and the last element is the payload of the lead byte.
 *
 * The historical 31-bit / 6-byte form of UTF-8 is supported, so values up to
 * 0x7FFFFFFF are accepted.
 *
 * @param {number} UnicodeValue A single code point (or `charCodeAt` result).
 * @returns {number[]} Payload groups, least significant first.
 * @throws {Error} If the value is greater than 0x7FFFFFFF, negative,
 *     fractional, NaN or not a number.
 */
function UnicodeToUTF8Array(UnicodeValue) {
    if (UnicodeValue > 0x7FFFFFFF) {
        throw new Error("Value cannot be greater than " + 0x7FFFFFFF);
    }
    if (typeof UnicodeValue !== "number" ||
            UnicodeValue < 0 ||
            Math.floor(UnicodeValue) !== UnicodeValue) {
        // Also catches NaN, because Math.floor(NaN) !== NaN.
        throw new Error("Value must be a non-negative integer");
    }

    // ASCII is stored as-is in a single byte.
    if (UnicodeValue < 0x80) {
        return [UnicodeValue];
    }

    var groups = [];
    var remaining = UnicodeValue;
    while (remaining > 0) {
        groups.push(remaining & 63);
        // '>>>' keeps the value unsigned; the range check above guarantees
        // it fits in 31 bits.
        remaining = remaining >>> 6;
    }

    // The lead byte of an n-byte sequence carries only (7 - n) payload bits.
    // When the most significant group does not fit (e.g. U+0800 or any
    // code point above U+FFFF), the sequence needs one more byte whose lead
    // payload is zero.
    var leadCapacity = (1 << (7 - groups.length)) - 1;
    if (groups[groups.length - 1] > leadCapacity) {
        groups.push(0);
    }
    return groups;
}
/**
 * Converts the payload groups of ONE character into its UTF-8 byte sequence.
 *
 * The input is the intermediate form produced by UnicodeToUTF8Array: either
 * a single ASCII value, or 2 to 6 six-bit groups ordered from least to most
 * significant (the last element is the payload of the lead byte).
 *
 * The argument is NOT modified; a new array is always returned.
 *
 * If the most significant group is too large for the lead byte of a sequence
 * of that length, the sequence is widened by one byte (lead payload zero)
 * instead of letting the payload overwrite the length-marker bits.
 *
 * The historical 6-byte form of UTF-8 is supported.
 *
 * @param {number[]} UTF8Array Payload groups of a single character.
 * @returns {number[]} The UTF-8 bytes, lead byte first.
 * @throws {Error} If the array is empty, holds more than 6 values, or holds
 *     a value that cannot be part of a UTF-8 sequence.
 */
function UTF8ArrayToUTF8(UTF8Array) {
    var INVALID_MESSAGE = "Incorrect value, the array provided doesn't have a correct UTF8 value";

    // True when `value` is an integer within [0, max].
    function isPayload(value, max) {
        return typeof value === "number" &&
            value >= 0 &&
            value <= max &&
            Math.floor(value) === value;
    }

    var count = UTF8Array.length;
    if (count === 0) {
        throw new Error(INVALID_MESSAGE);
    }
    if (count > 6) {
        throw new Error("UTF-8 Array cannot have more than 6 values");
    }

    // A single value is plain ASCII (0..127) and is its own encoding.
    if (count === 1) {
        if (!isPayload(UTF8Array[0], 127)) {
            throw new Error(INVALID_MESSAGE);
        }
        return [UTF8Array[0]];
    }

    // Work on a copy so the caller's array is left untouched.
    var groups = UTF8Array.slice();
    var i;
    for (i = 0; i < groups.length; i++) {
        if (!isPayload(groups[i], 63)) {
            throw new Error(INVALID_MESSAGE);
        }
    }

    // The lead byte of an n-byte sequence only has room for (7 - n) bits.
    var leadCapacity = (1 << (7 - groups.length)) - 1;
    if (groups[groups.length - 1] > leadCapacity) {
        groups.push(0);
        if (groups.length > 6) {
            // The value needs more than 31 bits: not representable.
            throw new Error(INVALID_MESSAGE);
        }
    }

    // Length marker of the lead byte: 0xC0, 0xE0, 0xF0, 0xF8 or 0xFC for
    // 2, 3, 4, 5 or 6 bytes respectively.
    var leadMarker = (0xFC << (6 - groups.length)) & 0xFF;
    var bytes = [leadMarker | groups[groups.length - 1]];

    // Continuation bytes (10xxxxxx), most significant group first.
    for (i = groups.length - 2; i >= 0; i--) {
        bytes.push(0x80 | groups[i]);
    }
    return bytes;
}
/**
 * Encodes a JavaScript string as UTF-8.
 *
 * The encoding is delegated to the platform's TextEncoder, which works on
 * whole code points: a surrogate pair becomes ONE 4-byte sequence (not two
 * 3-byte ones), and an unpaired surrogate is encoded as U+FFFD, so the
 * result is always well-formed UTF-8. It also runs in linear time.
 *
 * @param {string} string The text to encode.
 * @returns {Uint8Array} The UTF-8 bytes of `string`.
 * @throws {TypeError} If `string` is not a string.
 */
function stringToUint8utf8array(string) {
    if (typeof string !== "string") {
        throw new TypeError("stringToUint8utf8array expects a string");
    }
    return new TextEncoder().encode(string);
}
//Usage! :D
stringToUint8utf8array("こんにちは");//This returns the UTF-8 value of "konnichiwa" in an array (of bytes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment