Skip to content

Instantly share code, notes, and snippets.

@Kreijstal
Created July 21, 2014 05:45
Show Gist options
  • Save Kreijstal/115c2fda07e59f712858 to your computer and use it in GitHub Desktop.
Save Kreijstal/115c2fda07e59f712858 to your computer and use it in GitHub Desktop.
/**
 * Splits ONE code point into the 6-bit groups that UTF-8 distributes over its bytes.
 *
 * This is the first half of a two-step conversion; pass the result to
 * UTF8ArrayToUTF8 to obtain the actual UTF-8 bytes. The intermediate array is
 * kept separate so it can be inspected while debugging.
 *
 * - Values below 0x80 are returned as a one-element array holding the value.
 * - Larger values are returned least-significant group first, with exactly one
 *   group per byte of the resulting UTF-8 sequence. The last element is the
 *   group destined for the lead byte and may be 0 when the value's upper bits
 *   do not reach it (e.g. 0x800 -> [0, 32, 0]).
 *
 * @param {number} UnicodeValue A single code point or String#charCodeAt() result,
 *     an integer in 0..0x7FFFFFFF (the range of the original 6-byte UTF-8).
 * @returns {number[]} The 6-bit groups, least significant first.
 * @throws {Error} If the value is greater than 0x7FFFFFFF, negative, or not an integer.
 */
function UnicodeToUTF8Array(UnicodeValue) {
    var groups = [];
    var byteCount;
    var remaining;
    var i;

    if (UnicodeValue > 0x7FFFFFFF) {
        throw new Error("Value cannot be greater than " + 0x7FFFFFFF);
    }
    // Written as !(x >= 0) so that NaN and undefined are rejected too.
    if (!(UnicodeValue >= 0) || UnicodeValue % 1 !== 0) {
        throw new Error("Value must be a non-negative integer");
    }

    if (UnicodeValue < 0x80) {
        // ASCII: a single byte, stored as-is.
        groups.push(UnicodeValue);
        return groups;
    }

    // The sequence length is dictated by the value range, NOT by how many
    // non-zero 6-bit groups the value happens to have: an n-byte sequence has
    // only 7 - n payload bits in its lead byte, so e.g. a 12-bit value needs
    // three bytes even though it fits in two 6-bit groups.
    if (UnicodeValue < 0x800) {
        byteCount = 2;
    } else if (UnicodeValue < 0x10000) {
        byteCount = 3;
    } else if (UnicodeValue < 0x200000) {
        byteCount = 4;
    } else if (UnicodeValue < 0x4000000) {
        byteCount = 5;
    } else {
        byteCount = 6;
    }

    // The value is at most 31 bits wide here, so 32-bit bitwise operators are
    // safe; '>>>' keeps the shift unsigned.
    remaining = UnicodeValue;
    for (i = 0; i < byteCount; i++) {
        groups.push(remaining & 63);
        remaining = remaining >>> 6;
    }
    return groups;
}
/**
 * Packs the 6-bit groups of ONE character into its UTF-8 byte sequence.
 *
 * The input is the array produced by UnicodeToUTF8Array: either a single
 * value 0..127 (ASCII), or 2 to 6 six-bit groups ordered least significant
 * first, the last element being the group for the lead byte.
 *
 * - A one-element ASCII array is returned unchanged (the same array object).
 * - If the last group is too large for the lead byte of a sequence of that
 *   length (an n-byte sequence has 7 - n payload bits in its lead byte), the
 *   sequence is widened by one byte so the encoded value is preserved,
 *   e.g. [0, 32] -> E0 A0 80.
 * - Overlong forms (needless zero groups at the end) are not rejected.
 * - The input array is not modified.
 *
 * @param {number[]} UTF8Array Groups for a single character.
 * @returns {number[]} The UTF-8 bytes, lead byte first.
 * @throws {Error} If the array is empty, has more than 6 values, holds a group
 *     outside 0..63 (or a lone value outside 0..127), or needs more than 6 bytes.
 */
function UTF8ArrayToUTF8(UTF8Array) {
    var INVALID = "Incorrect value, the array provided doesn't have a correct UTF8 value";
    var leng = UTF8Array.length;
    var groups;
    var utf8bytearray;
    var i;

    if (leng === 0) {
        throw new Error(INVALID);
    }
    if (leng === 1) {
        // ASCII is the full 7-bit range, including '~' (126) and DEL (127).
        if (UTF8Array[0] >= 0 && UTF8Array[0] <= 0x7F) {
            return UTF8Array;
        }
        throw new Error(INVALID);
    }
    if (leng > 6) {
        throw new Error("UTF-8 Array cannot have more than 6 values");
    }

    // Work on a copy so the caller's array survives the call.
    groups = UTF8Array.slice();
    for (i = 0; i < leng; i++) {
        if (!(groups[i] >= 0 && groups[i] <= 63) || groups[i] % 1 !== 0) {
            throw new Error(INVALID);
        }
    }

    // The lead byte of an n-byte sequence carries only 7 - n payload bits.
    // When the top group overflows that, move to the next longer form with an
    // empty lead payload instead of letting the group corrupt the prefix bits.
    if (groups[leng - 1] >= (1 << (7 - leng))) {
        groups.push(0);
        leng += 1;
        if (leng > 6) {
            throw new Error(INVALID);
        }
    }

    // Lead prefix: as many leading 1 bits as the sequence has bytes, then a 0
    // (0xC0, 0xE0, 0xF0, 0xF8, 0xFC for 2..6 bytes).
    utf8bytearray = [((252 << (6 - leng)) & 255) | groups[leng - 1]];
    // Continuation bytes are 10xxxxxx, most significant group first.
    for (i = leng - 2; i >= 0; i--) {
        utf8bytearray.push(128 | groups[i]);
    }
    return utf8bytearray;
}
/**
 * Encodes a string and returns the resulting bytes as a Uint8Array.
 *
 * Every UTF-16 code unit of the string is converted on its own by running it
 * through UnicodeToUTF8Array and then UTF8ArrayToUTF8. Characters outside the
 * Basic Multilingual Plane are therefore encoded as two separate surrogate
 * values, not as one 4-byte sequence.
 *
 * Requires typed-array support (Uint8Array).
 *
 * @param {string} string The text to encode.
 * @returns {Uint8Array} The encoded bytes, in string order.
 * @throws {Error} Whatever the two conversion helpers throw for a code unit.
 */
function stringToUint8utf8array(string) {
    var bytes = [];
    var encoded;
    var i;
    var j;

    for (i = 0; i < string.length; i++) {
        encoded = UTF8ArrayToUTF8(UnicodeToUTF8Array(string.charCodeAt(i)));
        // Append in place; re-creating the accumulator with concat() for every
        // character copies the whole array each time (quadratic in the length).
        for (j = 0; j < encoded.length; j++) {
            bytes.push(encoded[j]);
        }
    }
    return new Uint8Array(bytes);
}
//Usage example
stringToUint8utf8array("こんにちは");//Returns the UTF-8 encoding of "konnichiwa" as an array of bytes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment