Skip to content

Instantly share code, notes, and snippets.

@19317362
Forked from fwolf/gb2312-utf8-convert.js
Created January 12, 2016 06:20
Show Gist options
  • Save 19317362/a1d8e40bfb6587463d4d to your computer and use it in GitHub Desktop.
Save 19317362/a1d8e40bfb6587463d4d to your computer and use it in GitHub Desktop.
Convert between gb2312 and utf8 using javascript
http://freecode-freecode.blogspot.com/2008/11/how-to-gbkgb2312-and-utf-8-encoding.html
/**
 * Converts between JavaScript strings and percent-encoded UTF-8 text.
 *
 * Despite the names, nothing here transcodes the GB2312 character set:
 * "GB2312ToUTF8" takes an ordinary JavaScript string and percent-encodes
 * its characters at or above U+0100 as UTF-8, and "UTF8ToGB2312" reverses that.
 */
var GB2312UTF8 = {
  /**
   * Converts a 4-digit binary string to its numeric value.
   *
   * @param {string} s - Exactly four characters, each "0" or "1".
   * @returns {number} A value from 0 to 15, or -1 if `s` is not four binary digits.
   */
  Dig2Dec: function (s) {
    if (!/^[01]{4}$/.test(s)) {
      return -1;
    }
    return parseInt(s, 2);
  },

  /**
   * Percent-encodes a 16-bit value, given as a binary string, as UTF-8.
   *
   * The shortest encoding is used (1, 2 or 3 bytes) because overlong
   * sequences are not valid UTF-8.
   *
   * @param {string} s - Exactly sixteen characters, each "0" or "1".
   * @returns {string} "%XX", "%XX%XX" or "%XX%XX%XX" with uppercase hex digits,
   *   or "" if `s` is not sixteen binary digits.
   */
  Hex2Utf8: function (s) {
    if (!/^[01]{16}$/.test(s)) {
      return "";
    }
    var source = String(s);
    var value = parseInt(source, 2);
    var bits;
    if (value < 0x80) {
      // 0xxxxxxx
      bits = "0" + source.substring(9);
    } else if (value < 0x800) {
      // 110xxxxx 10xxxxxx
      bits = "110" + source.substring(5, 10) + "10" + source.substring(10);
    } else {
      // 1110xxxx 10xxxxxx 10xxxxxx
      bits = "1110" + source.substring(0, 4) +
        "10" + source.substring(4, 10) +
        "10" + source.substring(10);
    }
    var hexDigits = "0123456789ABCDEF";
    var encoded = "";
    for (var i = 0; i < bits.length; i += 8) {
      encoded += "%";
      encoded += hexDigits.charAt(this.Dig2Dec(bits.substring(i, i + 4)));
      encoded += hexDigits.charAt(this.Dig2Dec(bits.substring(i + 4, i + 8)));
    }
    return encoded;
  },

  /**
   * Converts a number to a 4-digit binary string.
   *
   * @param {number} n1 - Value to convert; expected range is 0 to 15.
   * @returns {string} Four characters, each "0" or "1". Values below 1 give
   *   "0000" and values above 15 give "1111".
   */
  Dec2Dig: function (n1) {
    var remaining = n1;
    var bits = "";
    for (var weight = 8; weight >= 1; weight /= 2) {
      if (remaining >= weight) {
        bits += "1";
        remaining -= weight;
      } else {
        bits += "0";
      }
    }
    return bits;
  },

  /**
   * Expands a hexadecimal string into a binary string, four bits per digit.
   *
   * @param {string} s - Hex digits, upper or lower case.
   * @returns {string} The binary expansion; any character that is not a hex
   *   digit contributes "0000".
   */
  Str2Hex: function (s) {
    var hexDigits = "0123456789ABCDEF";
    var bits = "";
    for (var i = 0; i < s.length; i++) {
      bits += this.Dec2Dig(hexDigits.indexOf(s.charAt(i).toUpperCase()));
    }
    return bits;
  },

  /**
   * Percent-encodes every character at or above U+0100 as UTF-8.
   *
   * Characters below U+0100 are copied through unchanged, including a literal
   * "%", so input that already contains "%XX" text does not round-trip.
   *
   * @param {string} s1 - Text to encode; other values are converted with String().
   * @returns {string} The text with characters at or above U+0100 replaced
   *   by "%XX" byte sequences.
   */
  GB2312ToUTF8: function (s1) {
    var text = String(s1);
    var encoded = "";
    for (var i = 0; i < text.length; i++) {
      var code = text.charCodeAt(i);
      if (code < 0x100) {
        encoded += text.charAt(i);
        continue;
      }
      var next = i + 1 < text.length ? text.charCodeAt(i + 1) : 0;
      if (code >= 0xD800 && code <= 0xDBFF && next >= 0xDC00 && next <= 0xDFFF) {
        // A valid surrogate pair is one code point and needs a 4-byte sequence.
        encoded += encodeURIComponent(text.substr(i, 2));
        i++;
        continue;
      }
      // Lone surrogates also take this path: encodeURIComponent would throw on them.
      var hex = ("0000" + code.toString(16).toUpperCase()).slice(-4);
      encoded += this.Hex2Utf8(this.Str2Hex(hex));
    }
    return encoded;
  },

  /**
   * Decodes percent-encoded UTF-8 sequences ("%XX") back into characters.
   *
   * Handles 1- to 4-byte sequences. Decoding is lenient: overlong sequences
   * and 3-byte-encoded surrogates are accepted. A "%" that does not start a
   * well-formed sequence is copied through as a literal "%".
   *
   * @param {string} str1 - Text to decode; other values are converted with String().
   * @returns {string} The decoded text.
   */
  UTF8ToGB2312: function (str1) {
    var text = String(str1);
    var decoded = "";
    var pos = 0;

    // Returns the byte written as "%XX" at index `at`, or -1 if there is none.
    function byteAt(at) {
      var pair = text.substr(at + 1, 2);
      if (text.charAt(at) !== "%" || !/^[0-9A-Fa-f]{2}$/.test(pair)) {
        return -1;
      }
      return parseInt(pair, 16);
    }

    var pct = text.indexOf("%", pos);
    while (pct !== -1) {
      decoded += text.substring(pos, pct);

      var lead = byteAt(pct);
      var extra = -1; // number of continuation bytes; -1 marks a malformed sequence
      var codePoint = 0;
      if (lead >= 0 && lead < 0x80) {
        extra = 0;
        codePoint = lead;
      } else if ((lead & 0xE0) === 0xC0) {
        extra = 1;
        codePoint = lead & 0x1F;
      } else if ((lead & 0xF0) === 0xE0) {
        extra = 2;
        codePoint = lead & 0x0F;
      } else if ((lead & 0xF8) === 0xF0) {
        extra = 3;
        codePoint = lead & 0x07;
      }

      for (var k = 1; k <= extra; k++) {
        var cont = byteAt(pct + 3 * k);
        if (cont < 0 || (cont & 0xC0) !== 0x80) {
          extra = -1;
          break;
        }
        codePoint = (codePoint << 6) | (cont & 0x3F);
      }

      if (extra < 0 || codePoint > 0x10FFFF) {
        // Keep the "%" as text and resume scanning right after it.
        decoded += "%";
        pos = pct + 1;
      } else {
        if (codePoint > 0xFFFF) {
          var offset = codePoint - 0x10000;
          decoded += String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
        } else {
          decoded += String.fromCharCode(codePoint);
        }
        pos = pct + 3 * (extra + 1);
      }
      pct = text.indexOf("%", pos);
    }
    return decoded + text.substring(pos);
  }
};
// test:
// GBK => UTF8:
var utf8 = GB2312UTF8.GB2312ToUTF8("中文GB2312");
// UTF8 => GBK:
GB2312UTF8.UTF8ToGB2312(utf8);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment