Skip to content

Instantly share code, notes, and snippets.

@martialboniou
Created February 27, 2012 11:20
Show Gist options
  • Save martialboniou/1923134 to your computer and use it in GitHub Desktop.
Save martialboniou/1923134 to your computer and use it in GitHub Desktop.
Shen string functions for UCS-2 and UTF-8 JavaScript REPL
if (("€").length === 3)
{
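// This engine does not store strings as UCS-2: "€" measures 3 units here,
// so every charCodeAt() value is one byte of the character's UTF-8 encoding.
// The input is assumed to be well-formed UTF-8 (it is not validated).
// This should work with SpiderMonkey's multi-byte string representation.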
// string->n for engines whose strings hold raw UTF-8 bytes.
//
// Decodes the UTF-8 sequence that starts at str[0] and returns its code
// point as a number. Every charCodeAt() value is treated as one byte.
//
// Sequence length is chosen from the lead byte alone:
//   < 128 -> 1 byte,  < 224 -> 2 bytes, < 240 -> 3 bytes,
//   < 248 -> 4 bytes, < 252 -> 5 bytes, < 254 -> 6 bytes.
// Each byte is weighted by 64^position and the constant subtracted at the
// end removes the marker bits of all bytes at once, e.g. for two bytes
// 12416 = 192 * 64 + 128 (lead marker 0xC0, continuation marker 0x80).
//
// Returns the string 'undefined' when the lead byte is 254/255 or when
// str is empty (charCodeAt(0) is NaN, so no range matches). The input is
// not validated: a truncated sequence yields NaN, and a stray continuation
// byte in lead position is decoded as if it were a 2-byte lead.
function string_$gt$n(str)
{
    // Declared locally: the bare assignment used previously created an
    // implicit global and throws a ReferenceError in strict-mode code.
    var fB = str.charCodeAt(0);
    return (fB < 128) ? fB :
        (fB < 224) ? fB * 64 + str.charCodeAt(1) - 12416 :
        (fB < 240) ? fB * 4096 + str.charCodeAt(1) * 64 + str.charCodeAt(2) - 925824 :
        (fB < 248) ? fB * 262144 + str.charCodeAt(1) * 4096 + str.charCodeAt(2) * 64 + str.charCodeAt(3) - 63447168 :
        (fB < 252) ? fB * 16777216 + str.charCodeAt(1) * 262144 + str.charCodeAt(2) * 4096 + str.charCodeAt(3) * 64 + str.charCodeAt(4) - 4194836608 :
        (fB < 254) ? fB * 1073741824 + str.charCodeAt(1) * 16777216 + str.charCodeAt(2) * 262144 + str.charCodeAt(3) * 4096 + str.charCodeAt(4) * 64 + str.charCodeAt(5) - 272764510336 : 'undefined';
}
// n->string for engines whose strings hold raw UTF-8 bytes.
//
// Encodes the number n as a UTF-8 byte sequence and returns a string with
// one character per byte. Sequence length by range of n:
//   < 128        -> 1 byte
//   < 2048       -> 2 bytes (lead 192 / 0xC0)
//   < 65536      -> 3 bytes (lead 224 / 0xE0)
//   < 2097152    -> 4 bytes (lead 240 / 0xF0)
//   < 67108864   -> 5 bytes (lead 248 / 0xF8)
//   < 2147483648 -> 6 bytes (lead 252 / 0xFC)
// Every continuation byte is 128 plus six bits of n.
//
// Returns the string 'undefined' when n is 2147483648 or larger, or NaN.
// Negative or fractional n is not checked.
function n_$gt$string(n)
{
    // First build the array of UTF-8 bytes.
    var bytes = (n < 128) ? [ n ] :
        (n < 2048) ? [ 192 + Math.floor(n / 64), 128 + n % 64 ] :
        // The 3-byte lead marker is 224 (0xE0); 244 produced invalid bytes
        // for every value in this range (e.g. 8364, the euro sign).
        (n < 65536) ? [ 224 + Math.floor(n / 4096), 128 + Math.floor(n / 64) % 64, 128 + n % 64 ] :
        (n < 2097152) ? [ 240 + Math.floor(n / 262144), 128 + Math.floor(n / 4096) % 64, 128 + Math.floor(n / 64) % 64, 128 + n % 64 ] :
        (n < 67108864) ? [ 248 + Math.floor(n / 16777216), 128 + Math.floor(n / 262144) % 64, 128 + Math.floor(n / 4096) % 64, 128 + Math.floor(n / 64) % 64, 128 + n % 64 ] :
        // Upper bound of the 6-byte form; the comparison used to be
        // inverted, rejecting valid values and accepting oversized ones.
        (n < 2147483648) ? [ 252 + Math.floor(n / 1073741824), 128 + Math.floor(n / 16777216) % 64, 128 + Math.floor(n / 262144) % 64, 128 + Math.floor(n / 4096) % 64, 128 + Math.floor(n / 64) % 64, 128 + n % 64 ] : 'undefined';
    if (bytes === 'undefined') return bytes;
    // Then turn the bytes into a string, one character per byte.
    return String.fromCharCode.apply(String, bytes);
}
}
else
{
// string->n for engines whose strings are sequences of 16-bit units:
// returns the numeric value of the first unit of str.
// An empty str yields NaN, as charCodeAt(0) does.
function string_$gt$n(str)
{
    var firstUnit = str.charCodeAt(0);
    return firstUnit;
}
// n->string for engines whose strings are sequences of 16-bit units:
// returns the one-character string whose unit value is n.
function n_$gt$string(n)
{
    var unitString = String.fromCharCode(n);
    return unitString;
}
}
// Quick demo of both directions: first the unit string "ж" is converted to
// its number, then the number 1078 is converted back to a string.
// NOTE(review): print is not defined in this file; presumably it is the
// JavaScript shell's built-in output function (e.g. SpiderMonkey) - confirm.
print(string_$gt$n("ж")); // 1078
print(n_$gt$string(1078));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment