Skip to content

Instantly share code, notes, and snippets.

@migerh
Created September 26, 2013 09:58
Show Gist options
  • Save migerh/6712140 to your computer and use it in GitHub Desktop.
Save migerh/6712140 to your computer and use it in GitHub Desktop.
Benchmark three different versions of utf8 decode(). See https://github.com/jsxgraph/jsxgraph/issues/50 for details. newerDecode() is a translation from C99 of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/.
<!DOCTYPE html>
<html>
<head>
<title></title>
<script type="text/javascript" src="http://jsxgraph.uni-bayreuth.de/~michael/jsxgui/Examples/libs/benchmark.js"></script>
</head>
<body>
<div id="output"></div>
<script type="text/javascript">
// NOTE(review): `counter` is not referenced anywhere else in the visible script.
var counter = 0;
// constants
// Lookup data for the table-driven UTF-8 decoder used by newerDecode().
// UTF8_ACCEPT is the state in which a complete code point has been read (and
// the state the decoder starts in); UTF8_REJECT is the state entered on a byte
// that is not valid at the current position.
var UTF8_ACCEPT = 0,
UTF8_REJECT = 12,
UTF8D = [
// The first part of the table (indices 0..255) maps each byte value to a
// character class (0..11). Grouping bytes into classes reduces the size of
// the transition table; the class is also used as a shift count to build the
// bitmask that extracts the payload bits of a lead byte.
// Each row below covers 32 consecutive byte values, starting at 0x00.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
// The second part (indices 256 and up) is a transition table that maps a
// combination of a state of the automaton and a character class to a state.
// It is read as UTF8D[256 + state + class], so states are stored as offsets
// that are multiples of 12 (one group of 12 entries per state). Each full row
// below holds two states; the second half of the first row is the group for
// state 12 (UTF8_REJECT), which maps every class back to 12.
0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,
12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12
];
// Fallback for engines without a built-in String.prototype.repeat.
// The native method is left untouched when present: replacing it would swap a
// spec-compliant built-in for a slower approximation for every script on the page.
if (typeof String.prototype.repeat !== 'function') {
    /**
     * Return this string concatenated `num` times.
     *
     * @param {number} num - Repetition count; a value that is NaN yields ''.
     * @returns {string} The repeated string.
     */
    String.prototype.repeat = function (num) {
        // Joining an array of (count + 1) empty slots with this string as the
        // separator produces exactly `count` copies of it.
        var count = isNaN(num) ? 0 : Number(num);

        return new Array(count + 1).join(this);
    };
}
// Element that receives all textual output of this page.
var output = document.getElementById('output');
// Alternative input made of 3-byte sequences only:
//var shortStr = '\xe8\x87\xaa\xe8\x8d\x8a\xe6\xb9\x96\xe4\xbb\xa5\xe5\x8c\x97';
// Input unit: the four bytes of a single 4-byte UTF-8 sequence, stored one
// byte per character.
var shortStr = '\xf0\x9f\x99\x8a';
// The string actually handed to the decoders: 100 copies of the unit above.
var str = shortStr.repeat(100);
var suiteDecode = new Benchmark.Suite();
output.innerHTML = 'Input length: ' + str.length + '<br />';
/**
 * Decode a UTF-8 byte string (one byte per character) with the table-driven
 * automaton described by UTF8D.
 *
 * Well-formed input decodes exactly as before. Malformed input no longer
 * truncates the result: every invalid or incomplete sequence is replaced by
 * U+FFFD and decoding resumes with the following data.
 *
 * @param {string} utftext - String whose char codes are the UTF-8 bytes.
 * @returns {string} The decoded text; code points above U+FFFF are returned as
 *     surrogate pairs.
 */
var newerDecode = function (utftext) {
    var i, unit, type, prevState,
        codep = 0,
        state = UTF8_ACCEPT,
        chunks = [],
        len = utftext.length;

    for (i = 0; i < len; i++) {
        unit = utftext.charCodeAt(i);
        prevState = state;

        if (unit > 0xff) {
            // Not a byte: the class table only has entries for 0..255.
            state = UTF8_REJECT;
        } else {
            type = UTF8D[unit];

            if (state !== UTF8_ACCEPT) {
                // Continuation byte: append its low six bits.
                codep = (unit & 0x3f) | (codep << 6);
            } else {
                // First byte of a sequence: the class doubles as the shift
                // that masks off the length-marker bits.
                codep = (0xff >> type) & unit;
            }

            state = UTF8D[256 + state + type];
        }

        if (state === UTF8_ACCEPT) {
            if (codep > 0xffff) {
                // 0xD7C0 === 0xD800 - (0x10000 >> 10)
                chunks.push(String.fromCharCode(0xD7C0 + (codep >> 10)));
                chunks.push(String.fromCharCode(0xDC00 + (codep & 0x3FF)));
            } else {
                chunks.push(String.fromCharCode(codep));
            }
        } else if (state === UTF8_REJECT) {
            // The reject state has no way out in the transition table, so it
            // has to be left explicitly or the rest of the input is lost.
            chunks.push('\uFFFD');
            state = UTF8_ACCEPT;

            if (prevState !== UTF8_ACCEPT) {
                // The offending unit interrupted an unfinished sequence and may
                // itself start a valid one: look at it again from a clean
                // state. The retry runs with prevState === UTF8_ACCEPT, so it
                // cannot repeat.
                i--;
            }
        }
    }

    if (state !== UTF8_ACCEPT) {
        // Input ended in the middle of a multi-byte sequence.
        chunks.push('\uFFFD');
    }

    return chunks.join('');
};
/**
 * Decode a UTF-8 byte string (one byte per character) using built-ins only.
 *
 * escape() turns the bytes into a percent-encoded string, which
 * decodeURIComponent() then interprets as UTF-8. Malformed input makes
 * decodeURIComponent() throw a URIError.
 *
 * @param {string} utftext - String whose char codes are the UTF-8 bytes.
 * @returns {string} The decoded text.
 */
var newDecode = function (utftext) {
    var percentEncoded = escape(utftext);

    return decodeURIComponent(percentEncoded);
};
/**
 * Legacy UTF-8 decoder, kept unchanged as the baseline of the benchmark.
 *
 * Only three shapes are recognised: a unit below 128 is copied as is, a unit
 * in 192..223 is combined with the next unit, and every other unit is treated
 * as the start of a three-unit sequence. Four-byte sequences and stray
 * continuation bytes therefore decode to wrong characters, and no validation
 * takes place.
 *
 * @param {string} utftext - String whose char codes are the UTF-8 bytes.
 * @returns {string} The decoded text.
 */
var oldDecode = function (utftext) {
    var pieces = [],
        total = utftext.length,
        pos = 0,
        lead,
        second,
        third;

    while (pos < total) {
        lead = utftext.charCodeAt(pos);

        if (lead < 128) {
            pieces.push(String.fromCharCode(lead));
            pos += 1;
            continue;
        }

        // Reading past the end yields NaN, which the bit operations below
        // turn into 0.
        second = utftext.charCodeAt(pos + 1);

        if (lead > 191 && lead < 224) {
            pieces.push(String.fromCharCode(((lead & 31) << 6) | (second & 63)));
            pos += 2;
            continue;
        }

        third = utftext.charCodeAt(pos + 2);
        pieces.push(String.fromCharCode(((lead & 15) << 12) | ((second & 63) << 6) | (third & 63)));
        pos += 3;
    }

    return pieces.join('');
};
output.innerHTML += 'Run benchmark suites...<br />';
suiteDecode
    // one benchmark case per decoder, all fed the same input string
    .add('Decode#Old', function () {
        oldDecode(str);
    })
    .add('Decode#New', function () {
        newDecode(str);
    })
    .add('Decode#Newer', function () {
        newerDecode(str);
    })
    // add listeners
    .on('cycle', function (event) {
        // one result line per finished benchmark case
        output.innerHTML += (String(event.target)) + '<br />';
    })
    .on('complete', function () {
        // Each decoder runs once per line and the result is reused for both
        // the text and its length. Decoding stays interleaved with the writes
        // so that a throwing decoder still leaves the earlier lines on the page.
        var decoded;

        // NOTE(review): filter('fastest') and pluck('name') are provided by the
        // externally loaded benchmark library; confirm that the loaded version
        // still offers pluck().
        output.innerHTML += ('Fastest is ' + this.filter('fastest').pluck('name')) + '<br />';

        decoded = oldDecode(str);
        output.innerHTML += 'Old: ' + decoded + ' / Length old: ' + decoded.length + '<br />';

        decoded = newDecode(str);
        output.innerHTML += 'New: ' + decoded + ' / Length new: ' + decoded.length + '<br />';

        decoded = newerDecode(str);
        output.innerHTML += 'Newer: ' + decoded + ' / Length newer: ' + decoded.length;
    })
    // run async
    .run({ 'async': true });
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment