Skip to content

Instantly share code, notes, and snippets.

@jridgewell
Created March 25, 2021 08:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jridgewell/40f7a063110f9a5c0f13bf965cf17590 to your computer and use it in GitHub Desktop.
Save jridgewell/40f7a063110f9a5c0f13bf965cf17590 to your computer and use it in GitHub Desktop.
TextEncoder vs charCodeAt (https://jsbench.github.io/#40f7a063110f9a5c0f13bf965cf17590) #jsbench #jsperf
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>TextEncoder vs charCodeAt</title>
<!-- Benchmark.js is loaded from the CDN before the suite script. Presumably ./suite.js is the
     benchmark script that checks for a global `Benchmark` before falling back to require(). -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/benchmark/1.0.0/benchmark.min.js"></script>
<script src="./suite.js"></script>
</head>
<body>
<!-- The page renders nothing but these instructions; the suite reports via console.log. -->
<h1>Open the console to view the results</h1>
<!-- NOTE(review): Chrome documents Ctrl+Shift+J (Windows/Linux) for the console, not
     ctrl + alt + j; confirm the intended browser before changing the visible text. -->
<h2><code>cmd + alt + j</code> or <code>ctrl + alt + j</code></h2>
</body>
</html>
"use strict";
(function (factory) {
if (typeof Benchmark !== "undefined") {
factory(Benchmark);
} else {
factory(require("benchmark"));
}
})(function (Benchmark) {
var suite = new Benchmark.Suite;
Benchmark.prototype.setup = function () {
// Single TextEncoder instance reused by every encode() call.
const encoder = new TextEncoder();
// ASCII-only inputs: 'test' repeated 100 times (400 chars) and 5000 times (20,000 chars).
const shortAscii = 'test'.repeat(100);
const longAscii = 'test'.repeat(5000);

/**
 * Encodes a string with the platform TextEncoder.
 *
 * @param {string} str - Text to encode.
 * @returns {Uint8Array} The bytes produced by `encoder.encode(str)`.
 */
function encode(str) {
  const utf8 = encoder.encode(str);
  return utf8;
}
// UTF-8 marker bits that toBytes() ORs onto the payload bits of each output byte.
const CONTINUE = 0b1000_0000; // 10xxxxxx: continuation byte
const TWO_BYTE = 0b1100_0000; // 110xxxxx: lead byte of a 2-byte sequence
// Fixed: this literal was missing its `0b` prefix and evaluated to decimal 11,100,000.
const THREE_BYTE = 0b1110_0000; // 1110xxxx: lead byte of a 3-byte sequence
const FOUR_BYTE = 0b1111_0000; // 11110xxx: lead byte of a 4-byte sequence
// Selects the low six payload bits carried by a continuation byte.
const MASK = 0b11_1111;
/**
 * Encodes a JavaScript (UTF-16) string as UTF-8 bytes using charCodeAt.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one code point and written as four bytes. Any other surrogate code unit is
 * written as a three-byte sequence (TextEncoder would instead substitute
 * U+FFFD for such lone surrogates).
 *
 * @param {string} str - Text to encode.
 * @returns {Uint8Array} A buffer of exactly `length(str)` bytes.
 */
function toBytes(str) {
  const bytes = new Uint8Array(length(str));
  let index = 0;
  for (let i = 0; i < str.length; i++) {
    // `let`, not `const`: the surrogate-pair branch replaces the code unit
    // with the combined code point.
    let c = str.charCodeAt(i);
    if (c < 0x80) {
      // ASCII: one byte, stored as-is.
      bytes[index++] = c;
      continue;
    }
    if (c < 0x800) {
      bytes[index++] = (c >> 6) | TWO_BYTE;
      bytes[index++] = (c & MASK) | CONTINUE;
      // Without this the code fell through and also emitted a 3-byte form.
      continue;
    }
    if (
      (c & 0xfc00) === 0xd800 &&
      i + 1 < str.length &&
      (str.charCodeAt(i + 1) & 0xfc00) === 0xdc00
    ) {
      // Surrogate pair: merge both halves and consume the low surrogate (++i).
      c = 0x10000 + ((c & 0x03ff) << 10) + (str.charCodeAt(++i) & 0x03ff);
      bytes[index++] = (c >> 18) | FOUR_BYTE;
      bytes[index++] = ((c >> 12) & MASK) | CONTINUE;
      bytes[index++] = ((c >> 6) & MASK) | CONTINUE;
      bytes[index++] = (c & MASK) | CONTINUE;
      continue;
    }
    // Remaining BMP code units (including unpaired surrogates): three bytes.
    bytes[index++] = (c >> 12) | THREE_BYTE;
    bytes[index++] = ((c >> 6) & MASK) | CONTINUE;
    bytes[index++] = (c & MASK) | CONTINUE;
  }
  return bytes;
}
/**
 * Counts the UTF-8 bytes needed to encode a JavaScript (UTF-16) string.
 *
 * toBytes() uses this to size its output buffer, so the branches here mirror
 * the ones it takes when writing bytes.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Byte count: 1 per code unit below 0x80, 2 below 0x800,
 *   4 per high+low surrogate pair, and 3 for every other code unit
 *   (including unpaired surrogates).
 */
function length(str) {
  let total = 0;
  for (let i = 0; i < str.length; i++) {
    // Read the code unit at the current index (previously always index 0).
    const c = str.charCodeAt(i);
    if (c < 0x80) {
      total += 1;
      continue;
    }
    if (c < 0x800) {
      total += 2;
      continue;
    }
    if (
      (c & 0xfc00) === 0xd800 &&
      i + 1 < str.length &&
      (str.charCodeAt(i + 1) & 0xfc00) === 0xdc00
    ) {
      total += 4;
      // Skip the low surrogate so the pair is not counted a second time.
      i++;
      continue;
    }
    total += 3;
  }
  return total;
}
};
// Register the four cases: each encoder implementation against the short and
// the long ASCII input. The long-input cases were previously labelled
// "short ascii str" too, which made the reported results indistinguishable.
suite.add("TextEncoder (short ascii str)", function () {
  // TextEncoder (short ascii str)
  encode(shortAscii);
});
suite.add("charCodeAt (short ascii str)", function () {
  // charCodeAt (short ascii str)
  toBytes(shortAscii);
});
suite.add("TextEncoder (long ascii str)", function () {
  // TextEncoder (long ascii str)
  encode(longAscii);
});
suite.add("charCodeAt (long ascii str)", function () {
  // charCodeAt (long ascii str)
  toBytes(longAscii);
});
// On every "cycle" event, print the event target's string form as a list item.
suite.on("cycle", function (event) {
  var finished = event.target;
  console.log(" - " + finished);
});
// On "complete", print a divider followed by the benchmarks ranked from the
// highest `hz` value to the lowest.
suite.on("complete", function (evt) {
  console.log(new Array(30).join("-"));
  var results = evt.currentTarget.sort(function (a, b) {
    return b.hz - a.hz;
  });
  // `idx` is the forEach position; it was previously undeclared, which threw a
  // ReferenceError under "use strict" before any ranking line was printed.
  results.forEach(function (item, idx) {
    console.log((idx + 1) + ". " + item);
  });
});
// Print the title and a 29-dash divider, then start the suite.
var divider = "-".repeat(29);
console.log("TextEncoder vs charCodeAt");
console.log(divider);
suite.run();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment