Skip to content

Instantly share code, notes, and snippets.

@kawanet
Created September 3, 2021 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kawanet/a66a0e2657464c57bcff2249286d3a24 to your computer and use it in GitHub Desktop.
Save kawanet/a66a0e2657464c57bcff2249286d3a24 to your computer and use it in GitHub Desktop.
Benchmarks for TextDecoder and TextEncoder
#!/usr/bin/env node
/**
* text-decoder-bench.js
* @copyright Kawanet
* @license MIT
* @see https://gist.github.com/kawanet/a66a0e2657464c57bcff2249286d3a24
*/
/**
 * Decodes a range of UTF-8 bytes into a string in plain JavaScript.
 *
 * Code units are gathered in batches (a new batch starts roughly every 256
 * input bytes) and each batch is converted with one String.fromCharCode call,
 * so the argument list handed to `apply` stays small for long inputs.
 *
 * A multi-byte sequence whose continuation bytes would lie at or beyond
 * `offset + length` is decoded as U+FFFD, and no byte outside the requested
 * range is consumed. Apart from that, malformed input is not validated:
 * bytes that match no lead-byte pattern (stray continuation bytes,
 * 0xF8-0xFF) are skipped, and continuation bytes are masked with 0x3F
 * without checking their top bits.
 *
 * @param {Uint8Array|number[]} buffer - Indexable sequence of byte values.
 * @param {number} offset - Index of the first byte to decode.
 * @param {number} length - Number of bytes to decode.
 * @returns {string} The decoded text.
 */
const readString = (buffer, offset, length) => {
  if (length === 0) {
    return "";
  } else if (length === 1) {
    // Fast path for a single ASCII byte; anything else uses the general loop.
    const c = buffer[offset];
    if (c < 128) {
      return String.fromCharCode(c);
    }
  }

  let index = offset | 0;
  const end = offset + length;
  let string = "";

  while (index < end) {
    const chunk = [];
    const cend = Math.min(index + 256, end);
    while (index < cend) {
      const chr = buffer[index++];
      if (chr < 128) { // 1 byte
        chunk.push(chr);
      } else if ((chr & 0xE0) === 0xC0) { // 2 bytes
        if (end - index < 1) {
          // Truncated: the continuation byte is outside the requested range.
          chunk.push(0xFFFD);
        } else {
          chunk.push((chr & 0x1F) << 6 |
            (buffer[index++] & 0x3F));
        }
      } else if ((chr & 0xF0) === 0xE0) { // 3 bytes
        if (end - index < 2) {
          // Truncated: leave the remaining bytes to the normal loop.
          chunk.push(0xFFFD);
        } else {
          chunk.push((chr & 0x0F) << 12 |
            (buffer[index++] & 0x3F) << 6 |
            (buffer[index++] & 0x3F));
        }
      } else if ((chr & 0xF8) === 0xF0) { // 4 bytes
        if (end - index < 3) {
          // Truncated: leave the remaining bytes to the normal loop.
          chunk.push(0xFFFD);
        } else {
          let code = (chr & 0x07) << 18 |
            (buffer[index++] & 0x3F) << 12 |
            (buffer[index++] & 0x3F) << 6 |
            (buffer[index++] & 0x3F);
          if (code < 0x010000) {
            chunk.push(code);
          } else { // surrogate pair
            code -= 0x010000;
            chunk.push((code >>> 10) + 0xD800, (code & 0x3FF) + 0xDC00);
          }
        }
      }
    }
    string += String.fromCharCode.apply(String, chunk);
  }
  return string;
};
const {Suite} = require("benchmark");
const assert = require("assert").strict;
/** Returns a promise that resolves (with no value) after `msec` milliseconds. */
const SLEEP = (msec) => new Promise((resolve) => {
  setTimeout(resolve, msec);
});
/**
 * Builds and starts the decoding benchmark.
 *
 * For each sample character and each power-of-two repeat count from 1 to
 * 65536, three decoders are registered: the hand-written readString, a shared
 * TextDecoder, and Buffer#toString. Each decoder is first checked against the
 * expected string, then added to the suite. The encoded text sits 100 bytes
 * into its backing array, so every decoder has to honour an offset.
 */
async function main() {
  const suite = new Suite();
  suite.on("cycle", (event) => {
    console.log(String(event.target));
  });

  const decoder = new TextDecoder();
  const offset = 100;
  // NOTE(review): both entries are identical, so every size is registered
  // twice - confirm whether the second was meant to be a multi-byte character.
  const chars = ["A", "A"];
  // Repeat counts 1, 2, 4, ... 65536.
  const sizes = Array.from({length: 17}, (_, exp) => 2 ** exp);

  for (const c of chars) {
    for (const len of sizes) {
      const text = c.repeat(len);
      const padding = Buffer.alloc(offset);
      const payload = Buffer.from(text);
      const data = Uint8Array.from(Buffer.concat([padding, payload]));
      const bytes = payload.byteLength;

      const fPureJS = () => readString(data, offset, bytes);
      const fTextDecoder = () => decoder.decode(data.subarray(offset));
      const fBuffer = () => {
        const view = Buffer.from(data.buffer, data.byteOffset + offset, data.byteLength - offset);
        return view.toString();
      };

      const candidates = [
        ["readString", fPureJS],
        ["TextDecoder", fTextDecoder],
        ["Buffer", fBuffer],
      ];

      // Verify every decoder before any of them is registered.
      for (const [label, fn] of candidates) {
        assert.equal(fn(), text, label);
      }
      for (const [label, fn] of candidates) {
        suite.add(`${label}\t${bytes}`, fn);
      }
    }
  }

  await SLEEP(100);
  suite.run({async: true});
}
// Start the benchmark; a rejection from main() is printed via console.error.
main().catch(console.error);
#!/usr/bin/env node
/**
* text-encoder-bench.js
* @copyright Kawanet
* @license MIT
* @see https://gist.github.com/kawanet/a66a0e2657464c57bcff2249286d3a24
*/
/**
 * Encodes a string as UTF-8 into `buffer`, starting at index `start`.
 *
 * A high surrogate followed by a low surrogate is combined into a single
 * 4-byte sequence. An unpaired surrogate (a high surrogate not followed by a
 * low one, or a low surrogate on its own) is written as U+FFFD (EF BF BD),
 * and the code unit after it is not consumed.
 *
 * No bounds checking is performed: the caller must provide room for up to
 * three bytes per UTF-16 code unit of `string`.
 *
 * @param {Uint8Array|number[]} buffer - Destination for the encoded bytes.
 * @param {number} start - Index in `buffer` of the first byte to write.
 * @param {string} string - Text to encode.
 * @returns {number} Number of bytes written.
 */
const writeString = (buffer, start, string) => {
  let index = start;
  const length = string.length;
  let chr = 0;
  let idx = 0;
  while (idx < length) {
    chr = string.charCodeAt(idx++);
    if (chr < 128) {
      buffer[index++] = chr;
    } else if (chr < 0x800) {
      // 2 bytes
      buffer[index++] = 0xC0 | (chr >>> 6);
      buffer[index++] = 0x80 | (chr & 0x3F);
    } else if (chr < 0xD800 || chr > 0xDFFF) {
      // 3 bytes
      buffer[index++] = 0xE0 | (chr >>> 12);
      buffer[index++] = 0x80 | ((chr >>> 6) & 0x3F);
      buffer[index++] = 0x80 | (chr & 0x3F);
    } else {
      // Surrogate range: only a high surrogate can start a pair, and only
      // when another code unit is available to complete it.
      const low = (chr < 0xDC00 && idx < length) ? string.charCodeAt(idx) : 0;
      if (low >= 0xDC00 && low <= 0xDFFF) {
        // 4 bytes - surrogate pair
        idx++;
        chr = (((chr - 0xD800) << 10) | (low - 0xDC00)) + 0x10000;
        buffer[index++] = 0xF0 | (chr >>> 18);
        buffer[index++] = 0x80 | ((chr >>> 12) & 0x3F);
        buffer[index++] = 0x80 | ((chr >>> 6) & 0x3F);
        buffer[index++] = 0x80 | (chr & 0x3F);
      } else {
        // Unpaired surrogate: write U+FFFD and leave the next unit untouched.
        buffer[index++] = 0xEF;
        buffer[index++] = 0xBF;
        buffer[index++] = 0xBD;
      }
    }
  }
  return index - start;
};
const {Suite} = require("benchmark");
const assert = require("assert").strict;
/** Returns a promise that resolves (with no value) after `msec` milliseconds. */
const SLEEP = (msec) => new Promise((resolve) => {
  setTimeout(resolve, msec);
});
/**
 * Builds and starts the encoding benchmark.
 *
 * For each sample character and each power-of-two repeat count from 1 to
 * 65536, three encoders are registered: the hand-written writeString,
 * TextEncoder#encodeInto, and Buffer#write. Each encoder gets its own
 * destination array and writes 100 bytes into it. Each encoder is run once
 * and its output decoded and compared with the input before being added to
 * the suite.
 */
async function main() {
  const suite = new Suite();
  suite.on("cycle", (event) => {
    console.log(String(event.target));
  });

  const encoder = new TextEncoder();
  const offset = 100;
  // NOTE(review): both entries are identical, so every size is registered
  // twice - confirm whether the second was meant to be a multi-byte character.
  const chars = ["A", "A"];
  // Repeat counts 1, 2, 4, ... 65536.
  const sizes = Array.from({length: 17}, (_, exp) => 2 ** exp);

  for (const c of chars) {
    for (const len of sizes) {
      const text = c.repeat(len);
      const bytes = Buffer.byteLength(text);
      const total = offset + bytes;
      const data1 = new Uint8Array(total);
      const data2 = new Uint8Array(total);
      const data3 = new Uint8Array(total);

      const fPureJS = () => writeString(data1, offset, text);
      const fTextEncoder = () => encoder.encodeInto(text, data2.subarray(offset));
      const fBuffer = () => {
        const view = Buffer.from(data3.buffer, data3.byteOffset, data3.byteLength);
        return view.write(text, offset);
      };

      const candidates = [
        ["writeString", fPureJS, data1],
        ["TextEncoder", fTextEncoder, data2],
        ["Buffer", fBuffer, data3],
      ];

      // Run each encoder once and confirm its output decodes to the input.
      for (const [label, fn, target] of candidates) {
        fn();
        const decoded = String(Buffer.from(target.subarray(offset)));
        assert.equal(decoded, text, label);
      }
      for (const [label, fn] of candidates) {
        suite.add(`${label}\t${bytes}`, fn);
      }
    }
  }

  await SLEEP(100);
  suite.run({async: true});
}
// Start the benchmark; a rejection from main() is printed via console.error.
main().catch(console.error);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment