Skip to content

Instantly share code, notes, and snippets.

@Electroid
Last active January 4, 2024 01:53
Show Gist options
  • Save Electroid/cdd5d673106342234ff5653e728f1c09 to your computer and use it in GitHub Desktop.
Save Electroid/cdd5d673106342234ff5653e728f1c09 to your computer and use it in GitHub Desktop.
The RegExp challenge
// Benchmark #1 - `isbot` tests
//
// Relevant issues:
// - https://github.com/oven-sh/bun/issues/5197
// Relevant credit:
// - https://github.com/strager for writing this benchmark
// - https://www.npmjs.com/package/isbot for `isbot`
import { run, bench } from "mitata";
import { gunzipSync } from "node:zlib";
// Download the gzipped user-agent corpus and split it into one entry per line.
const response = await fetch(
  "https://github.com/quick-lint/bun-vs-node/raw/17934db/bun-regexp-bug/uas.txt.gz",
);
// Fail fast on an HTTP error: otherwise the error page body would be handed to
// gunzipSync and surface as a confusing decompression failure.
if (!response.ok) {
  throw new Error(
    `Failed to download user agents: ${response.status} ${response.statusText}`,
  );
}
const compressedBody = await response.arrayBuffer();
const body = gunzipSync(Buffer.from(compressedBody)).toString("utf8");
// One user agent per line; if the text ends with a newline, the last entry is
// an empty string.
const userAgents = body.split("\n");
// Bot-detection pattern exercised by the benchmark below. Per the credits in
// the header it relates to the `isbot` package (presumably copied from it —
// verify against that package).
// It is a single large alternation of `^`-anchored prefixes, plain substrings,
// and lookbehind/lookahead-guarded terms such as `(?<! cu)bot`.
// NOTE(review): the literal carries no flags, so matching is case-sensitive
// even though every literal word in it is lowercase — confirm that is intended.
const regExp =
/ daum[ /]| deusu\/| yadirectfetcher|(?:^| )site|(?:^|[^g])news|@[a-z]|\(at\)[a-z]|\(github\.com\/|\[at\][a-z]|^12345|^<|^[\w \.\-\(\)]+(\/v?\d+(\.\d+)?(\.\d{1,10})?)?$|^[^ ]{50,}$|^active|^ad muncher|^amaya|^anglesharp\/|^anonymous|^avsdevicesdk\/|^axios\/|^bidtellect\/|^biglotron|^btwebclient\/|^castro|^clamav[ /]|^client\/|^cobweb\/|^coccoc|^custom|^ddg[_-]android|^discourse|^dispatch\/\d|^downcast\/|^duckduckgo|^facebook|^fdm[ /]\d|^getright\/|^gozilla\/|^hatena|^hobbit|^hotzonu|^hwcdn\/|^jeode\/|^jetty\/|^jigsaw|^linkdex|^lwp[-: ]|^metauri|^microsoft bits|^movabletype|^mozilla\/\d\.\d \(compatible;?\)$|^mozilla\/\d\.\d \w*$|^navermailapp|^netsurf|^offline explorer|^php|^postman|^postrank|^python|^read|^reed|^restsharp\/|^snapchat|^space bison|^svn|^swcd |^taringa|^test certificate info|^thumbor\/|^tumblr\/|^user-agent:mozilla|^valid|^venus\/fedoraplanet|^w3c|^webbandit\/|^webcopier|^wget|^whatsapp|^xenu link sleuth|^yahoo|^yandex|^zdm\/\d|^zoom marketplace\/|^{{.*}}$|adbeat\.com|appinsights|archive|ask jeeves\/teoma|bit\.ly\/|bluecoat drtr|(?<! cu)bot|browsex|burpcollaborator|capture|catch|check|chrome-lighthouse|chromeframe|cloud|crawl|cryptoapi|dareboost|datanyze|dataprovider|dejaclick|dmbrowser|download|evc-batch\/|feed|firephp|freesafeip|ghost|gomezagent|(?<! (?:channel\/|google\/))google(?!(app|\/google| pixel))|headlesschrome\/|(?<!(?:lib))http|httrack|hubspot marketing grader|hydra|ibisbrowser|images|iplabel|ips-agent|java(?!;)|library|mail\.ru\/|manager|monitor|morningscore\/|neustar wpm|nutch|offbyone|optimize|pageburst|pagespeed|perl|phantom|pingdom|powermarks|preview|proxy|ptst[ /]\d|reader|rexx;|rigor|rss|scan|scrape|(?<! ya(?:yandex)?)search|serp ?reputation ?management|server|sogou|sparkler\/|speedcurve|spider|splash|statuscake|stumbleupon\.com|supercleaner|synapse|synthetic|taginspector\/|torrent|tracemyfile|transcoder|trendsmapresolver|twingly recon|url|virtuoso|wappalyzer|webglance|webkit2png|websitemetadataretriever|whatcms\/|wordpress|zgrab/;
// Benchmark: test the pattern against every user agent and tally the matches.
bench("isbot", () => {
  let matchCount = 0;
  for (const userAgent of userAgents) {
    if (regExp.test(userAgent)) {
      matchCount += 1;
    }
  }
});
// Execute the registered benchmark and print the results.
await run();
// Benchmark #2 - Markdown parser
//
// Relevant issues:
// - https://github.com/markedjs/marked/issues/2863
// - https://github.com/oven-sh/bun/issues/3464
import { run, bench } from "mitata";
import { marked } from "marked";
// Download the markdown document that is fed to the parser benchmarks.
const response = await fetch(
  "https://github.com/markedjs/marked/files/11911404/test.md",
);
// Fail fast on an HTTP error so the benchmarks never silently parse an error
// page instead of the intended markdown file.
if (!response.ok) {
  throw new Error(
    `Failed to download markdown input: ${response.status} ${response.statusText}`,
  );
}
const data = await response.text();
// Benchmark: parse the document synchronously.
// Calls marked without the legacy callback argument (the plain call returns
// the HTML string directly) and does not log inside the timed function, so the
// measurement covers parsing only and the report is not flooded with output.
bench("marked (sync)", () => {
  const html = marked(data);
  // Consume the result and make sure the parse really completed synchronously.
  if (typeof html !== "string") {
    throw new TypeError("marked() did not return a string synchronously");
  }
});
// Benchmark: parse the same document with marked's `async` option enabled and
// wait for the returned value to settle.
bench("marked (async)", async () => {
  const asyncOptions = { async: true };
  await marked(data, asyncOptions);
});
// Execute both registered marked benchmarks and print the results.
await run();
// Benchmark #3 - Capture groups
//
// Relevant issues:
// - https://github.com/oven-sh/bun/issues/3464#issuecomment-1774043531
// Relevant credit:
// - https://github.com/yschroe for writing this benchmark
import { run, bench } from "mitata";
// Input text for the benchmark. It contains no uppercase "A" or "B", so none
// of the case-sensitive patterns below can match it; every search therefore
// scans the whole string without finding anything.
const loremIpsum =
"Lorem ipsum dolor sit amet, consectetur adipisici elit, sed eiusmod tempor incidunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquid ex ea commodi consequat. Quis aute iure reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint obcaecat cupiditat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
// Four global patterns built from the same A/B sequence, differing in how many
// capture groups they use and whether the quantifier is applied to a group
// (none, one on B, one each on A and B, quantified groups on both).
const regExps = [
/A{1,3}B{1,3}/g,
/A{1,3}(B){1,3}/g,
/(A)(B){1,3}/g,
/(A){1,3}(B){1,3}/g,
];
// Benchmark: run a global match with every pattern and total the match counts.
bench("capture groups", () => {
  let count = 0;
  for (const regExp of regExps) {
    // String.prototype.match returns null when nothing matches, so `?.length`
    // alone would be undefined and turn the running total into NaN; fall back
    // to 0 for patterns without matches.
    count += loremIpsum.match(regExp)?.length ?? 0;
  }
});
// Execute the registered benchmark and print the results.
await run();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment