Created
September 29, 2023 22:23
-
-
Save lemire/e5f9a63680b10687f216d4c43d3d84e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
import { bench, run } from "mitata"; | |
import { existsSync, createWriteStream, readFileSync, mkdirSync } from "node:fs"; | |
import path from "node:path"; | |
import axios from "axios"; | |
// Absolute path of the `fixtures` directory that sits next to this module.
const fixturesFolderPath = new URL('fixtures', import.meta.url).pathname;

// Datasets to benchmark against. These must be raw-content URLs: a GitHub
// `/blob/` URL serves the HTML viewer page, not the file itself, which would
// make the benchmark parse HTML markup instead of the URL list.
const urls = [
  "https://raw.githubusercontent.com/ada-url/url-various-datasets/main/files/isaacs_files.txt",
];
/**
 * Map a dataset URL to the local file that caches its content.
 *
 * The file lives inside `fixturesFolderPath` (the directory that is created
 * next to this module), so the result does not depend on the current working
 * directory of the process.
 *
 * @param {string} url - Remote URL of the dataset.
 * @returns {string} Path of the local copy, named after the last path
 *   segment of `url`.
 */
function get_filename(url) {
  return path.join(fixturesFolderPath, path.basename(url));
}
/**
 * Download `url` and store the response body at `get_filename(url)`.
 *
 * @param {string} url - Remote URL to fetch.
 * @returns {Promise<void>} Resolves once the whole body has been written to
 *   the destination file; rejects if the request, the response stream or the
 *   file write fails.
 */
async function downloadFile(url) {
  const response = await axios({
    method: "GET",
    url,
    responseType: "stream",
  });
  const source = response.data;
  const writer = createWriteStream(get_filename(url));

  return new Promise((resolve, reject) => {
    // Wait for the *write* side: the readable's "end" only means the last
    // chunk was handed over, not that it has been flushed to the file.
    writer.on("finish", () => {
      resolve();
    });
    // Without a handler a write failure (e.g. missing directory) would be an
    // unhandled "error" event and crash the process.
    writer.on("error", (err) => {
      source.destroy();
      reject(err);
    });
    source.on("error", (err) => {
      writer.destroy();
      reject(err);
    });
    source.pipe(writer);
  });
}
// Make sure the fixtures directory exists. With `recursive: true` the call
// does not fail when the directory is already there.
mkdirSync(fixturesFolderPath, { recursive: true });

// Only fetch the datasets that have no local copy yet.
const urls_for_download = urls.filter((url) => !existsSync(get_filename(url)));
const all_promises = urls_for_download.map((url) => downloadFile(url));

// `axios.all` is a deprecated alias of `Promise.all`; use the standard API.
// Wait for every download before the files are read below.
await Promise.all(all_promises);
// Running totals updated by the benchmark callbacks below. They accumulate
// over every invocation of a callback; the figures reported at the end are
// ratios of these totals.
let length = 0;
let bad_url = 0;
let good_url = 0;

for (const url of urls) {
  const filename = get_filename(url);
  const file_content = readFileSync(filename, "utf-8");
  const lines = file_content.split("\n");
  // A file that ends with a newline produces a trailing empty string, which
  // is not an entry of the dataset and must not be counted as a bad URL.
  if (lines.length > 0 && lines[lines.length - 1] === "") {
    lines.pop();
  }

  // One benchmark per dataset: validate every line and, when it parses,
  // build the URL and add the length of its serialized form.
  bench(filename, () => {
    for (let i = 0; i < lines.length; i++) {
      if (URL.canParse(lines[i])) {
        length += new URL(lines[i]).href.length;
        good_url++;
      } else {
        bad_url++;
      }
    }
    // Return the total so the loop has an observable result.
    return length;
  });
}

await run();

// Guard both divisions so an empty dataset reports 0 instead of NaN.
const total_url = good_url + bad_url;
const average_size = good_url > 0 ? Math.round(length / good_url) : 0;
const bad_ratio =
  total_url > 0 ? Math.round((bad_url / total_url) * 10000) / 100 : 0;

console.info(`Average URL size: ${average_size} bytes`);
console.info("Ratio of bad URLs:", `${bad_ratio}%`);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment