Skip to content

Instantly share code, notes, and snippets.

@phuedx
Last active December 23, 2015 12:08
Show Gist options
  • Save phuedx/2c611a651eb2ed501b6b to your computer and use it in GitHub Desktop.
Save phuedx/2c611a651eb2ed501b6b to your computer and use it in GitHub Desktop.
The sizes of Parsoid-generated and non-Parsoid-generated (almost all of them Cite-generated) element IDs on https://en.wikipedia.org/api/rest_v1/page/html/Barack_Obama.
// Total size, in bytes, of the page's HTML. This is a hard-coded snapshot
// obtained with the command below; re-run it to refresh the value.
// $ curl -so /dev/null https://en.wikipedia.org/api/rest_v1/page/html/Barack_Obama -w '%{size_download}'
var TOTAL_BYTES = 1582727;
/**
 * Computes how many bytes a list of element IDs occupies in UTF-8 markup.
 *
 * Each ID is counted as its UTF-8 encoded length plus 5 bytes for the
 * surrounding `id=''` attribute syntax. Measuring the encoded length (rather
 * than `String.prototype.length`, which counts UTF-16 code units) keeps the
 * total correct for IDs that contain non-ASCII characters.
 *
 * @param {string[]} ids - Element ID values.
 * @returns {number} Total number of bytes; 0 for an empty list.
 */
function total_bytes_for_ids(ids) {
  // One encoder is reused for every ID instead of allocating one per element.
  const encoder = new TextEncoder();

  return ids.reduce(
    // 5 additional bytes for the "id=''" string
    (acc, id) => acc + encoder.encode(id).length + 5,
    0
  );
}
// Measures how much of the page is spent on element IDs. Intended to be run
// in the browser console on the page being measured (it reads `document`).
// `var` is kept so the snippet can be pasted into the same console repeatedly.

// The id of every element in the document that carries an id attribute.
var ids = Array.from(document.querySelectorAll('[id]')).map(el => el.id);
var ids_bytes = total_bytes_for_ids(ids);
console.log(ids_bytes);

var non_parsoid_ids = ids.filter(
  // Parsoid IDs are prefixed with "mw" but some reference IDs are prefixed
  // with "mw-"; those are not Parsoid-generated, so they are kept here.
  // An ID is non-Parsoid when it does not start with "mw" at all, or when it
  // starts with "mw-".
  id => !id.startsWith('mw') || id.startsWith('mw-')
);
var non_parsoid_ids_bytes = total_bytes_for_ids(non_parsoid_ids);
console.log(non_parsoid_ids_bytes);

// Fractions of the total page size taken up by all IDs and by non-Parsoid IDs.
console.log(ids_bytes / TOTAL_BYTES);
console.log(non_parsoid_ids_bytes / TOTAL_BYTES);
88845
34107
0.05613412799554187
0.021549515488141668
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment