Skip to content

Instantly share code, notes, and snippets.

@PaulCapron
Created Nov 5, 2020
Embed
What would you like to do?
Preview Wikipedia links of an HTML document (via the `title` attribute)
/**
* @file Preview Wikipedia links 📖🔗👀
*
* @see <https://en.wikipedia.org/api/rest_v1/>
*
* @version 2020-11
* @since 2018-08
* @author <https://paul.fragara.com/#me>
* @license CC0-1.0
* The author has dedicated all rights to this software to the public domain.
* This software is distributed without any warranty.
*/
"use strict";
/**
 * Wikipedia host-name language codes that are not themselves the BCP 47 tag
 * of the language, mapped to the tag to use instead. Keys are lower case.
 *
 * @see <https://meta.wikimedia.org/wiki/List_of_Wikipedias#Nonstandard_language_codes>
 * @see <https://www.ietf.org/rfc/bcp/bcp47.txt>
 *
 * @type {!Map<string, string>}
 */
const WIKIPEDIA_TO_BCP47 = new Map([
  ["simple", "en"],
  ["nrm", "nrf"],
  ["bat-smg", "sgs"],
  ["roa-rup", "rup"],
  ["fiu-vro", "vro"],
  ["zh-yue", "yue"],
  ["zh-min-nan", "nan"],
  ["zh-classical", "lzh"],
]);

/**
 * Translate a Wikipedia language/host name into a BCP 47 language tag.
 *
 * The lookup ignores letter case. A code that is not one of the listed
 * special cases is handed back exactly as received (case included).
 *
 * @param {!string} languageCode (Human) language, in Wikipedia naming
 * @return {!string} A corresponding BCP 47 language tag
 */
function bcp47FromWikipedia(languageCode) {
  const special = WIKIPEDIA_TO_BCP47.get(languageCode.toLowerCase());
  if (special === undefined) {
    return languageCode;
  }
  return special;
}
/**
 * Query Wikipedia to put a summary, in the ‘title’ attribute,
 * of any HTMLAnchorElement pointing to one of its articles.
 *
 * An element is skipped when its title is anything but the empty string,
 * when it has no string `href`, when that href does not match `regexp`, or
 * when the element does not match `:lang()` for the targeted Wikipedia.
 * Titles are assigned asynchronously, once the summary has been fetched.
 *
 * A request that fails (network error, body that is not JSON) is reported
 * once with `console.warn` and leaves the matching elements untitled. Its
 * rejected promise stays in `memo`, so the URL is not requested again.
 *
 * @see <https://www.mediawiki.org/wiki/Specs/Summary/1.3.0>
 * @see <https://en.wikipedia.org/api/rest_v1/#/Page%20content/get_page_summary__title_>
 *
 * @param {!Iterable<Element>=} elts Link elements, non-Wikipedia href get filtered
 * @param {!Map<string, Promise<string>>=} memo (Previously recorded) fetched summaries,
 *     keyed by API URL; updated in place
 * @param {RegExp=} regexp Matches Wikipedia URLs, extracts language & article name (“slug”)
 * @param {number=} reqMax Max API calls; Wikimedia asks for ≤ 200 requests per second.
 *     No new request is issued once `memo` holds that many entries
 * @param {!RequestInit=} reqParams Fetch parameters
 * @return {!Map<string, Promise<string>>} (Updated) fetched summaries
 */
function titleWikipediaAnchors(
  elts = document.getElementsByTagName("a"),
  memo = new Map,
  regexp = /^https?:\/\/([a-z\-]{2,64})\.wikipedia\.org\/wiki\/([^?#]+)/i,
  reqMax = 100,
  reqParams = {
    "headers": {
      "Accept": "application/json; charset=utf-8; "
        + 'profile="https://www.mediawiki.org/wiki/Specs/Summary/1.4.2"',
    },
    "cache": "force-cache", // HTTP-expired data is totally OK
    "credentials": "omit",
    "mode": "cors",
    // Wikimedia asks to “Set a unique User-Agent […] to contact you quickly.”
    // By using CORS, the ‘Origin’ header points to us, the embedding site.
  }
) {
  for (const elt of elts) {
    // Never overwrite an existing title (this also skips elements that do
    // not expose a string `title` at all).
    if (elt.title !== "") continue;

    // Elements without a string href cannot point anywhere: skip them
    // instead of throwing and aborting the whole pass.
    if (typeof elt.href !== "string") continue;

    const parts = elt.href.match(regexp);
    if (parts === null) continue;
    const [, lang, slug] = parts;

    // Only title links whose own language is that of the targeted Wikipedia.
    if (!elt.matches(`:lang(${bcp47FromWikipedia(lang)})`)) continue;

    // A literal slash in the slug must be escaped to stay one path segment.
    const url = `https://${lang}.wikipedia.org/api/rest_v1/page/summary/`
      + slug.replace(/\//g, "%2F");

    if (!memo.has(url)) {
      if (memo.size >= reqMax) continue; // request budget exhausted

      const summary = fetch(url, reqParams)
        .then((resp) => resp.json())
        .then((json) => {
          // A body without `titles` falls back to the slug as found in the link.
          const name = json.titles ? json.titles.normalized : slug;
          const description = json.description ? ` (${json.description})` : "";
          return name + description;
        });

      // Report a failure once per URL. The stored promise itself is left
      // as is, so callers inspecting `memo` still observe the rejection.
      summary.catch((err) => {
        console.warn("wkpd: failed to fetch", url, err);
      });

      memo.set(url, summary);
    }

    memo.get(url).then(
      (txt) => { elt.title = txt; },
      () => { /* No summary available: leave the link untitled. */ }
    );
  }
  return memo;
}
// Entry point: title every Wikipedia link of the current document, then log
// the map of (possibly still pending) summaries that were requested.
{
  const requestedSummaries = titleWikipediaAnchors();
  console.log("wkpd: fetched", requestedSummaries);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment