Skip to content

Instantly share code, notes, and snippets.

@dontcallmedom
Last active May 6, 2021 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dontcallmedom/290986d35a8991a163f805e1692ff53a to your computer and use it in GitHub Desktop.
Save dontcallmedom/290986d35a8991a163f805e1692ff53a to your computer and use it in GitHub Desktop.
Process editors extracted from a custom reffy run
const {JSDOM} = require("jsdom");
const {Parser} = require("json2csv");
const fs = require("fs");
// Crawl results, loaded from the module path given as the first CLI argument.
// The code below iterates it as an array of spec records that may carry an
// `editors` list.
// NOTE(review): require() resolves "./x.json" against this script's directory,
// not the current working directory — confirm how the script is invoked.
const specData = require(process.argv[2]);
// Accumulator filled by the editor loop below:
// affiliation -> editor name -> array of {url, title} spec records.
const affiliations = {};
/**
 * Return a copy of `o` in which the keys of every nested plain object are
 * inserted in sorted order, so that JSON.stringify emits them deterministically.
 *
 * Primitives (including null) and arrays are returned as-is; arrays are not
 * recursed into, so objects stored inside arrays keep their own key order.
 *
 * @param {*} o - value to normalize
 * @returns {*} `o` itself for primitives/arrays, otherwise a new object
 */
const sortKeys = (o) => {
  if (Object(o) !== o || Array.isArray(o)) {
    return o;
  }
  // Build the result in one pass instead of re-spreading the accumulator for
  // every key (which copied all previous keys each time). Object.fromEntries
  // defines own properties, so a key such as "__proto__" stays a plain key.
  return Object.fromEntries(
    Object.keys(o)
      .sort()
      .map((key) => [key, sortKeys(o[key])])
  );
};
// Lookup table from the domain part of an editor's mailto: address to the
// organization credited for that editor. It is written grouped by organization
// and flattened into a plain domain -> organization object.
// "gmail.com" maps to "undetermined" because a personal mailbox says nothing
// about the employer; the editor loop then falls back to parsing the text.
const domainToAffiliation = Object.fromEntries(
  [
    // "adboe.com" is intentional here — presumably a misspelled address that
    // appears in the crawled data (TODO confirm).
    ["Adobe", ["adobe.com", "adboe.com"]],
    ["Google", ["google.com", "chromium.org"]],
    ["Mozilla", ["mozilla.com", "mozilla.org", "cs.stanford.edu", "mcc.id.au"]],
    ["Apple", ["apple.com"]],
    ["W3C", ["w3.org"]],
    ["Microsoft", ["microsoft.com"]],
    ["Disruptive Innovations", ["disruptive-innovations.com"]],
    ["Compuware", ["compuware.com"]],
    ["undetermined", ["gmail.com"]],
  ].flatMap(([organization, domains]) =>
    domains.map((domain) => [domain, organization])
  )
);
/** Own-property check that ignores members inherited from Object.prototype. */
function hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key);
}

/**
 * Split an editor's plain-text description into a name and an affiliation.
 * "Name (Affiliation)" is tried first, then "Name, Affiliation"; any other
 * text is kept whole as the name with an "undetermined" affiliation.
 */
function parseEditorText(text) {
  const parenthesized = text.match(/^([^\(]*) \(([^\)]*)\)/);
  if (parenthesized) {
    return {name: parenthesized[1], affiliation: parenthesized[2]};
  }
  const commaSeparated = text.match(/^([^,]*), ([a-zA-Z ]*)/);
  if (commaSeparated) {
    return {name: commaSeparated[1], affiliation: commaSeparated[2]};
  }
  return {name: text, affiliation: "undetermined"};
}

/**
 * Reduce an affiliation string to a lowercase grouping key by stripping
 * company suffixes, bracketed/parenthesized notes and anything after a comma.
 * An affiliation that ends up empty is reported as "undetermined" instead of
 * producing an empty-string key.
 */
function normalizeAffiliation(raw) {
  const normalized = raw
    .trim()
    .replace(/ Systems/, '')
    .replace(/ LLC/, '')
    .replace(/ inc\.?/i, '')
    .replace(/ Corp.*/, '')
    .replace(/ \[[^\]]*\]/, '')
    .replace(/ \([^\)]*\)/, '')
    .replace(/,.*/, '')
    .replace(/ Foundation/, '')
    .replace(/ ASA/, '')
    .replace("W3C Invited Expert", "Invited Expert")
    .toLowerCase();
  return normalized || "undetermined";
}

// For every spec that lists editors, work out each editor's name and
// affiliation and record the spec under affiliations[affiliation][name].
specData.filter(s => s.editors).forEach(s => {
  s.editors.forEach(e => {
    let affiliation;
    let name;
    const edNode = JSDOM.fragment(e.markup);
    const orgNode = edNode.querySelector(".org");
    const mailLink = orgNode ? null : edNode.querySelector('a[href^="mailto:"]');

    if (orgNode) {
      // Structured markup: the organization is tagged explicitly.
      affiliation = orgNode.textContent;
      const nameNode = edNode.querySelector(".p-name");
      // A ".org" element without a ".p-name" sibling used to throw on
      // null.textContent; take the name from the plain text instead.
      name = nameNode ? nameNode.textContent : parseEditorText(e.text).name;
    } else if (mailLink) {
      // Guess the organization from the e-mail domain. Drop any "?subject=..."
      // query and lowercase the domain so it matches the lookup table.
      const rawDomain = mailLink.href.split('@')[1];
      const domain = rawDomain ? rawDomain.split('?')[0].trim().toLowerCase() : undefined;
      affiliation = hasOwn(domainToAffiliation, domain)
        ? domainToAffiliation[domain]
        : "undetermined";
      // When the link text is itself an address, the name is whatever
      // precedes the first comma in the whole entry.
      name = mailLink.textContent.match(/@/)
        ? edNode.textContent.split(',')[0]
        : mailLink.textContent;
    }

    // Nothing conclusive in the markup: fall back to the plain-text form.
    if (!affiliation || affiliation === "undetermined") {
      ({name, affiliation} = parseEditorText(e.text));
    }

    affiliation = normalizeAffiliation(affiliation);
    // Skip entries that describe a past affiliation.
    if (affiliation.match(/^until /) || affiliation.match(/former /)) return;

    // Normalize every typographic apostrophe and trim stray whitespace so the
    // same editor is not split across several keys.
    name = name.replace(/’/g, "'").trim();
    if (!name || name.match(/see contributors/i)) return;

    // Own-property checks keep names such as "constructor" from resolving to
    // members inherited from Object.prototype.
    if (!hasOwn(affiliations, affiliation)) {
      affiliations[affiliation] = {};
    }
    const editorsByName = affiliations[affiliation];
    if (!hasOwn(editorsByName, name)) {
      editorsByName[name] = [];
    }
    editorsByName[name].push({url: s.nightly.url, title: s.title});
  });
});
// Write the grouped data as JSON with keys sorted for a stable output.
fs.writeFileSync("editors-affiliations.json", JSON.stringify(sortKeys(affiliations), null, 2));
const json2csv = new Parser();
// Flatten affiliation -> editor -> specs into one row per (spec, editor) pair,
// ordered by affiliation and then by editor name. Each row is a fresh object
// (url, title, editor, affiliation) so the records held in `affiliations`
// are not modified while building the CSV.
const flat = Object.keys(affiliations).sort().flatMap(aff =>
  Object.keys(affiliations[aff]).sort().flatMap(ed =>
    affiliations[aff][ed].map(spec => ({...spec, editor: ed, affiliation: aff}))
  )
);
const csv = json2csv.parse(flat);
fs.writeFileSync("editors-affiliations.csv", csv);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment