Skip to content

Instantly share code, notes, and snippets.

@retorquere
Created April 5, 2019 18:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save retorquere/6d13e8438989834f0f9a710d974686f9 to your computer and use it in GitHub Desktop.
Save retorquere/6d13e8438989834f0f9a710d974686f9 to your computer and use it in GitHub Desktop.
{
"translatorID": "c159dcfe-8a53-4301-a499-30f6549c340d",
"label": "DOI",
"creator": "Simon Kornblith",
"target": "",
"minVersion": "3.0",
"maxVersion": "",
"priority": 400,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2019-01-27 14:06:07"
}
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2019 Simon Kornblith
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
// The variables items and selectArray will be filled during the first
// as well as the second retrieveDOIs function call and therefore they
// are defined global.
var items = {};
var selectArray = {};
// builds a list of DOIs
function getDOIs(doc) {
// TODO Detect DOIs more correctly.
// The actual rules for DOIs are very lax-- but we're more strict.
// Specifically, we should allow space characters, and all Unicode
// characters except for control characters. Here, we're cheating
// by not allowing ampersands, to fix an issue with getting DOIs
// out of URLs.
// Additionally, all content inside <noscript> is picked up as text()
// by the xpath, which we don't necessarily want to exclude, but
// that means that we can get DOIs inside node attributes and we should
// exclude quotes in this case.
// DOI should never end with a period or a comma (we hope)
// Description at: http://www.doi.org/handbook_2000/appendix_1.html#A1-4
const DOIre = /\b10\.[0-9]{4,}\/[^\s&"']*[^\s&"'.,]/g;
var dois = [];
var m, DOI;
var treeWalker = doc.createTreeWalker(doc.documentElement, 4, null, false);
var ignore = ['script', 'style'];
while (treeWalker.nextNode()) {
if (ignore.includes(treeWalker.currentNode.parentNode.tagName.toLowerCase())) continue;
// Z.debug(node.nodeValue)
DOIre.lastMatch = 0;
while ((m = DOIre.exec(treeWalker.currentNode.nodeValue))) {
DOI = m[0];
if (DOI.endsWith(")") && !DOI.includes("(")) {
DOI = DOI.substr(0, DOI.length - 1);
}
if (DOI.endsWith("}") && !DOI.includes("{")) {
DOI = DOI.substr(0, DOI.length - 1);
}
// only add new DOIs
if (!dois.includes(DOI)) {
dois.push(DOI);
}
}
}
return dois;
}
function detectWeb(doc, url) {
// Blacklist the advertising iframe in ScienceDirect guest mode:
// http://www.sciencedirect.com/science/advertisement/options/num/264322/mainCat/general/cat/general/acct/...
// This can be removed from blacklist when 5c324134c636a3a3e0432f1d2f277a6bc2717c2a hits all clients (Z 3.0+)
const blacklistRe = /^https?:\/\/[^/]*(?:google\.com|sciencedirect\.com\/science\/advertisement\/)/i;
if (!blacklistRe.test(url)) {
var DOIs = getDOIs(doc);
if (DOIs.length) {
return "multiple";
}
}
return false;
}
function retrieveDOI(DOI) {
return new Promise(function (resolve) {
const translate = Zotero.loadTranslator("search");
translate.setTranslator("b28d0d42-8549-4c6d-83fc-8382874a5cb9");
translate.setSearch({ itemType: "journalArticle", DOI: DOI });
// don't save when item is done
translate.setHandler("itemDone", function (_translate, item) {
selectArray[item.DOI] = item.title;
if (!item.title) {
Zotero.debug("No title available for " + item.DOI);
item.title = "[No Title]";
selectArray[item.DOI] = "[" + item.DOI + "]";
}
items[item.DOI] = item;
});
translate.setHandler("done", function () {
resolve();
});
// Don't throw on error
translate.setHandler("error", function () {});
translate.translate();
});
}
function doWeb(doc) {
var dois = getDOIs(doc);
Z.debug(dois);
Promise.all(dois.map(retrieveDOI)).then(function () {
var numDOIs = Object.keys(selectArray).length;
if (numDOIs == 0) {
throw new Error("DOI Translator: could not find DOI");
}
else {
Zotero.selectItems(selectArray, function (selectedDOIs) {
if (!selectedDOIs) return true;
for (var DOI in selectedDOIs) {
items[DOI].complete();
}
return true;
});
}
});
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://blog.apastyle.org/apastyle/digital-object-identifier-doi/",
"items": "multiple"
},
{
"type": "web",
"url": "http://libguides.csuchico.edu/citingbusiness",
"items": "multiple"
},
{
"type": "web",
"url": "http://www.egms.de/static/de/journals/mbi/2015-15/mbi000336.shtml",
"items": "multiple"
},
{
"type": "web",
"url": "http://www.roboticsproceedings.org/rss09/p23.html",
"items": "multiple"
},
{
"type": "web",
"url": "https://en.wikipedia.org/wiki/Template_talk:Doi",
"items": "multiple"
}
]
/** END TEST CASES **/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment