Skip to content

Instantly share code, notes, and snippets.

@morags
Last active March 15, 2023 10:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save morags/d49230c7350083f670de4e51a9a0f228 to your computer and use it in GitHub Desktop.
Save morags/d49230c7350083f670de4e51a9a0f228 to your computer and use it in GitHub Desktop.
/**
 * Decide whether the page can be imported by this translator.
 *
 * @param {Document} doc - Page being inspected.
 * @param {string} url - Page address (not consulted).
 * @returns {string|undefined} 'newspaperArticle' when the page carries a
 *   <meta name="title"> element, otherwise undefined.
 */
function detectWeb(doc, url) {
  const titleMeta = doc.querySelector('meta[name="title"]');
  // Only a strict null means "no such element".
  if (titleMeta === null) {
    return undefined;
  }
  return 'newspaperArticle';
}
/**
 * Scrape the current article page into a Zotero 'newspaperArticle' item and
 * save it with item.complete().
 *
 * Reads title, description, site name, publish date and keywords from the
 * page's <meta> tags, and author names from the writer links in the body.
 *
 * @param {Document} doc - Article page.
 * @param {string} url - Address of the page; stored as the item URL.
 */
function doWeb(doc, url) {
  const item = new Zotero.Item('newspaperArticle');
  item.title = attr(doc, 'meta[name="title"]', 'content');
  item.url = url;

  // The English edition reports og:site_name "Haaretz.com"; every other
  // value is treated as the Hebrew edition.
  const siteName = attr(doc, 'meta[property="og:site_name"]', 'content');
  switch (siteName) {
    case 'Haaretz.com':
      item.publicationTitle = 'Haaretz';
      item.language = 'en';
      break; // without this the Hebrew defaults below overwrite the values
    default:
      item.publicationTitle = 'הארץ';
      item.language = 'he';
  }

  item.abstractNote = attr(doc, 'meta[name="description"]', 'content');

  // meta[name="author"] doesn't list all authors, so we need to scrape.
  // getSearchResults returns an { href: name } map, or false when nothing
  // matched, so it cannot be indexed like an array.
  const authorLinks = getSearchResults(doc, 'a[href^="https://www.haaretz.co.il/ty-WRITER"]') || {};
  // Each name is displayed twice on the page; keep only the first occurrence.
  const seenNames = new Set();
  for (const name of Object.values(authorLinks)) {
    if (seenNames.has(name)) continue;
    seenNames.add(name);
    item.creators.push(ZU.cleanAuthor(name, 'author'));
  }

  item.date = attr(doc, 'meta[property="publishDate"]', 'content');

  // The keywords meta tag may be absent; treat that as "no tags".
  const keywords = attr(doc, 'meta[name="news_keywords"]', 'content') || '';
  for (const keyword of keywords.split(',')) {
    const tag = keyword.trim();
    // Trim before testing so whitespace-only entries are dropped too.
    if (tag.length > 0) item.tags.push(tag);
  }

  item.complete();
}
/**
 * Read an attribute from the element matched by a CSS selector.
 *
 * @param {Document|Element} docOrElem - Root to search within.
 * @param {string} selector - CSS selector.
 * @param {string} attr - Name of the attribute to read.
 * @param {number} [index] - When truthy, pick the match at this position
 *   instead of the first one (0 and undefined both mean "first match").
 * @returns {string|null} The attribute value, or null when nothing matched.
 */
function attr(docOrElem, selector, attr, index) {
  let match;
  if (index) {
    match = docOrElem.querySelectorAll(selector).item(index);
  } else {
    match = docOrElem.querySelector(selector);
  }
  if (!match) {
    return null;
  }
  return match.getAttribute(attr);
}
/**
 * Read the text content of the element matched by a CSS selector.
 *
 * @param {Document|Element} docOrElem - Root to search within.
 * @param {string} selector - CSS selector.
 * @param {number} [index] - When truthy, pick the match at this position
 *   instead of the first one (0 and undefined both mean "first match").
 * @returns {string|null} The element's textContent, or null when nothing
 *   matched.
 */
function text(docOrElem, selector, index) {
  let match;
  if (index) {
    match = docOrElem.querySelectorAll(selector).item(index);
  } else {
    match = docOrElem.querySelector(selector);
  }
  if (!match) {
    return null;
  }
  return match.textContent;
}
/**
 * Collect the links matched by a selector as an { href: label } map.
 *
 * Elements with an empty href or an empty (whitespace-normalised) label are
 * skipped. Later elements with the same href overwrite earlier ones.
 *
 * @param {Document|Element} doc - Root to search within.
 * @param {string} selector - CSS selector for the link elements.
 * @param {boolean} [checkOnly] - When truthy, return true as soon as one
 *   usable link is found instead of building the map.
 * @returns {Object<string, string>|boolean} The map of links, true in
 *   checkOnly mode when a link exists, or false when no usable link matched.
 */
function getSearchResults(doc, selector, checkOnly) {
  const results = {};
  let usable = 0;
  for (const row of doc.querySelectorAll(selector)) {
    const link = row.href;
    const label = ZU.trimInternal(row.textContent);
    if (link && label) {
      if (checkOnly) {
        return true;
      }
      results[link] = label;
      usable += 1;
    }
  }
  return usable > 0 ? results : false;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment