Last active
March 15, 2023 10:38
-
-
Save morags/d49230c7350083f670de4e51a9a0f228 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Decide whether the current page can be saved as an item.
 *
 * The only signal used is the presence of a `<meta name="title">` element.
 *
 * @param {Document} doc - Page to inspect.
 * @param {string} url - Address of the page (not consulted by the check).
 * @returns {string|false} 'newspaperArticle' when the title meta element is
 *   present, otherwise false.
 */
function detectWeb(doc, url) {
  if (doc.querySelector('meta[name="title"]') !== null) {
    return 'newspaperArticle';
  }
  // Explicit negative result instead of an implicit undefined.
  return false;
}
/**
 * Build a 'newspaperArticle' item from the page's meta elements and author
 * links, then hand it over with item.complete().
 *
 * @param {Document} doc - Article page to scrape.
 * @param {string} url - Address of the page; stored as the item's URL.
 * @returns {void}
 */
function doWeb(doc, url) {
  const item = new Zotero.Item('newspaperArticle');
  item.title = attr(doc, 'meta[name="title"]', 'content');
  item.url = url;

  const siteName = attr(doc, 'meta[property="og:site_name"]', 'content');
  switch (siteName) {
    case 'Haaretz.com':
      item.publicationTitle = 'Haaretz';
      item.language = 'en';
      // Without this break the Hebrew defaults below overwrite the English values.
      break;
    default:
      item.publicationTitle = 'הארץ';
      item.language = 'he';
  }

  item.abstractNote = attr(doc, 'meta[name="description"]', 'content');

  // meta[name="author"] doesn't list all authors, so we need to scrape
  const authors = getSearchResults(doc, 'a[href^="https://www.haaretz.co.il/ty-WRITER"]');
  // getSearchResults yields an { href: name } object (or false when nothing
  // matched). Because it is keyed by href, a link that appears twice on the
  // page collapses to a single entry, so no manual halving is needed.
  if (authors) {
    for (const name of Object.values(authors)) {
      item.creators.push(ZU.cleanAuthor(name, 'author'));
    }
  }

  // The selector previously read "propery", which can never match.
  // NOTE(review): assumes the page exposes the date as property="publishDate" - confirm on a live page.
  item.date = attr(doc, 'meta[property="publishDate"]', 'content');

  // attr() returns null when the element is missing; fall back to an empty list.
  const keywords = attr(doc, 'meta[name="news_keywords"]', 'content') || '';
  for (const keyword of keywords.split(',')) {
    // Trim before the emptiness check so whitespace-only entries are skipped.
    const tag = keyword.trim();
    if (tag.length > 0) item.tags.push(tag);
  }

  item.complete();
}
/**
 * Read an attribute from the first (or index-th) element matching a selector.
 *
 * @param {Document|Element} docOrElem - Root to search in.
 * @param {string} selector - CSS selector.
 * @param {string} attr - Name of the attribute to read.
 * @param {number} [index] - Position among the matches; when omitted or 0 the
 *   first match is used.
 * @returns {string|null} The attribute value, or null when no element matched
 *   (or the element lacks the attribute).
 */
function attr(docOrElem, selector, attr, index) {
  // A falsy index (undefined or 0) takes the cheaper single-match lookup.
  const elem = index
    ? docOrElem.querySelectorAll(selector).item(index)
    : docOrElem.querySelector(selector);
  return elem ? elem.getAttribute(attr) : null;
}
/**
 * Read the text content of the first (or index-th) element matching a selector.
 *
 * @param {Document|Element} docOrElem - Root to search in.
 * @param {string} selector - CSS selector.
 * @param {number} [index] - Position among the matches; when omitted or 0 the
 *   first match is used.
 * @returns {string|null} The element's textContent, or null when no element
 *   matched.
 */
function text(docOrElem, selector, index) {
  // A falsy index (undefined or 0) takes the cheaper single-match lookup.
  const elem = index
    ? docOrElem.querySelectorAll(selector).item(index)
    : docOrElem.querySelector(selector);
  return elem ? elem.textContent : null;
}
/**
 * Collect the links matching a selector as an href -> label map.
 *
 * Rows without an href, or whose text is empty after whitespace
 * normalisation, are skipped. Rows sharing an href occupy one key; the last
 * one's label wins.
 *
 * @param {Document|Element} doc - Root to search in.
 * @param {string} selector - CSS selector for the link elements.
 * @param {boolean} [checkOnly] - When truthy, stop at the first usable row and
 *   return true instead of building the map.
 * @returns {Object<string, string>|boolean} The map, true in checkOnly mode
 *   when a usable row exists, or false when no usable row was found.
 */
function getSearchResults(doc, selector, checkOnly) {
  const items = {};
  for (const row of doc.querySelectorAll(selector)) {
    const href = row.href;
    const title = ZU.trimInternal(row.textContent);
    if (!href || !title) continue;
    if (checkOnly) return true;
    items[href] = title;
  }
  // An empty map means nothing usable matched; report that as false.
  return Object.keys(items).length > 0 ? items : false;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment