emmareisz/British Newspaper Archive.js

## British Newspaper Archive.js
{
	"translatorID": "a304870e-c4f3-45e3-ab75-e7afef13dff0",
	"label": "British Newspaper Archive",
	"creator": "Emma Reisz",
	"target": "^https?://www\\.britishnewspaperarchive\\.co\\.uk/(search/results|viewer)",
	"minVersion": "3.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2017-11-22 17:49:30"
}

/*
	***** BEGIN LICENSE BLOCK *****

	Copyright © 2017 Emma Reisz

	This file is part of Zotero.

	Zotero is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Zotero is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with Zotero. If not, see <http://www.gnu.org/licenses/>.

	***** END LICENSE BLOCK *****
*/

/**
 * Tells Zotero what kind of page is being viewed.
 *
 * @param {Document} doc - The loaded page.
 * @param {string} url - The page URL.
 * @returns {string|undefined} "multiple" for a search-results page on which the
 *   heading looked up below is absent, "newspaperArticle" for a viewer page,
 *   otherwise undefined.
 */
function detectWeb(doc, url) {
	var isSearch = url.includes("/search/results");
	if (!isSearch) {
		return url.includes("/viewer") ? "newspaperArticle" : undefined;
	}
	// NOTE(review): this <h1> is presumably the "no results" heading — confirm against the live page.
	var heading = ZU.xpath(doc, '//*[@id="ajaxcontainer"]/div[3]/div/div[2]/div[3]/h1')[0];
	return heading === undefined ? "multiple" : undefined;
}

/**
 * Translator entry point: dispatches to the scraper matching the detected page type.
 *
 * @param {Document} doc - The loaded page.
 * @param {string} url - The page URL.
 */
function doWeb(doc, url) {
	var pageType = detectWeb(doc, url);
	if (pageType !== 'multiple') {
		scrapeImage(doc, url);
		return;
	}
	// Non-subscribers have access to search only, so items are scraped from the search results view.
	scrapeSearch(doc, url);
}

/**
 * Scrapes the entries the user selects on a search-results page.
 *
 * Titles and links are read from each result's heading anchor; date, publication,
 * place, pages and tags are read from the labelled <span> elements in the same
 * result's footer. A footer field that is missing or unlabelled is skipped rather
 * than aborting the save.
 *
 * @param {Document} doc - The search-results page.
 * @param {string} url - The page URL (not used here; kept so both scrapers share a signature).
 */
function scrapeSearch(doc, url) {
	var listPath = '//*[@id="ajaxcontainer"]/div[3]/div/div[2]';
	var links = ZU.xpath(doc, listPath + '/article/div[3]/header/h4/a');

	// Returns the text following the first ": " of a "Label: value" string, or
	// undefined when the node was not found or the text has no label separator.
	function valueAfterLabel(text) {
		if (typeof text !== "string") return undefined;
		return ZU.trimInternal(text).split(/: (.+)/)[1];
	}

	// Keyed by 0-based position in the result list so that a selection can be
	// mapped back to its <article> element afterwards.
	var results = {};
	for (var i = 0; i < links.length; i++) {
		var rawTitle = links[i].innerText || links[i].textContent || '';
		results[i] = {
			// Lower-cased first so that capitalizeTitle yields title case.
			title: ZU.capitalizeTitle(rawTitle.toLowerCase(), true),
			url: links[i].href
		};
	}

	Z.selectItems(results, function (selected) {
		if (!selected) return true;

		// Position of each footer <span> -> item field it populates.
		var spanFields = {
			1: "date",
			2: "publicationTitle",
			3: "place",
			6: "pages",
			7: "tags"
		};

		for (var key in selected) {
			if (!selected.hasOwnProperty(key) || !results.hasOwnProperty(key)) continue;

			var item = new Z.Item("newspaperArticle");
			item.title = results[key].title;
			item.url = results[key].url;

			// XPath positions are 1-based, the result keys are 0-based.
			var detailPath = listPath + '/article[' + (Number(key) + 1) + ']/div[3]/footer/div/small';

			for (var pos in spanFields) {
				if (!spanFields.hasOwnProperty(pos)) continue;

				var field = spanFields[pos];
				var value = valueAfterLabel(ZU.xpathText(doc, detailPath + '/span[' + pos + ']'));
				if (value === undefined) continue;

				if (field === "pages") {
					value = value.replace(/^Pages?: ?/, '');
				} else if (field === "tags") {
					// The site prints the literal "none" for untagged articles.
					value = (value === "none") ? [] : value.split(", ");
				}
				item[field] = value;
			}

			item.complete();
		}
	});
}

/**
 * Scrapes a single article from a viewer page and saves it.
 *
 * Publication title, date and rights are read from the elements whose ids are
 * listed below; the article title is the part of the document title before the
 * first "|", with one trailing ".", "," or ";" removed.
 *
 * @param {Document} doc - The viewer page.
 * @param {string} url - The page URL, stored on the item.
 */
function scrapeImage(doc, url) {
	var article = new Z.Item("newspaperArticle");

	// [item field, id of the element holding its text]
	var sources = [
		["publicationTitle", "newspaperTitle"],
		["date", "newspaperDate"],
		["rights", "newspaperCopy"]
	];
	sources.forEach(function (pair) {
		article[pair[0]] = ZU.xpathText(doc, '//*[@id="' + pair[1] + '"]');
	});

	var headline = doc.title.split("|")[0].trim();
	article.title = headline.replace(/[.,;]?$/, '');
	article.url = url;
	article.complete();
}

// Translator test fixtures: a viewer URL expected to produce one newspaperArticle
// item, and a search-results URL expected to produce a multiple-item selection.
// NOTE(review): the expected publicationTitle "Loading publication" looks like
// placeholder text captured before the page finished loading — confirm.
/** BEGIN TEST CASES **/
var testCases = [
	{
		"type": "web",
		"url": "https://www.britishnewspaperarchive.co.uk/viewer/bl/0001578/18901206/021/0007",
		"items": [
			{
				"itemType": "newspaperArticle",
				"title": "Testimonial To Sir Robert Hart, G.c..m.g., Inspector-General Op Customs In China",
				"creators": [],
				"libraryCatalog": "British Newspaper Archive",
				"publicationTitle": "Loading publication",
				"url": "https://www.britishnewspaperarchive.co.uk/viewer/bl/0001578/18901206/021/0007",
				"attachments": [],
				"tags": [],
				"notes": [],
				"seeAlso": []
			}
		]
	},
	{
		"type": "web",
		"url": "https://www.britishnewspaperarchive.co.uk/search/results/1700-01-01/1749-12-31?basicsearch=robert%20hart%2020%20november%201890%20plate&exactsearch=true&retrievecountrycounts=false",
		"items": "multiple"
	}
]
/** END TEST CASES **/
	{
	"translatorID": "a304870e-c4f3-45e3-ab75-e7afef13dff0",
	"label": "British Newspaper Archive",
	"creator": "Emma Reisz",
	"target": "^https?://www\\.britishnewspaperarchive\\.co\\.uk/(search/results|viewer)",
	"minVersion": "3.0",
	"maxVersion": "",
	"priority": 100,
	"inRepository": true,
	"translatorType": 4,
	"browserSupport": "gcsibv",
	"lastUpdated": "2017-11-22 17:49:30"
	}

	/*
	***** BEGIN LICENSE BLOCK *****

	Copyright © 2017 Emma Reisz

	This file is part of Zotero.

	Zotero is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Zotero is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with Zotero. If not, see <http://www.gnu.org/licenses/>.

	***** END LICENSE BLOCK *****
	*/

	/**
	 * Tells Zotero what kind of page is being viewed.
	 *
	 * @param {Document} doc - The loaded page.
	 * @param {string} url - The page URL.
	 * @returns {string|undefined} "multiple" for a search-results page on which the
	 *   heading looked up below is absent, "newspaperArticle" for a viewer page,
	 *   otherwise undefined.
	 */
	function detectWeb(doc, url) {
		var isSearch = url.includes("/search/results");
		if (!isSearch) {
			return url.includes("/viewer") ? "newspaperArticle" : undefined;
		}
		// NOTE(review): this <h1> is presumably the "no results" heading — confirm against the live page.
		var heading = ZU.xpath(doc, '//*[@id="ajaxcontainer"]/div[3]/div/div[2]/div[3]/h1')[0];
		return heading === undefined ? "multiple" : undefined;
	}

	/**
	 * Translator entry point: dispatches to the scraper matching the detected page type.
	 *
	 * @param {Document} doc - The loaded page.
	 * @param {string} url - The page URL.
	 */
	function doWeb(doc, url) {
		var pageType = detectWeb(doc, url);
		if (pageType !== 'multiple') {
			scrapeImage(doc, url);
			return;
		}
		// Non-subscribers have access to search only, so items are scraped from the search results view.
		scrapeSearch(doc, url);
	}

	/**
	 * Scrapes the entries the user selects on a search-results page.
	 *
	 * Titles and links are read from each result's heading anchor; date, publication,
	 * place, pages and tags are read from the labelled <span> elements in the same
	 * result's footer. A footer field that is missing or unlabelled is skipped rather
	 * than aborting the save.
	 *
	 * @param {Document} doc - The search-results page.
	 * @param {string} url - The page URL (not used here; kept so both scrapers share a signature).
	 */
	function scrapeSearch(doc, url) {
		var listPath = '//*[@id="ajaxcontainer"]/div[3]/div/div[2]';
		var links = ZU.xpath(doc, listPath + '/article/div[3]/header/h4/a');

		// Returns the text following the first ": " of a "Label: value" string, or
		// undefined when the node was not found or the text has no label separator.
		function valueAfterLabel(text) {
			if (typeof text !== "string") return undefined;
			return ZU.trimInternal(text).split(/: (.+)/)[1];
		}

		// Keyed by 0-based position in the result list so that a selection can be
		// mapped back to its <article> element afterwards.
		var results = {};
		for (var i = 0; i < links.length; i++) {
			var rawTitle = links[i].innerText || links[i].textContent || '';
			results[i] = {
				// Lower-cased first so that capitalizeTitle yields title case.
				title: ZU.capitalizeTitle(rawTitle.toLowerCase(), true),
				url: links[i].href
			};
		}

		Z.selectItems(results, function (selected) {
			if (!selected) return true;

			// Position of each footer <span> -> item field it populates.
			var spanFields = {
				1: "date",
				2: "publicationTitle",
				3: "place",
				6: "pages",
				7: "tags"
			};

			for (var key in selected) {
				if (!selected.hasOwnProperty(key) || !results.hasOwnProperty(key)) continue;

				var item = new Z.Item("newspaperArticle");
				item.title = results[key].title;
				item.url = results[key].url;

				// XPath positions are 1-based, the result keys are 0-based.
				var detailPath = listPath + '/article[' + (Number(key) + 1) + ']/div[3]/footer/div/small';

				for (var pos in spanFields) {
					if (!spanFields.hasOwnProperty(pos)) continue;

					var field = spanFields[pos];
					var value = valueAfterLabel(ZU.xpathText(doc, detailPath + '/span[' + pos + ']'));
					if (value === undefined) continue;

					if (field === "pages") {
						value = value.replace(/^Pages?: ?/, '');
					} else if (field === "tags") {
						// The site prints the literal "none" for untagged articles.
						value = (value === "none") ? [] : value.split(", ");
					}
					item[field] = value;
				}

				item.complete();
			}
		});
	}

	/**
	 * Scrapes a single article from a viewer page and saves it.
	 *
	 * Publication title, date and rights are read from the elements whose ids are
	 * listed below; the article title is the part of the document title before the
	 * first "|", with one trailing ".", "," or ";" removed.
	 *
	 * @param {Document} doc - The viewer page.
	 * @param {string} url - The page URL, stored on the item.
	 */
	function scrapeImage(doc, url) {
		var article = new Z.Item("newspaperArticle");

		// [item field, id of the element holding its text]
		var sources = [
			["publicationTitle", "newspaperTitle"],
			["date", "newspaperDate"],
			["rights", "newspaperCopy"]
		];
		sources.forEach(function (pair) {
			article[pair[0]] = ZU.xpathText(doc, '//*[@id="' + pair[1] + '"]');
		});

		// Plain "|" separator: a backslash before it is a no-op escape in a string literal.
		var headline = doc.title.split("|")[0].trim();
		article.title = headline.replace(/[.,;]?$/, '');
		article.url = url;
		article.complete();
	}

	// Translator test fixtures: a viewer URL expected to produce one newspaperArticle
	// item, and a search-results URL expected to produce a multiple-item selection.
	// NOTE(review): the expected publicationTitle "Loading publication" looks like
	// placeholder text captured before the page finished loading — confirm.
	/** BEGIN TEST CASES **/
	var testCases = [
		{
			"type": "web",
			"url": "https://www.britishnewspaperarchive.co.uk/viewer/bl/0001578/18901206/021/0007",
			"items": [
				{
					"itemType": "newspaperArticle",
					"title": "Testimonial To Sir Robert Hart, G.c..m.g., Inspector-General Op Customs In China",
					"creators": [],
					"libraryCatalog": "British Newspaper Archive",
					"publicationTitle": "Loading publication",
					"url": "https://www.britishnewspaperarchive.co.uk/viewer/bl/0001578/18901206/021/0007",
					"attachments": [],
					"tags": [],
					"notes": [],
					"seeAlso": []
				}
			]
		},
		{
			"type": "web",
			"url": "https://www.britishnewspaperarchive.co.uk/search/results/1700-01-01/1749-12-31?basicsearch=robert%20hart%2020%20november%201890%20plate&exactsearch=true&retrievecountrycounts=false",
			"items": "multiple"
		}
	]
	/** END TEST CASES **/