/New Zealand Herald.js

## gistfile2.txt

      
    Raw
  

              gistfile2.txt
            
          
## New Zealand Herald.js
{"translatorID":"207f4aad-b604-43ef-a7f5-3e6229aade9f","label":"New Zealand  Herald","creator":"Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)","target":"www.nzherald.co.nz","minVersion":"1.0","maxVersion":"","priority":100,"inRepository":"1","translatorType":4,"lastUpdated":"2010-08-03 15:31:36"}

					/* Big thanks to
					*	Michael Collins,
					*	Frank Bennett and
					*	Avram Lyon
					* for their help with optimising the code */

/**
 * Decide which kind of nzherald.co.nz page is being viewed.
 *
 * @param {Document} doc  The loaded page; its location.href is inspected first.
 * @param {String}   url  Address of the page; used only when doc.location.href
 *                        is not available.
 * @return {String|undefined} "multiple" for a search results page,
 *         "newspaperArticle" for an article page, undefined for anything else.
 */
function detectWeb(doc, url) {
	// Prefer the address carried by the document itself and fall back to the
	// url argument, so detection does not depend on any other part of the DOM.
	var href = (doc && doc.location && doc.location.href) ? doc.location.href : url;
	if (!href) {
		return;
	}

	// Search result listings offer several articles to choose from.
	if (href.indexOf("/search/results.cfm") !== -1) {
		return "multiple";
	}
	// A single article page.
	if (href.indexOf("/news/article.cfm") !== -1) {
		return "newspaperArticle";
	}
}

/**
 * Copy one scraped value onto a Zotero item, but only when the value is present.
 *
 * @param {Object} newItem      Item that receives the value.
 * @param {Object} items        Map of scraped labels to values.
 * @param {String} field        Label to look up in items.
 * @param {String} zoteroField  Property of newItem to assign.
 */
function associateData (newItem, items, field, zoteroField) {
	var scraped = items[field];
	// Missing or empty values leave the item untouched.
	if (!scraped) {
		return;
	}
	newItem[zoteroField] = scraped;
}


/**
 * Build a Zotero newspaperArticle item from one nzherald.co.nz article page
 * and hand it over with newItem.complete().
 *
 * Every page element is treated as optional: a missing headline falls back to
 * "No Title Found", and a missing byline, date, section or description simply
 * leaves that field unset instead of aborting the whole scrape.
 *
 * @param {Document} doc  Article page to read.
 * @param {String}   url  Address of the page (not used; doc.location.href is
 *                        what gets stored on the item).
 */
function scrape(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return (prefix == 'x') ? namespace : null;
	} : null;

	// First node matching the XPath, or null when the page has no such node.
	function firstNode(xpath) {
		return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}

	// Text of the first node matching the XPath, or "" when it is missing.
	function firstText(xpath) {
		var node = firstNode(xpath);
		return (node && node.textContent) ? node.textContent : "";
	}

	// Remove any "h:mm AM Weekday " stamp so only the calendar date remains.
	function stripTime(text) {
		return text.replace(/\d{1,2}:\d{1,2} (AM|PM) (\w)+ /g, '');
	}

	var newItem = new Zotero.Item('newspaperArticle');
	newItem.url = doc.location.href;
	newItem.publicationTitle = "New Zealand Herald";
	newItem.ISSN = "1170-0777";
	newItem.language = "English";

	// Headline, with a placeholder when the page has no usable <h1>.
	newItem.title = firstText('//h1') || "No Title Found";

	/*
		Authors: drop the leading "By ", and when the byline joins several
		people with "and", turn it into a comma separated list and split it.
		A trailing " of <organisation>" is removed from each name.
	*/
	try {
		var creditsNode = firstNode('//span[@class="credits"]');
		if (creditsNode) {
			var credit = creditsNode.textContent.replace(/\bBy\W+/g, '');
			var names;
			if (/\W\band\W+/.test(credit)) {
				names = credit.replace(/\W\band\W+/g, ', ').split(", ");
			} else {
				// A single author: keep the byline whole, commas included.
				names = [credit];
			}
			for (var i = 0; i < names.length; i++) {
				var name = names[i].replace(/\W\bof\W(.*)/g, '');
				// Skip blank fragments rather than creating empty creators.
				if (/\S/.test(name)) {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
				}
			}
		}
	} catch (errs) {
		// Authors are best effort: keep the item, but with an empty creator list.
		newItem.creators = [];
		if (typeof Zotero.debug == "function") {
			Zotero.debug("New Zealand Herald: could not read authors: " + errs);
		}
	}

	// Date: the first span may only hold an "Updated"/"New" flag, in which
	// case the real date sits in the second span.
	var dateText = stripTime(firstText('//div[@class="tools"]/span'));
	if (dateText == "Updated" || dateText == "New") {
		dateText = stripTime(firstText('//div[@class="tools"]/span[2]'));
	}
	if (dateText) {
		newItem.date = dateText;
	}

	// Section of the newspaper.
	var sectionText = firstText('//div[@class="sectionHeader"]/span/a[1]');
	if (sectionText) {
		newItem.section = sectionText;
	}

	// Abstract taken from the description meta tag.
	var descriptionNode = firstNode("//meta[@name='description']");
	if (descriptionNode && descriptionNode.content) {
		newItem.abstractNote = descriptionNode.content;
	}

	newItem.complete();
}

/**
 * Translator entry point. On a search results page the user is asked which
 * articles to save; on an article page that single page is used. Each chosen
 * page is then loaded and passed to scrape().
 *
 * @param {Document} doc  The page the translator was started on.
 * @param {String}   url  Address of that page.
 * @return {Boolean|undefined} true when there is nothing to process (the
 *         selection was cancelled or empty), otherwise undefined.
 */
function doWeb(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix){
		return (prefix == 'x') ? namespace : null;
	} : null;

	var articles = [];

	if (detectWeb(doc, url) == "multiple"){
		// Offer every search hit, keyed by its link, labelled with its title.
		var candidates = {};
		var titles = doc.evaluate('//p[@class="results"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var nextTitle;
		while ((nextTitle = titles.iterateNext())){
			candidates[nextTitle.href] = nextTitle.textContent;
		}

		var selected = Zotero.selectItems(candidates);
		// selectItems may hand back a falsy value (for example when the dialog
		// is cancelled); stop here instead of waiting on an empty job.
		if (!selected) {
			return true;
		}
		for (var link in selected){
			articles.push(link);
		}
		if (articles.length === 0) {
			return true;
		}
	} else {
		articles = [url];
	}

	// processDocuments needs scrape to be declared in this file.
	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});

	Zotero.wait();
}
	{"translatorID":"207f4aad-b604-43ef-a7f5-3e6229aade9f","label":"New Zealand Herald","creator":"Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)","target":"www.nzherald.co.nz","minVersion":"1.0","maxVersion":"","priority":100,"inRepository":"1","translatorType":4,"lastUpdated":"2010-08-03 15:31:36"}

	/* Big thanks to
	* Michael Collins,
	* Frank Bennett and
	* Avram Lyon
	* for their help with optimising the code */

	/**
	 * Decide which kind of nzherald.co.nz page is being viewed.
	 *
	 * @param {Document} doc  The loaded page; its location.href is inspected first.
	 * @param {String}   url  Address of the page; used only when
	 *                        doc.location.href is not available.
	 * @return {String|undefined} "multiple" for a search results page,
	 *         "newspaperArticle" for an article page, undefined otherwise.
	 */
	function detectWeb(doc, url) {
		// Prefer the address carried by the document itself and fall back to
		// the url argument, so detection needs no other part of the DOM.
		var href = (doc && doc.location && doc.location.href) ? doc.location.href : url;
		if (!href) {
			return;
		}

		// Search result listings offer several articles to choose from.
		if (href.indexOf("/search/results.cfm") !== -1) {
			return "multiple";
		}
		// A single article page.
		if (href.indexOf("/news/article.cfm") !== -1) {
			return "newspaperArticle";
		}
	}

	/**
	 * Copy one scraped value onto a Zotero item, but only when the value is present.
	 *
	 * @param {Object} newItem      Item that receives the value.
	 * @param {Object} items        Map of scraped labels to values.
	 * @param {String} field        Label to look up in items.
	 * @param {String} zoteroField  Property of newItem to assign.
	 */
	function associateData (newItem, items, field, zoteroField) {
		var scraped = items[field];
		// Missing or empty values leave the item untouched.
		if (!scraped) {
			return;
		}
		newItem[zoteroField] = scraped;
	}



	/**
	 * Build a Zotero newspaperArticle item from one nzherald.co.nz article page
	 * and hand it over with newItem.complete().
	 *
	 * Every page element is treated as optional: a missing headline falls back
	 * to "No Title Found", and a missing byline, date, section or description
	 * simply leaves that field unset instead of aborting the whole scrape.
	 *
	 * @param {Document} doc  Article page to read.
	 * @param {String}   url  Address of the page (not used; doc.location.href
	 *                        is what gets stored on the item).
	 */
	function scrape(doc, url){
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			return (prefix == 'x') ? namespace : null;
		} : null;

		// First node matching the XPath, or null when the page has no such node.
		function firstNode(xpath) {
			return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		}

		// Text of the first node matching the XPath, or "" when it is missing.
		function firstText(xpath) {
			var node = firstNode(xpath);
			return (node && node.textContent) ? node.textContent : "";
		}

		// Remove any "h:mm AM Weekday " stamp so only the calendar date remains.
		// The alternation must be a real "|": an escaped "\|" would only match
		// the literal text "AM|PM".
		function stripTime(text) {
			return text.replace(/\d{1,2}:\d{1,2} (AM|PM) (\w)+ /g, '');
		}

		var newItem = new Zotero.Item('newspaperArticle');
		newItem.url = doc.location.href;
		newItem.publicationTitle = "New Zealand Herald";
		newItem.ISSN = "1170-0777";
		newItem.language = "English";

		// Headline, with a placeholder when the page has no usable <h1>.
		newItem.title = firstText('//h1') || "No Title Found";

		/*
			Authors: drop the leading "By ", and when the byline joins several
			people with "and", turn it into a comma separated list and split it.
			A trailing " of <organisation>" is removed from each name.
		*/
		try {
			var creditsNode = firstNode('//span[@class="credits"]');
			if (creditsNode) {
				var credit = creditsNode.textContent.replace(/\bBy\W+/g, '');
				var names;
				if (/\W\band\W+/.test(credit)) {
					names = credit.replace(/\W\band\W+/g, ', ').split(", ");
				} else {
					// A single author: keep the byline whole, commas included.
					names = [credit];
				}
				for (var i = 0; i < names.length; i++) {
					var name = names[i].replace(/\W\bof\W(.*)/g, '');
					// Skip blank fragments rather than creating empty creators.
					if (/\S/.test(name)) {
						newItem.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
					}
				}
			}
		} catch (errs) {
			// Authors are best effort: keep the item, but with an empty creator list.
			newItem.creators = [];
			if (typeof Zotero.debug == "function") {
				Zotero.debug("New Zealand Herald: could not read authors: " + errs);
			}
		}

		// Date: the first span may only hold an "Updated"/"New" flag, in which
		// case the real date sits in the second span.
		var dateText = stripTime(firstText('//div[@class="tools"]/span'));
		if (dateText == "Updated" || dateText == "New") {
			dateText = stripTime(firstText('//div[@class="tools"]/span[2]'));
		}
		if (dateText) {
			newItem.date = dateText;
		}

		// Section of the newspaper.
		var sectionText = firstText('//div[@class="sectionHeader"]/span/a[1]');
		if (sectionText) {
			newItem.section = sectionText;
		}

		// Abstract taken from the description meta tag.
		var descriptionNode = firstNode("//meta[@name='description']");
		if (descriptionNode && descriptionNode.content) {
			newItem.abstractNote = descriptionNode.content;
		}

		newItem.complete();
	}

	/**
	 * Translator entry point. On a search results page the user is asked which
	 * articles to save; on an article page that single page is used. Each
	 * chosen page is then loaded and passed to scrape().
	 *
	 * @param {Document} doc  The page the translator was started on.
	 * @param {String}   url  Address of that page.
	 * @return {Boolean|undefined} true when there is nothing to process (the
	 *         selection was cancelled or empty), otherwise undefined.
	 */
	function doWeb(doc, url){
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix){
			return (prefix == 'x') ? namespace : null;
		} : null;

		var articles = [];

		if (detectWeb(doc, url) == "multiple"){
			// Offer every search hit, keyed by its link, labelled with its title.
			var candidates = {};
			var titles = doc.evaluate('//p[@class="results"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
			var nextTitle;
			while ((nextTitle = titles.iterateNext())){
				candidates[nextTitle.href] = nextTitle.textContent;
			}

			var selected = Zotero.selectItems(candidates);
			// selectItems may hand back a falsy value (for example when the
			// dialog is cancelled); stop here instead of waiting on an empty job.
			if (!selected) {
				return true;
			}
			for (var link in selected){
				articles.push(link);
			}
			if (articles.length === 0) {
				return true;
			}
		} else {
			articles = [url];
		}

		// processDocuments needs scrape to be declared in this file.
		Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});

		Zotero.wait();
	}