WilliamMayor/CiteLine.js

## CiteLine.js
{
	"translatorID":"17AF2C40-AAA5-4837-AB16-FDC54298AB42",
	"translatorType":1,
	"label":"CiteLine",
	"creator":"William Mayor",
	"target":"citeline",
	"minVersion":"1.0.0b3.r1",
	"maxVersion":"",
	"priority":100,
	"browserSupport":"gcs",
	"configOptions":{"dataMode":"block"},
	"inRepository":false,
	"lastUpdated":"2011-07-21 19:58:00"
}

/**
 * Reports whether the incoming data looks like a CiteLine file, i.e. whether
 * its first five characters spell "title" (compared case-insensitively).
 *
 * @returns {Boolean} true when the data starts with "title"; false otherwise,
 *     including when nothing could be read.
 */
function detectImport() {
    Zotero.debug("detectImport");
    var marker = "title";
    var head = Zotero.read(marker.length);
    // Zotero.read() yields a falsy value once the input is exhausted (the
    // line reader of this translator depends on that), so guard before
    // calling a string method on the result.
    if (!head) {
        return false;
    }
    return head.toLowerCase() == marker;
}
/**
 * Imports one item from CiteLine data, as parsed by getDetails() into an
 * object keyed by lower-cased detail name (a repeated key holds an array).
 *
 * Plain details are copied onto the matching Zotero fields, "year"/"month"
 * are merged into item.date, every "author"/"editor" becomes a creator, the
 * first "url" becomes item.url and any further ones become PDF attachments.
 * The finished item is handed to Zotero through item.complete().
 */
function doImport() {
    Zotero.debug("doImport");
    var details = getDetails();
    var item = makeItem(details['type']);

    // A detail that occurred once is a string, a repeated one is an array.
    var asList = function (value) {
        return Array.isArray(value) ? value : [value];
    };

    // CiteLine detail name -> Zotero field name(s) it is copied to.
    var citeseeingToZotero = {"title":"title", "doi":"doi", "edition":"edition", "number":"issue", "pages":"pages", "report type":"reportType", "volume":"volume", "publisher": "publisher", "publication": ["place", "bookTitle", "publicationTitle"], "series":"series", "institution":"institution", "school":"university"};
    for (var where in citeseeingToZotero) {
        var value = details[where];
        // Leave fields the file did not supply untouched instead of writing
        // undefined onto the item (doWeb in the web translator does the same).
        if (!value) continue;
        var targets = asList(citeseeingToZotero[where]);
        for (var t = 0; t < targets.length; t++) {
            item[targets[t]] = value;
        }
    }

    // Date is "year-month", or whichever of the two is present.
    var year = details["year"];
    var month = details["month"];
    if (year && month) {
        item.date = year + "-" + month;
    } else if (year) {
        item.date = year;
    } else if (month) {
        item.date = month;
    }

    // Authors first, then editors, each in file order.
    var roles = ["author", "editor"];
    for (var r = 0; r < roles.length; r++) {
        var names = details[roles[r]];
        if (!names) continue;
        names = asList(names);
        for (var n = 0; n < names.length; n++) {
            item.creators.push(Zotero.Utilities.cleanAuthor(names[n], roles[r]));
        }
    }

    var urls = details['url'];
    if (urls) {
        urls = asList(urls);
        item.url = urls[0];
        // Only touch item.attachments when there are alternate links.
        if (urls.length > 1) {
            item.attachments = [];
            for (var u = 1; u < urls.length; u++) {
                item.attachments.push({url:urls[u], title:"Alternate Link " + u, mimeType:"application/pdf"});
            }
        }
    }

    item.notes.push({note:"This document's details were found using citeseeing.com"});

    item.complete();
}
/**
 * Reads "key: value" pairs with getNextPair() until a line does not split
 * into exactly two parts, and collects them into an object.
 *
 * Keys are lower-cased. The first value seen for a key is stored as a string;
 * further values for the same key turn the entry into an array in file order.
 *
 * @returns {Object} map from lower-cased key to a string or an array of strings
 */
function getDetails() {
    var details = {};
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var pair = getNextPair(); pair.length == 2; pair = getNextPair()) {
        var key = pair[0].toLowerCase();
        var value = pair[1];
        // Only values stored by this loop count as "already seen"; a plain
        // details[key] lookup would also find inherited members such as
        // "constructor" and wrap them into the result.
        var existing = hasOwn.call(details, key) ? details[key] : undefined;
        if (!existing) {
            details[key] = value;
        } else if (Array.isArray(existing)) {
            existing.push(value);
        } else {
            details[key] = [existing, value];
        }
    }
    return details;
}
/**
 * Reads the next line of input (up to, but not including, "\n" or the end of
 * the data) and splits it into key and value at the FIRST ": ".
 *
 * Splitting only once keeps values that themselves contain ": " (a title
 * with a subtitle, for instance) in one piece; callers treat any result that
 * is not exactly two elements long as the end of the details.
 *
 * @returns {String[]} [key, value] when the line contains ": ", otherwise a
 *     one-element array holding the whole line (possibly the empty string)
 */
function getNextPair() {
    var line = "";
    var current;
    while ((current = Zotero.read(1)) && current != "\n") {
        line += current;
    }
    var separator = ": ";
    var at = line.indexOf(separator);
    if (at == -1) {
        return [line];
    }
    return [line.substring(0, at), line.substring(at + separator.length)];
}
/**
 * Creates an empty Zotero item whose item type corresponds to the given
 * CiteSeeing document type. Unknown or missing types become "book".
 *
 * @param {String} type CiteSeeing document type, e.g. "Article"
 * @returns {Zotero.Item} a new item of the mapped type
 */
function makeItem(type) {
    // CiteSeeing document type -> Zotero item type
    var zoteroTypes = {
        "Article": "journalArticle",
        "Document": "journalArticle",
        "In Collection": "bookSection",
        "Conference": "conferencePaper",
        "In Proceedings": "conferencePaper",
        "Masters Thesis": "thesis",
        "PhD Thesis": "thesis",
        "Tech Report": "report",
        "Unpublished": "manuscript"
    };
    // Exact string matches on the table's own keys only, everything else
    // falls back to the default.
    var known = typeof type == "string" &&
        Object.prototype.hasOwnProperty.call(zoteroTypes, type);
    return new Zotero.Item(known ? zoteroTypes[type] : "book");
}

## CiteSeeing.js
{
	"translatorID":"F72D8E0A-E99F-4EF5-8674-5D3FD190FF5D",
	"translatorType":4,
	"label":"CiteSeeing",
	"creator":"William Mayor",
	"target":"^http://localhost/document/view/[0-9]+",
	"minVersion":"1.0.0b3.r1",
	"maxVersion":"",
	"priority":100,
	"inRepository":false,
	"lastUpdated":"2011-07-15 10:30:00"
}

/**
 * Claims every page this translator is run on as a journal article,
 * regardless of its content; doWeb() picks the actual item type from the
 * page's type row.
 *
 * @param {Document} doc the page (not inspected)
 * @param {String} url the page address (not inspected)
 * @returns {String} always "journalArticle"
 */
function detectWeb(doc, url) {
    var itemType = "journalArticle";
    return itemType;
}

/**
 * Scrapes a CiteSeeing document page into one Zotero item and completes it.
 *
 * The item type comes from the page's type row; labelled cells, author and
 * editor links, url links and the name/address rows of publisher,
 * publication, series, institution and school are copied onto the item.
 *
 * @param {Document} doc the document page
 * @param {String} url the page address, stored as the item's url
 */
function doWeb(doc, url) {
    var typeResult = doc.evaluate("//tr[@id='type']/td/text()", doc, null, XPathResult.STRING_TYPE, null);
    var item = makeItem(typeResult.stringValue);

    // [label of the table cell, Zotero field], copied only when non-empty
    var simpleFields = [
        ["Title", "title"], ["DOI", "doi"], ["Edition", "edition"], ["Number", "issue"],
        ["Pages", "pages"], ["Type", "reportType"], ["Volume", "volume"]
    ];
    for (var f = 0; f < simpleFields.length; f++) {
        var text = getValue(doc, simpleFields[f][0]);
        if (text) item[simpleFields[f][1]] = text;
    }

    // date is "year-month", or whichever of the two the page has
    var year = getValue(doc, "Year");
    if (year) item.date = year;
    var month = getValue(doc, "Month");
    if (month) {
        item.date = year ? item.date + "-" + month : month;
    }

    // authors first, then editors; the row class doubles as the creator type
    var creatorRoles = ["author", "editor"];
    for (var r = 0; r < creatorRoles.length; r++) {
        var role = creatorRoles[r];
        var people = doc.evaluate("//tr[@class='" + role + "']/td/a/text()", doc, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
        for (var person = people.iterateNext(); person; person = people.iterateNext()) {
            item.creators.push(Zotero.Utilities.cleanAuthor(person.textContent, role));
        }
    }

    item.url = url;
    var links = doc.evaluate("//tr[@class='url']/td/a/text()", doc, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
    var linkCount = 0;
    for (var link = links.iterateNext(); link; link = links.iterateNext()) {
        // It seems that Zotero will only accept one of these attachments
        if (linkCount == 0) item.attachments = [];
        linkCount++;
        item.attachments.push({url:link.textContent, title:"CiteSeeing Link " + linkCount, mimeType:"application/pdf"});
    }

    // [row id on the page, Zotero fields that receive the combined text]
    var combinedFields = [
        ["publisher", ["publisher"]],
        ["publication", ["place", "bookTitle", "publicationTitle"]],
        ["series", ["series"]],
        ["institution", ["institution"]],
        ["school", ["university"]]
    ];
    for (var c = 0; c < combinedFields.length; c++) {
        var combined = getCombined(doc, combinedFields[c][0]);
        if (!combined) continue;
        var targets = combinedFields[c][1];
        for (var t = 0; t < targets.length; t++) {
            item[targets[t]] = combined;
        }
    }

    item.notes.push({note:"This document's details were found using citeseeing.com"});

    item.complete();
}

/**
 * Joins the text found in the "name"/"address" rows that directly follow the
 * row with the given id (second cell of each, at most the next two rows).
 *
 * @param {Document} doc the document page
 * @param {String} id id attribute of the heading row, e.g. "publisher"
 * @returns {String|null} the texts joined with ", ", or null when none exist
 */
function getCombined(doc, id) {
    var xpath = "//tr[@id='" + id + "']/following-sibling::tr[position() < 3 and (@class='name' or @class='address')]/td[2]/text()";
    var matches = doc.evaluate(xpath, doc, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);

    var texts = [];
    for (var node = matches.iterateNext(); node; node = matches.iterateNext()) {
        texts.push(node.textContent);
    }
    if (texts.length == 0) {
        return null;
    }

    var combined = texts[0];
    for (var k = 1; k < texts.length; k++) {
        combined += ", " + texts[k];
    }
    return combined;
}


/**
 * Creates an empty Zotero item whose item type corresponds to the given
 * CiteSeeing document type. Unknown or missing types become "book".
 *
 * @param {String} type CiteSeeing document type, e.g. "Article"
 * @returns {Zotero.Item} a new item of the mapped type
 */
function makeItem(type) {
    // CiteSeeing document type -> Zotero item type
    var zoteroTypes = {
        "Article": "journalArticle",
        "Document": "journalArticle",
        "In Collection": "bookSection",
        "Conference": "conferencePaper",
        "In Proceedings": "conferencePaper",
        "Masters Thesis": "thesis",
        "PhD Thesis": "thesis",
        "Tech Report": "report",
        "Unpublished": "manuscript"
    };
    // Exact string matches on the table's own keys only, everything else
    // falls back to the default.
    var known = typeof type == "string" &&
        Object.prototype.hasOwnProperty.call(zoteroTypes, type);
    return new Zotero.Item(known ? zoteroTypes[type] : "book");
}

/**
 * Looks up the text of the cell(s) that follow the table cell whose text is
 * exactly the given label.
 *
 * @param {Document} doc the document page
 * @param {String} type label of the cell to look for, e.g. "Title"
 * @returns {String} the string value of the XPath result
 */
function getValue(doc, type) {
    var xpath = "//td[text()='" + type + "']/following-sibling::td/text()";
    return doc.evaluate(xpath, doc, null, XPathResult.STRING_TYPE, null).stringValue;
}

## recognisePDF.js
/*
    ***** BEGIN LICENSE BLOCK *****

    Copyright © 2009 Center for History and New Media
                     George Mason University, Fairfax, Virginia, USA
                     http://zotero.org

    This file is part of Zotero.

    Zotero is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Zotero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.

    ***** END LICENSE BLOCK *****
*/

/**
 * @fileOverview Tools for automatically retrieving a citation for the given PDF
 */
// Icons shown in an item's icon cell of the progress window: set to LOADING while
// the item is being recognized, then to SUCCESS or FAILURE once its result is known.
const Zotero_RecognizePDF_SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
const Zotero_RecognizePDF_FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
const Zotero_RecognizePDF_LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";

/**
 * Front end for recognizing PDFs
 * @namespace
 */
var Zotero_RecognizePDF = new function() {
	/**
	 * Checks whether a given PDF could theoretically be recognized: it must be a
	 * PDF attachment that has no source (parent) item yet
	 * @returns {Boolean} True if the PDF can be recognized, false if it cannot be
	 */
	this.canRecognize = function(/**Zotero.Item*/ item) {
		// coerce, so that a missing or empty MIME type yields false rather than
		// undefined or an empty string
		return !!(item.attachmentMIMEType &&
			item.attachmentMIMEType == "application/pdf" && !item.getSource());
	};

	/**
	 * Retrieves metadata for the PDF(s) selected in the Zotero Pane, placing the PDFs as
	 * children of the new items. Does nothing when the PDF converter check fails or when
	 * there is no selection.
	 */
	this.recognizeSelected = function() {
		if (!ZoteroPane_Local.checkPDFConverter()) {
			return;
		}

		var items = ZoteroPane_Local.getSelectedItems();
		if (!items) return;
		var itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer();
		itemRecognizer.recognizeItems(items);
	};
};

/**
 * @class Handles UI, etc. for recognizing multiple items
 */
Zotero_RecognizePDF.ItemRecognizer = function () {
	// Set to true by stop(); _callback() checks it and discards any result that
	// arrives after recognition was halted.
	this._stopped = false;
}

/**
 * Retrieves metadata for the PDF items passed, displaying a progress dialog during conversion
 * and placing the PDFs as children of the new items. The actual work starts in
 * _onWindowLoaded(), once the dialog fires "pageshow".
 * @param {Zotero.Item[]} items
 */
Zotero_RecognizePDF.ItemRecognizer.prototype.recognizeItems = function(items) {
	// keep a private copy as the work queue and remember the total for the progress bar
	this._items = items.slice();
	this._itemTotal = items.length;

	var recognizer = this;
	var dialog = window.openDialog("chrome://zotero/content/pdfProgress.xul", "", "chrome,close=yes,resizable=yes,dependent,dialog,centerscreen");
	this._progressWindow = dialog;
	dialog.addEventListener("pageshow", function() { recognizer._onWindowLoaded() }, false);
}

/**
 * Halts recognition of PDFs: raises the flag that makes _callback() drop any result
 * that arrives afterwards instead of continuing with the next item
 */
Zotero_RecognizePDF.ItemRecognizer.prototype.stop = function() {
	this._stopped = true;
}

/**
 * Called when the progress window has been opened; adds one tree row per queued item,
 * makes the cancel button and closing the window stop recognition, and begins recognizing
 */
Zotero_RecognizePDF.ItemRecognizer.prototype._onWindowLoaded = function() {
	var doc = this._progressWindow.document;

	// populate progress window
	var treechildren = doc.getElementById("treechildren");
	// indexed loop: for...in over an array would also visit any enumerable property
	// added to the array or to Array.prototype
	for(var i=0; i<this._items.length; i++) {
		var item = this._items[i];
		var treeitem = doc.createElement('treeitem');
		var treerow = doc.createElement('treerow');

		// status icon cell, looked up by id when the item is processed
		var treecell = doc.createElement('treecell');
		treecell.setAttribute("id", "item-"+item.id+"-icon");
		treerow.appendChild(treecell);

		// title of the PDF item itself
		treecell = doc.createElement('treecell');
		treecell.setAttribute("label", item.getField("title"));
		treerow.appendChild(treecell);

		// result cell, later labelled with the recognized title or an error message
		treecell = doc.createElement('treecell');
		treecell.setAttribute("id", "item-"+item.id+"-title");
		treerow.appendChild(treecell);

		treeitem.appendChild(treerow);
		treechildren.appendChild(treeitem);
	}

	var me = this;
	this._progressIndicator = doc.getElementById("progress-indicator");
	doc.getElementById("cancel-button").addEventListener("command", function() {
		me.stop();
		me._progressWindow.close();
	}, false);
	this._progressWindow.addEventListener("close", function() { me.stop() }, false);
	this._recognizeItem();
}

/**
 * Takes the next item off this._items and starts recognizing it; the result arrives in
 * _callback(), which calls this method again. Finishes via _done() once the queue is empty.
 * @private
 */
Zotero_RecognizePDF.ItemRecognizer.prototype._recognizeItem = function() {
	var remaining = this._items.length;
	if(remaining == 0) {
		this._done();
		return;
	}

	// progress in percent, based on the items already taken off the queue
	var processed = this._itemTotal - remaining;
	this._progressIndicator.value = processed/this._itemTotal*100;
	this._item = this._items.shift();

	var iconCell = this._progressWindow.document.getElementById("item-"+this._item.id+"-icon");
	iconCell.setAttribute("src", Zotero_RecognizePDF_LOADING_IMAGE);

	var file = this._item.getFile();
	if(!file) {
		this._callback(false, "recognizePDF.fileNotFound");
		return;
	}

	var itemRecognizer = this;
	var recognizer = new Zotero_RecognizePDF.Recognizer();
	recognizer.recognize(file, this._item.libraryID, function(newItem, error) { itemRecognizer._callback(newItem, error) });
}

/**
 * Cleans up after items are recognized: fills the progress bar, relabels the cancel
 * button as a close button, shows the completion label and makes the progress window
 * close two seconds after it loses focus
 */
Zotero_RecognizePDF.ItemRecognizer.prototype._done = function() {
	var itemRecognizer = this;
	var closeLater = function() {
		itemRecognizer._progressWindow.setTimeout(function() { itemRecognizer._progressWindow.close() }, 2000);
	};

	this._progressIndicator.value = 100;
	this._progressWindow.document.getElementById("cancel-button").label = Zotero.getString("recognizePDF.close.label");
	this._progressWindow.addEventListener("blur", closeLater, false);
	this._progressWindow.document.getElementById("label").value = Zotero.getString("recognizePDF.complete.label");
}

/**
 * Callback function to be executed upon recognition completion. Files the new item
 * alongside the PDF, updates the PDF's row in the progress window and moves on to the
 * next item (or finishes early when the query limit was hit).
 * @param {Zotero.Item|Boolean} newItem The new item created from translation, or false if
 *	recognition was unsuccessful
 * @param {String} [error] The error name, if recognition was unsuccessful.
 */
Zotero_RecognizePDF.ItemRecognizer.prototype._callback = function(newItem, error) {
	if(this._stopped) {
		// recognition was cancelled meanwhile: throw the late result away
		if(newItem) Zotero.Items.erase(newItem.id);
		return;
	}

	if(newItem) {
		// put new item in same collections as the old one
		var itemCollections = this._item.getCollections();
		for(var j=0; j<itemCollections.length; j++) {
			var collection = Zotero.Collections.get(itemCollections[j]);
			collection.addItem(newItem.id);
		}

		// put old item as a child of the new item
		this._item.setSource(newItem.id);
		this._item.save();
	}

	var doc = this._progressWindow.document;
	// add name
	doc.getElementById("item-"+this._item.id+"-title").
		setAttribute("label", (newItem ? newItem.getField("title") : Zotero.getString(error)));
	// update icon
	doc.getElementById("item-"+this._item.id+"-icon").
		setAttribute("src", (newItem ? Zotero_RecognizePDF_SUCCESS_IMAGE : Zotero_RecognizePDF_FAILURE_IMAGE));

	if(error == "recognizePDF.limit") {
		// now done, since we hit the query limit: mark every item still queued as failed
		var limitMessage = Zotero.getString(error);
		// indexed loop rather than for...in, which is not meant for arrays
		for(var i=0; i<this._items.length; i++) {
			doc.getElementById("item-"+this._items[i].id+"-title").
				setAttribute("label", limitMessage);
			doc.getElementById("item-"+this._items[i].id+"-icon").
				setAttribute("src", Zotero_RecognizePDF_FAILURE_IMAGE);
		}
		this._done();
	} else {
		// scroll to this item
		doc.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, this._itemTotal-this._items.length-5));
		// continue recognizing
		this._recognizeItem();
	}
}

/*Zotero_RecognizePDF.ItemRecognizer.prototype._captchaCallback = function(img) {
	var io = {dataIn:img};
	Zotero.debug(img);
	this._progressWindow.openDialog("chrome://zotero/content/pdfCaptcha.xul", "", "chrome,modal,resizable=no", io);

	if(io.dataOut) return io.dataOut;

	this.stop();
	this._progressWindow.close();
	return false;
}*/

/**
 * @class PDF recognizer backend. Holds one bit flag per metadata source and a mask
 * (_mode) of the sources recognize() has not tried yet.
 */
Zotero_RecognizePDF.Recognizer = function () {
	this._citeseeing = 1 << 0;
	this._crossref = 1 << 1;
	this._googlescholar = 1 << 2;
	// start with every source enabled
	this._mode = this._citeseeing | this._crossref | this._googlescholar;
}

/**
 * Retrieves metadata for a PDF and saves it as an item.
 *
 * Each call tries the next source still enabled in this._mode (citeseeing.com, then a
 * DOI lookup through CrossRef, then Google Scholar) and clears that source's bit. The
 * query methods call recognize() again without arguments when they fail, so the saved
 * arguments of the first call are reused. When no source is left, the callback receives
 * "recognizePDF.noMatches".
 *
 * @param {nsIFile} file The PDF file to retrieve metadata for
 * @param libraryID Library ID passed on to the translators
 * @param {Function} callback The function to be executed when recognition is complete
 * @param {Function} [captchaCallback] Not used by this implementation
 */
Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, libraryID, callback, captchaCallback) {
	// Save arguments to object so later recursive calls don't have to provide them.
	if (file) this._file = file;
	if (libraryID) this._libraryID = libraryID;
	if (callback) this._callback = callback;
	Zotero.debug("Trying to recognize " + this._file.path);

	if (this._mode & this._citeseeing) {
		Zotero.debug("Checking citeseeing.com for PDF details.");
		this._mode -= this._citeseeing;

		this._queryCiteSeeing(this._file, this._libraryID);
		return;
	}

	this._getLines(this._file);
	if (!this._lines || !this._lineLengths) {
		// _getLines() could not read the PDF and has already reported that through the
		// callback; there is no text to work with.
		return;
	}

	if (this._mode & this._crossref) {
		Zotero.debug("Checking for DOI then searching CrossRef for PDF details.");
		this._mode -= this._crossref;

		var allText = this._lines.join("\n");
		Zotero.debug(allText);
		var m = Zotero.Utilities.cleanDOI(allText);
		if(m) {
			this._queryCrossRef(m[0]);
		}
		else {
			this.recognize();
		}
		return;
	}

	if (this._mode & this._googlescholar) {
		Zotero.debug("Searching GoogleScholar for PDF details.");
		this._mode -= this._googlescholar;

		var lineLengthsLength = this._lineLengths.length;
		if(lineLengthsLength < 20) {
			this._callback(false, "recognizePDF.noOCR");
			return;
		}

		// get (not quite) median length. Sort a copy, numerically: sorting in place would
		// break the index correspondence between _lineLengths and _lines used below, and
		// the default sort compares numbers as strings.
		var sortedLengths = this._lineLengths.slice().sort(function(a, b) { return a - b; });
		var medianLength = sortedLengths[Math.floor(lineLengthsLength/2)];

		// pick lines within 4 chars of the median (this is completely arbitrary)
		this._goodLines = [];
		var uBound = medianLength + 4;
		var lBound = medianLength - 4;
		for (var i=0; i<lineLengthsLength; i++) {
			if(this._lineLengths[i] > lBound && this._lineLengths[i] < uBound) {
				// Strip all quotation marks so they don't mess up search query quoting
				var line = this._lines[i].replace(/"/g, '');
				this._goodLines.push(line);
			}
		}
		this._startLine = this._iteration = 0;

		this._queryGoogle();
		return;
	}

	this._callback(false, "recognizePDF.noMatches");
}
/**
 * Extracts text from the first pages of the PDF with the external PDF converter and
 * stores the usable lines in this._lines and their lengths, at the same indexes, in
 * this._lineLengths. Does nothing when both are already set. If the converter produced
 * no output file, reports "recognizePDF.couldNotRead" through this._callback and leaves
 * both properties unset.
 * @param {nsIFile} file The PDF file to extract text from
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._getLines = function(file) {
    // lines were already extracted by an earlier call
    if (this._lines && this._lineLengths) {
        return;
    }
    Zotero.debug("Running pdf2text and saving lines in first three pages.");
    const MAX_PAGES = 3;
	// captures a line's leading run of two or more words separated by single spaces,
	// without the leading whitespace; lines with fewer words do not match
	const lineRe = /^\s*([^\s]+(?: [^\s]+)+)/;

	// the converter writes its output into this file; remove any stale copy first
	var cacheFile = Zotero.getZoteroDirectory();
	cacheFile.append("recognizePDFcache.txt");
	if(cacheFile.exists()) {
		cacheFile.remove(false);
	}

	// NOTE(review): this message omits the '-layout' switch that is passed below
	Zotero.debug('Running pdftotext -enc UTF-8 -nopgbrk '
				+ '-l ' + MAX_PAGES + ' "' + file.path + '" "'
				+ cacheFile.path + '"');

	// the converter binary is looked up in the Zotero directory
	var proc = Components.classes["@mozilla.org/process/util;1"].
			createInstance(Components.interfaces.nsIProcess);
	var exec = Zotero.getZoteroDirectory();
	exec.append(Zotero.Fulltext.pdfConverterFileName);
	proc.init(exec);

	var args = ['-enc', 'UTF-8', '-nopgbrk', '-layout', '-l', MAX_PAGES];
	args.push(file.path, cacheFile.path);
	// presumably the first argument makes run() wait for the process to finish,
	// since the output file is checked right afterwards (confirm against nsIProcess)
	proc.run(true, args, args.length);

	if(!cacheFile.exists()) {
		this._callback(false, "recognizePDF.couldNotRead");
		return;
	}

	// read the converter output back line by line, decoding it as UTF-8
	var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
		.createInstance(Components.interfaces.nsIFileInputStream);
	inputStream.init(cacheFile, 0x01, 0664, 0);
	var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
		.createInstance(Components.interfaces.nsIConverterInputStream);
	intlStream.init(inputStream, "UTF-8", 65535,
		Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
	intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);

	// get the lines in this sample
	this._lines = [];
	this._lineLengths = [];
	var str = {};
	while(intlStream.readLine(str)) {
		var line = lineRe.exec(str.value);
		if(line) {
			// keep text and length at the same index in the two arrays
			this._lines.push(line[1]);
			this._lineLengths.push(line[1].length);
		}
	}

	inputStream.close();
	cacheFile.remove(false);
}
/**
 * Tries to fetch an nsIFile pointer to the system's java binary by looking for it in
 * every directory listed in the PATH environment variable.
 * @returns {nsIFile|null} The first java binary found, or null if java cannot be found
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._getJava = function() {
	Zotero.debug("Getting java");

	var env = Components.classes["@mozilla.org/process/environment;1"].
		getService(Components.interfaces.nsIEnvironment);

	if (!env.exists("PATH")) {
		return null;
	}

	// Windows separates PATH entries with ";" and names the binary "java.exe"
	var separator = Zotero.isWin ? ";" : ":";
	var binaryName = Zotero.isWin ? "java.exe" : "java";
	var path = env.get("PATH").split(separator);

	for (var i = 0; i < path.length; i++) {
		// skip empty entries (e.g. from a trailing or doubled separator)
		if (!path[i]) continue;
		try {
			var file = Components.classes["@mozilla.org/file/local;1"].
				createInstance(Components.interfaces.nsILocalFile);
			file.initWithPath(path[i]);
			file.append(binaryName);
			if (file.exists()) return file;
		} catch (e) {
			// initWithPath() throws for entries it cannot use (such as relative paths);
			// one bad entry should not abort the whole search
			Zotero.debug("Skipping PATH entry '" + path[i] + "': " + e);
		}
	}
	return null;
}
/**
 * Queries citeseeing.com for details on the given pdf by running citeline.jar (from the
 * Zotero directory) with the system's java binary.
 * If details are found they are written to a file by the jar, read back and imported
 * using the CiteLine translator; the file is then deleted. A recognized item is
 * delivered through this._callback. If java, the jar or the details file is missing,
 * or the import fails, recognition continues with the next source via recognize().
 * @param {nsIFile} pdf The PDF file to look up
 * @param libraryID Not used here; the import uses this._libraryID
 * @returns {null|undefined} No meaningful value
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._queryCiteSeeing = function(pdf, libraryID) {
	var java = this._getJava();
	if (null == java) {
		// Without java the jar cannot run. Move on to the next source like the other
		// failure paths below; returning silently would mean the callback never fires.
		Zotero.debug("No java");
		this.recognize();
		return null;
	}
	Zotero.debug(java.path);

	// file the jar writes the details to; remove any stale copy first
	var detailFile = Zotero.getZoteroDirectory();
	detailFile.append("pdfDetails.citeline");
	if(detailFile.exists()) {
		detailFile.remove(false);
	}

	var jarFile = Zotero.getZoteroDirectory();
	jarFile.append("citeline.jar");
	if (!jarFile.exists()) {
		Zotero.debug("No citeline.jar");
		this.recognize();
		return null;
	}

	Zotero.debug("Running java -jar " + jarFile.path + " " + pdf.path + " " + detailFile.path);
	var proc = Components.classes["@mozilla.org/process/util;1"].
		createInstance(Components.interfaces.nsIProcess);
	proc.init(java);
	var args = ["-jar", jarFile.path, pdf.path, detailFile.path];
	proc.run(true, args, args.length);
	Zotero.debug("Run");
	if (!detailFile.exists()) {
		Zotero.debug("No details");
		this.recognize();
		return null;
	}
	Zotero.debug("Got details");

	// read the whole details file as UTF-8 text
	var data = "";
	var fstream = Components.classes["@mozilla.org/network/file-input-stream;1"].
		createInstance(Components.interfaces.nsIFileInputStream);
	var cstream = Components.classes["@mozilla.org/intl/converter-input-stream;1"].
		createInstance(Components.interfaces.nsIConverterInputStream);
	fstream.init(detailFile, -1, 0, 0);
	cstream.init(fstream, "UTF-8", 0, 0);

	var str = {};
	var read = 0;
	do {
		read = cstream.readString(0xffffffff, str);
		data += str.value;
	} while (read != 0);

	cstream.close();

	Zotero.debug(data);

	// import the details with the CiteLine translator
	var me = this;
	var translate = new Zotero.Translate.Import;
	translate.setTranslator("17AF2C40-AAA5-4837-AB16-FDC54298AB42");
	var location = {"path":detailFile.path};
	translate.setLocation(location);
	translate.setString(data);
	translate.setHandler("itemDone", function(translate, item) {
		Zotero.debug("Item done");
		me._callback(item);
	});
	translate.setHandler("select", function(translate, items, callback) {
		return me._selectItems(translate, items, callback);
	});
	translate.setHandler("done", function(translate, success) {
		// import failed: try the next source
		if(!success) me.recognize();
	});
	translate.translate(this._libraryID, false);

	detailFile.remove(false);
}

/**
 * Runs a search translation for the given DOI, using a journal article search item.
 * A found item is passed to this._callback; if the translation does not succeed,
 * recognition continues with the next source via recognize().
 * @param {String} doi The DOI to search for
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._queryCrossRef = function(doi) {
	var recognizer = this;

	// use CrossRef to look for DOI
	var search = new Zotero.Translate("search");
	search.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
	search.setSearch({"itemType":"journalArticle", "DOI":doi});

	search.setHandler("itemDone", function(translate, item) {
		recognizer._callback(item);
	});
	search.setHandler("select", function(translate, items, callback) {
		return recognizer._selectItems(translate, items, callback);
	});
	search.setHandler("done", function(translate, success) {
		Zotero.debug("Finished search");
		if(!success) recognizer.recognize();
	});

	Zotero.debug("Searching crossref for " + doi);
	search.translate(this._libraryID, false);
}

/**
 * Queries Google Scholar for metadata for this PDF.
 *
 * Builds a query of quoted phrases from the next unused lines of this._goodLines, loads
 * the search page in a hidden browser and lets _scrape() run the web translator on it.
 * If that translation fails, the method is called again with the following lines. After
 * four attempts, or when the lines run out, it gives up and returns control to recognize().
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
	var me = this;

	if(this._iteration > 3 || this._startLine >= this._goodLines.length) {
		// Giving up on Google Scholar: release the hidden browser (best effort) and let
		// recognize() decide what happens next.
		try {
			if(this._hiddenBrowser) Zotero.Browser.deleteHiddenBrowser(this._hiddenBrowser);
		} catch(e) {
			Zotero.debug("RecognizePDF: could not delete hidden browser: " + e);
		}
		this.recognize();
		return;
	}
	this._iteration++;

	var queryString = "";
	// take the relevant parts of some lines (exclude hyphenated word)
	var queryStringWords = 0;
	while(queryStringWords < 25 && this._startLine < this._goodLines.length) {
		var words = this._goodLines[this._startLine].split(/\s+/);
		// get rid of first and last words
		words.shift();
		words.pop();
		// make sure there are no long words (probably OCR mistakes)
		var skipLine = false;
		for(var i=0; i<words.length; i++) {
			if(words[i].length > 20) {
				skipLine = true;
				break;
			}
		}
		// add words to query
		if(!skipLine && words.length) {
			queryStringWords += words.length;
			queryString += '"'+words.join(" ")+'" ';
		}
		this._startLine++;
	}

	Zotero.debug("RecognizePDF: Query string "+queryString);

	// pass query string to Google Scholar and translate
	var url = "http://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search";
	if(!this._hiddenBrowser) {
		this._hiddenBrowser = Zotero.Browser.createHiddenBrowser();
		this._hiddenBrowser.docShell.allowImages = false;
	}

	var translate = new Zotero.Translate("web");
	translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
	translate.setHandler("itemDone", function(translate, item) {
		Zotero.Browser.deleteHiddenBrowser(me._hiddenBrowser);
		me._callback(item);
	});
	translate.setHandler("select", function(translate, items, callback) {
		me._selectItems(translate, items, callback);
	});
	translate.setHandler("done", function(translate, success) {
		// nothing usable on this results page: retry with the next lines
		if(!success) me._queryGoogle();
	});

	this._hiddenBrowser.addEventListener("pageshow", function() { me._scrape(translate) }, true);

	this._hiddenBrowser.loadURIWithFlags(url,
		Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY, null, null, null);
}

/**
 * To be executed when Google Scholar is loaded; _queryGoogle() calls this from the
 * hidden browser's "pageshow" listener. Ignores about:blank, reports
 * "recognizePDF.limit" when the page is titled "403 Forbidden", and otherwise runs the
 * given translator on the loaded document.
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._scrape = function(/**Zotero.Translate*/ translate) {
	// nothing has been loaded yet; wait for the next pageshow
	if(this._hiddenBrowser.contentDocument.location.href == "about:blank") return;

	if(this._hiddenBrowser.contentDocument.title == "403 Forbidden") {
		// hit the captcha
		/*
		var forms = this._hiddenBrowser.contentDocument.getElementsByTagName("form");
		if(forms.length && forms[0].getAttribute("action") == "Captcha") {
			var captchaImage = forms[0].getElementsByTagName("img");
			var captchaBox = this._hiddenBrowser.contentDocument.getElementsByName("captcha");
			if(captchaImage.length && captchaBox.length && this._captchaCallback) {
				var text = this._captchaCallback(captchaImage[0].src);
				if(text) {
					captchaBox[0].value = text;
					forms[0].submit();
					return;
				}
			}
		}*/
		this._callback(false, "recognizePDF.limit");
		return;
	}

	// NOTE(review): Function.caller is non-standard and unavailable in strict mode; this
	// presumably resolves to the anonymous listener that _queryGoogle() registered, so
	// that it is not run again for this page. Confirm before changing how _scrape is called.
	this._hiddenBrowser.removeEventListener("pageshow", this._scrape.caller, true);
	translate.setDocument(this._hiddenBrowser.contentDocument);
	translate.translate(this._libraryID, false);
}

/**
 * "select" handler for the translators: picks the first entry of the item list and
 * passes it, alone, to the callback. The callback is not invoked for an empty list.
 * @param {Zotero.Translate} translate The translation asking for a selection (unused)
 * @param {Object} items The entries to choose from, by key
 * @param {Function} callback Receives an object holding only the chosen entry
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._selectItems = function(/**Zotero.Translate*/ translate,
		/**Object*/ items, /**Function**/ callback) {
	for(var i in items) {
		var obj = {};
		// hand over just this entry's own value, not the whole list under its key
		obj[i] = items[i];
		callback(obj);
		return;
	}
}
	{
	"translatorID":"17AF2C40-AAA5-4837-AB16-FDC54298AB42",
	"translatorType":1,
	"label":"CiteLine",
	"creator":"William Mayor",
	"target":"citeline",
	"minVersion":"1.0.0b3.r1",
	"maxVersion":"",
	"priority":100,
	"browserSupport":"gcs",
	"configOptions":{"dataMode":"block"},
	"inRepository":false,
	"lastUpdated":"2011-07-21 19:58:00"
	}

	/**
	 * Reports whether the incoming data looks like a CiteLine file, i.e. whether
	 * its first five characters spell "title" (compared case-insensitively).
	 *
	 * @returns {Boolean} true when the data starts with "title"; false otherwise,
	 *     including when nothing could be read.
	 */
	function detectImport() {
	    Zotero.debug("detectImport");
	    var marker = "title";
	    var head = Zotero.read(marker.length);
	    // Zotero.read() yields a falsy value once the input is exhausted (the
	    // line reader of this translator depends on that), so guard before
	    // calling a string method on the result.
	    if (!head) {
	        return false;
	    }
	    return head.toLowerCase() == marker;
	}
	/**
	 * Imports one item from CiteLine data, as parsed by getDetails() into an
	 * object keyed by lower-cased detail name (a repeated key holds an array).
	 *
	 * Plain details are copied onto the matching Zotero fields, "year"/"month"
	 * are merged into item.date, every "author"/"editor" becomes a creator, the
	 * first "url" becomes item.url and any further ones become PDF attachments.
	 * The finished item is handed to Zotero through item.complete().
	 */
	function doImport() {
	    Zotero.debug("doImport");
	    var details = getDetails();
	    var item = makeItem(details['type']);

	    // A detail that occurred once is a string, a repeated one is an array.
	    var asList = function (value) {
	        return Array.isArray(value) ? value : [value];
	    };

	    // CiteLine detail name -> Zotero field name(s) it is copied to.
	    var citeseeingToZotero = {"title":"title", "doi":"doi", "edition":"edition", "number":"issue", "pages":"pages", "report type":"reportType", "volume":"volume", "publisher": "publisher", "publication": ["place", "bookTitle", "publicationTitle"], "series":"series", "institution":"institution", "school":"university"};
	    for (var where in citeseeingToZotero) {
	        var value = details[where];
	        // Leave fields the file did not supply untouched instead of writing
	        // undefined onto the item.
	        if (!value) continue;
	        var targets = asList(citeseeingToZotero[where]);
	        for (var t = 0; t < targets.length; t++) {
	            item[targets[t]] = value;
	        }
	    }

	    // Date is "year-month", or whichever of the two is present.
	    var year = details["year"];
	    var month = details["month"];
	    if (year && month) {
	        item.date = year + "-" + month;
	    } else if (year) {
	        item.date = year;
	    } else if (month) {
	        item.date = month;
	    }

	    // Authors first, then editors, each in file order.
	    var roles = ["author", "editor"];
	    for (var r = 0; r < roles.length; r++) {
	        var names = details[roles[r]];
	        if (!names) continue;
	        names = asList(names);
	        for (var n = 0; n < names.length; n++) {
	            item.creators.push(Zotero.Utilities.cleanAuthor(names[n], roles[r]));
	        }
	    }

	    var urls = details['url'];
	    if (urls) {
	        urls = asList(urls);
	        item.url = urls[0];
	        // Only touch item.attachments when there are alternate links.
	        if (urls.length > 1) {
	            item.attachments = [];
	            for (var u = 1; u < urls.length; u++) {
	                item.attachments.push({url:urls[u], title:"Alternate Link " + u, mimeType:"application/pdf"});
	            }
	        }
	    }

	    item.notes.push({note:"This document's details were found using citeseeing.com"});

	    item.complete();
	}
	/**
	 * Reads "key: value" pairs with getNextPair() until a line does not split
	 * into exactly two parts, and collects them into an object.
	 *
	 * Keys are lower-cased. The first value seen for a key is stored as a string;
	 * further values for the same key turn the entry into an array in file order.
	 *
	 * @returns {Object} map from lower-cased key to a string or an array of strings
	 */
	function getDetails() {
	    var details = {};
	    var hasOwn = Object.prototype.hasOwnProperty;
	    for (var pair = getNextPair(); pair.length == 2; pair = getNextPair()) {
	        var key = pair[0].toLowerCase();
	        var value = pair[1];
	        // Only values stored by this loop count as "already seen"; a plain
	        // details[key] lookup would also find inherited members such as
	        // "constructor" and wrap them into the result.
	        var existing = hasOwn.call(details, key) ? details[key] : undefined;
	        if (!existing) {
	            details[key] = value;
	        } else if (Array.isArray(existing)) {
	            existing.push(value);
	        } else {
	            details[key] = [existing, value];
	        }
	    }
	    return details;
	}
	/**
	 * Reads the next line of input (up to, but not including, "\n" or the end of
	 * the data) and splits it into key and value at the FIRST ": ".
	 *
	 * Splitting only once keeps values that themselves contain ": " (a title
	 * with a subtitle, for instance) in one piece; callers treat any result that
	 * is not exactly two elements long as the end of the details.
	 *
	 * @returns {String[]} [key, value] when the line contains ": ", otherwise a
	 *     one-element array holding the whole line (possibly the empty string)
	 */
	function getNextPair() {
	    var line = "";
	    var current;
	    while ((current = Zotero.read(1)) && current != "\n") {
	        line += current;
	    }
	    var separator = ": ";
	    var at = line.indexOf(separator);
	    if (at == -1) {
	        return [line];
	    }
	    return [line.substring(0, at), line.substring(at + separator.length)];
	}
	/**
	 * Creates an empty Zotero item whose item type corresponds to the given
	 * CiteSeeing document type. Unknown or missing types become "book".
	 *
	 * @param {String} type CiteSeeing document type, e.g. "Article"
	 * @returns {Zotero.Item} a new item of the mapped type
	 */
	function makeItem(type) {
	    // CiteSeeing document type -> Zotero item type
	    var zoteroTypes = {
	        "Article": "journalArticle",
	        "Document": "journalArticle",
	        "In Collection": "bookSection",
	        "Conference": "conferencePaper",
	        "In Proceedings": "conferencePaper",
	        "Masters Thesis": "thesis",
	        "PhD Thesis": "thesis",
	        "Tech Report": "report",
	        "Unpublished": "manuscript"
	    };
	    // Exact string matches on the table's own keys only, everything else
	    // falls back to the default.
	    var known = typeof type == "string" &&
	        Object.prototype.hasOwnProperty.call(zoteroTypes, type);
	    return new Zotero.Item(known ? zoteroTypes[type] : "book");
	}
	{
	"translatorID":"F72D8E0A-E99F-4EF5-8674-5D3FD190FF5D",
	"translatorType":4,
	"label":"CiteSeeing",
	"creator":"William Mayor",
	"target":"^http://localhost/document/view/[0-9]+",
	"minVersion":"1.0.0b3.r1",
	"maxVersion":"",
	"priority":100,
	"inRepository":false,
	"lastUpdated":"2011-07-15 10:30:00"
	}

	/**
	 * Reports the item type for a page. Always answers "journalArticle"; neither
	 * the document nor the URL is inspected.
	 * @param doc The page document (unused)
	 * @param url The page URL (unused)
	 * @returns {String} "journalArticle"
	 */
	function detectWeb(doc, url) {
	return "journalArticle";
	}

	/**
	 * Scrapes a document page into a single Zotero item and completes it.
	 *
	 * Simple fields are read with getValue(), combined name/address fields with
	 * getCombined(), and creators and links with XPath node iterators. The
	 * iterators are requested in document order so that authors, editors and
	 * links keep the order in which the page lists them.
	 *
	 * @param doc The page document
	 * @param {String} url The page URL, stored as the item's URL
	 */
	function doWeb(doc, url) {
		var type = doc.evaluate("//tr[@id='type']/td/text()", doc, null, XPathResult.STRING_TYPE, null);
		var item = makeItem(type.stringValue);

		// page label -> Zotero field
		var citeseeingToZotero = {"Title":"title", "DOI":"doi", "Edition":"edition", "Number":"issue", "Pages":"pages", "Type":"reportType", "Volume":"volume"};
		for (var where in citeseeingToZotero)
		{
			var value = getValue(doc, where);
			if (value) item[citeseeingToZotero[where]] = value;
		}

		// date is "year", "year-month" or just "month", depending on what is present
		var year = getValue(doc, "Year");
		if (year) item.date = year;
		var month = getValue(doc, "Month");
		if (month)
		{
			if (year)
				item.date += "-" + month;
			else
				item.date = month;
		}

		var authors = doc.evaluate("//tr[@class='author']/td/a/text()", doc, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
		var author;
		while (author = authors.iterateNext()) {
			item.creators.push(Zotero.Utilities.cleanAuthor(author.textContent, "author"));
		}

		var editors = doc.evaluate("//tr[@class='editor']/td/a/text()", doc, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
		var editor;
		while (editor = editors.iterateNext()) {
			item.creators.push(Zotero.Utilities.cleanAuthor(editor.textContent, "editor"));
		}

		item.url = url;
		var urls = doc.evaluate("//tr[@class='url']/td/a/text()", doc, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
		var link, i = 1;
		while (link = urls.iterateNext()) {
			// It seems that Zotero will only accept one of these attachments
			if (i == 1) item.attachments = [];
			item.attachments.push({url:link.textContent, title:"CiteSeeing Link " + i++, mimeType:"application/pdf"});
		}

		var publisher = getCombined(doc, "publisher");
		if (publisher) item.publisher = publisher;
		var publication = getCombined(doc, "publication");
		if (publication) {
			// the same value is written to every field that could hold it
			item.place = publication;
			item.bookTitle = publication;
			item.publicationTitle = publication;
		}
		var series = getCombined(doc, "series");
		if (series) item.series = series;
		var institution = getCombined(doc, "institution");
		if (institution) item.institution = institution;
		var school = getCombined(doc, "school");
		if (school) item.university = school;

		item.notes.push({note:"This document's details were found using citeseeing.com"});

		item.complete();
	}

	/**
	 * Joins the "name" and "address" cells that follow the table row with the
	 * given id into one comma-separated string.
	 *
	 * The nodes are requested in document order so the parts are always joined
	 * in the order the page lists them.
	 *
	 * @param doc The page document
	 * @param {String} id The id attribute of the heading row, e.g. "publisher"
	 * @returns {String|null} the parts joined with ", ", or null if none were found
	 */
	function getCombined(doc, id) {
		var cells = doc.evaluate("//tr[@id='" + id + "']/following-sibling::tr[position() < 3 and (@class='name' or @class='address')]/td[2]/text()", doc, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
		var parts = [];
		var node;
		while (node = cells.iterateNext()) {
			parts.push(node.textContent);
		}
		return parts.length ? parts.join(", ") : null;
	}


	/**
	 * Builds a new, empty Zotero item for a document type label taken from the
	 * page. Unrecognised labels fall back to "book".
	 * @param {String} type Document type label, e.g. "Article" or "Tech Report"
	 * @returns {Zotero.Item} a new item of the matching type
	 */
	function makeItem(type) {
		var name;
		if (type === "Article" || type === "Document") {
			name = "journalArticle";
		}
		else if (type === "In Collection") {
			name = "bookSection";
		}
		else if (type === "Conference" || type === "In Proceedings") {
			name = "conferencePaper";
		}
		else if (type === "Masters Thesis" || type === "PhD Thesis") {
			name = "thesis";
		}
		else if (type === "Tech Report") {
			name = "report";
		}
		else if (type === "Unpublished") {
			name = "manuscript";
		}
		else {
			name = "book";
		}
		return new Zotero.Item(name);
	}

	/**
	 * Looks up the text of the table cell that follows the cell whose text is
	 * exactly the given label.
	 * @param doc The page document
	 * @param {String} type The label text to look for, e.g. "Year"
	 * @returns {String} the string value of the XPath result
	 */
	function getValue(doc, type) {
		var xpath = "//td[text()='" + type + "']/following-sibling::td/text()";
		return doc.evaluate(xpath, doc, null, XPathResult.STRING_TYPE, null).stringValue;
	}
	/*
	*** BEGIN LICENSE BLOCK ***

	Copyright © 2009 Center for History and New Media
	George Mason University, Fairfax, Virginia, USA
	http://zotero.org

	This file is part of Zotero.

	Zotero is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	Zotero is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with Zotero. If not, see <http://www.gnu.org/licenses/>.

	*** END LICENSE BLOCK ***
	*/

	/**
	* @fileOverview Tools for automatically retrieving a citation for the given PDF
	*/
	// Icon URLs set as the "src" of a row's icon cell in the progress window:
	// recognized, not recognized, and currently being processed.
	const Zotero_RecognizePDF_SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
	const Zotero_RecognizePDF_FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
	const Zotero_RecognizePDF_LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";

	/**
	* Front end for recognizing PDFs
	* @namespace
	*/
	var Zotero_RecognizePDF = new function() {
		/**
		 * Checks whether a given PDF could theoretically be recognized: it must be
		 * a PDF attachment that does not already have a source (parent) item.
		 * @param {Zotero.Item} item The attachment item to check
		 * @returns {Boolean} Truthy if the PDF can be recognized, falsy if it cannot be
		 */
		this.canRecognize = function(/**Zotero.Item*/ item) {
			return (item.attachmentMIMEType &&
				item.attachmentMIMEType == "application/pdf" && !item.getSource());
		};

		/**
		 * Retrieves metadata for the PDF(s) selected in the Zotero Pane, placing the PDFs as
		 * children of the new items. Does nothing when the PDF converter check fails or when
		 * no items are selected.
		 */
		this.recognizeSelected = function() {
			var installed = ZoteroPane_Local.checkPDFConverter();
			if (!installed) {
				return;
			}

			var items = ZoteroPane_Local.getSelectedItems();
			if (!items) return;
			var itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer();
			itemRecognizer.recognizeItems(items);
		};
	};

	/**
	* @class Handles UI, etc. for recognizing multiple items
	*/
	Zotero_RecognizePDF.ItemRecognizer = function () {
	// set to true by stop(); checked in _callback() before a result is used
	this._stopped = false;
	}

	/**
	* Retrieves metadata for the PDF items passed, displaying a progress dialog during conversion
	* and placing the PDFs as children of the new items
	* @param {Zotero.Item[]} items
	*/
	Zotero_RecognizePDF.ItemRecognizer.prototype.recognizeItems = function(items) {
		// work on a copy of the list; _recognizeItem() consumes it with shift()
		this._items = items.slice();
		this._itemTotal = items.length;

		var self = this;
		var dialog = window.openDialog("chrome://zotero/content/pdfProgress.xul", "", "chrome,close=yes,resizable=yes,dependent,dialog,centerscreen");
		this._progressWindow = dialog;
		// the rest of the setup runs once the dialog fires "pageshow"
		dialog.addEventListener("pageshow", function() { self._onWindowLoaded() }, false);
	}

	/**
	* Halts recognition of PDFs
	*/
	Zotero_RecognizePDF.ItemRecognizer.prototype.stop = function() {
	// only raises the flag; _callback() reads it when the next result arrives
	this._stopped = true;
	}

	/**
	* Called when the progress window has been opened; adds items to the tree and begins recognizing
	* @param
	*/
	Zotero_RecognizePDF.ItemRecognizer.prototype._onWindowLoaded = function() {
		var me = this;
		var doc = this._progressWindow.document;

		// Creates a <treecell> carrying a single attribute
		function makeCell(attribute, value) {
			var treecell = doc.createElement('treecell');
			treecell.setAttribute(attribute, value);
			return treecell;
		}

		// populate progress window: one row per item with an icon cell, the
		// attachment title, and a cell that later receives the recognized title
		var treechildren = doc.getElementById("treechildren");
		for(var i=0; i<this._items.length; i++) {
			var item = this._items[i];

			var treerow = doc.createElement('treerow');
			treerow.appendChild(makeCell("id", "item-"+item.id+"-icon"));
			treerow.appendChild(makeCell("label", item.getField("title")));
			treerow.appendChild(makeCell("id", "item-"+item.id+"-title"));

			var treeitem = doc.createElement('treeitem');
			treeitem.appendChild(treerow);
			treechildren.appendChild(treeitem);
		}

		this._progressIndicator = doc.getElementById("progress-indicator");
		// cancelling stops recognition and closes the dialog; closing only stops it
		doc.getElementById("cancel-button").addEventListener("command", function() {
			me.stop();
			me._progressWindow.close();
		}, false);
		this._progressWindow.addEventListener("close", function() { me.stop() }, false);
		this._recognizeItem();
	}

	/**
	* Shifts an item off of this._items and recognizes it, then calls itself again if there are more
	* @private
	*/
	Zotero_RecognizePDF.ItemRecognizer.prototype._recognizeItem = function() {
		var remaining = this._items.length;
		if(remaining === 0) {
			// queue exhausted
			this._done();
			return;
		}

		// progress is the share of items already taken off the queue
		this._progressIndicator.value = (this._itemTotal-remaining)/this._itemTotal*100;
		var current = this._items.shift();
		this._item = current;

		var iconCell = this._progressWindow.document.getElementById("item-"+current.id+"-icon");
		iconCell.setAttribute("src", Zotero_RecognizePDF_LOADING_IMAGE);

		var file = current.getFile();
		if(!file) {
			this._callback(false, "recognizePDF.fileNotFound");
			return;
		}

		var self = this;
		var recognizer = new Zotero_RecognizePDF.Recognizer();
		recognizer.recognize(file, current.libraryID, function(newItem, error) { self._callback(newItem, error) });
	}

	/**
	* Cleans up after items are recognized, disabling the cancel button and making the progress window
	* close on blur
	*/
	Zotero_RecognizePDF.ItemRecognizer.prototype._done = function() {
		var self = this;
		var progressWindow = this._progressWindow;

		this._progressIndicator.value = 100;
		// the cancel button now serves as a close button
		progressWindow.document.getElementById("cancel-button").label = Zotero.getString("recognizePDF.close.label");
		// once the window loses focus, close it after two seconds
		progressWindow.addEventListener("blur", function() {
			self._progressWindow.setTimeout(function() { self._progressWindow.close() }, 2000);
		}, false);
		progressWindow.document.getElementById("label").value = Zotero.getString("recognizePDF.complete.label");
	}

	/**
	* Callback function to be executed upon recognition completion
	* @param {Zotero.Item|Boolean} newItem The new item created from translation, or false if
	* recognition was unsuccessful
	* @param {String} [error] The error name, if recognition was unsuccessful.
	*/
	Zotero_RecognizePDF.ItemRecognizer.prototype._callback = function(newItem, error) {
		if(this._stopped) {
			// recognition was halted while this lookup was in flight: discard its result
			if(newItem) Zotero.Items.erase(newItem.id);
			return;
		}

		if(newItem) {
			// put new item in same collections as the old one
			var itemCollections = this._item.getCollections();
			for(var j=0; j<itemCollections.length; j++) {
				var collection = Zotero.Collections.get(itemCollections[j]);
				collection.addItem(newItem.id);
			}

			// put old item as a child of the new item
			this._item.setSource(newItem.id);
			this._item.save();
		}

		var doc = this._progressWindow.document;

		// add name
		doc.getElementById("item-"+this._item.id+"-title").
			setAttribute("label", (newItem ? newItem.getField("title") : Zotero.getString(error)));
		// update icon
		doc.getElementById("item-"+this._item.id+"-icon").
			setAttribute("src", (newItem ? Zotero_RecognizePDF_SUCCESS_IMAGE : Zotero_RecognizePDF_FAILURE_IMAGE));

		if(error == "recognizePDF.limit") {
			// now done, since we hit the query limit: mark every remaining item as failed
			var limitMessage = Zotero.getString(error);
			for(var i=0; i<this._items.length; i++) {
				var pending = this._items[i];
				doc.getElementById("item-"+pending.id+"-title").
					setAttribute("label", limitMessage);
				doc.getElementById("item-"+pending.id+"-icon").
					setAttribute("src", Zotero_RecognizePDF_FAILURE_IMAGE);
			}
			this._done();
		} else {
			// scroll to this item
			doc.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, this._itemTotal-this._items.length-5));
			// continue recognizing
			this._recognizeItem();
		}
	}

	/*Zotero_RecognizePDF.ItemRecognizer.prototype._captchaCallback = function(img) {
	var io = {dataIn:img};
	Zotero.debug(img);
	this._progressWindow.openDialog("chrome://zotero/content/pdfCaptcha.xul", "", "chrome,modal,resizable=no", io);

	if(io.dataOut) return io.dataOut;

	this.stop();
	this._progressWindow.close();
	return false;
	}*/

	/**
	* @class PDF recognizer backend
	*/
	Zotero_RecognizePDF.Recognizer = function () {
		// one bit per lookup strategy
		this._citeseeing = 1;
		this._crossref = 2;
		this._googlescholar = 4;
		// strategies still to be tried; recognize() subtracts a bit each time it starts one
		this._mode = this._citeseeing | this._crossref | this._googlescholar;
	}

	/**
	* Retrieves metadata for a PDF and saves it as an item
	*
	* @param {nsIFile} file The PDF file to retrieve metadata for
	* @param {Function} callback The function to be executed when recognition is complete
	* @param {Function} [captchaCallback] The function to be executed if a CAPTCHA is encountered
	* (function will be passed image as URL and must return text of CAPTCHA)
	*/
	Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, libraryID, callback, captchaCallback) {
		// Save arguments to object so later recursive calls don't have to provide them.
		if (file) this._file = file;
		if (libraryID) this._libraryID = libraryID;
		if (callback) this._callback = callback;
		Zotero.debug("Trying to recognize " + this._file.path);

		// Each call starts the next strategy whose bit is still set in _mode and
		// clears that bit, so a failed strategy can simply call recognize() again.
		if (this._mode & this._citeseeing) {
			Zotero.debug("Checking citeseeing.com for PDF details.");
			this._mode -= this._citeseeing;

			this._queryCiteSeeing(this._file, this._libraryID);
			return;
		}

		this._getLines(this._file);
		if (!this._lines || !this._lineLengths) {
			// _getLines() could not read the PDF and has already reported that
			// through the callback; there is no text to work with
			return;
		}

		if (this._mode & this._crossref) {
			Zotero.debug("Checking for DOI then searching CrossRef for PDF details.");
			this._mode -= this._crossref;

			var allText = this._lines.join("\n");
			Zotero.debug(allText);
			var m = Zotero.Utilities.cleanDOI(allText);
			if(m) {
				this._queryCrossRef(m[0]);
			}
			else {
				this.recognize();
			}
		}
		else if (this._mode & this._googlescholar) {
			Zotero.debug("Searching GoogleScholar for PDF details.");
			this._mode -= this._googlescholar;

			var lineLengthsLength = this._lineLengths.length;
			if(lineLengthsLength < 20) {
				this._callback(false, "recognizePDF.noOCR");
				return;
			}

			// get (not quite) median length. Sort a copy, numerically: sorting in
			// place would break the index correspondence between _lineLengths and
			// _lines, and the default sort compares numbers as strings.
			var sortedLengths = this._lineLengths.slice().sort(function(a, b) { return a - b; });
			var medianLength = sortedLengths[Math.floor(lineLengthsLength/2)];

			// pick lines within 4 chars of the median (this is completely arbitrary)
			this._goodLines = [];
			var uBound = medianLength + 4;
			var lBound = medianLength - 4;
			for (var i=0; i<lineLengthsLength; i++) {
				if(this._lineLengths[i] > lBound && this._lineLengths[i] < uBound) {
					// Strip every quotation mark so they don't mess up search query quoting
					var line = this._lines[i].replace(/"/g, '');
					this._goodLines.push(line);
				}
			}
			this._startLine = this._iteration = 0;

			this._queryGoogle();
		}
		else {
			// every strategy has been tried
			this._callback(false, "recognizePDF.noMatches");
		}
	}
	/**
	* Runs the PDF-to-text converter on the first pages of the given file and stores
	* the extracted lines in this._lines and their lengths in this._lineLengths.
	* Does nothing if both are already set. If the converter produces no output file,
	* the callback is invoked with "recognizePDF.couldNotRead" and neither property is set.
	* @param {nsIFile} file The PDF file to extract text from
	* @private
	*/
	Zotero_RecognizePDF.Recognizer.prototype._getLines = function(file) {
	// results are cached on the object, so the converter runs at most once per Recognizer
	if (this._lines && this._lineLengths) {
	return;
	}
	Zotero.debug("Running pdf2text and saving lines in first three pages.");
	const MAX_PAGES = 3;
	// captures, after optional leading whitespace, a run of at least two
	// non-whitespace tokens separated by single spaces
	const lineRe = /^\s*([^\s]+(?: [^\s]+)+)/;

	// converter output goes to a scratch file in the Zotero directory; remove any stale copy
	var cacheFile = Zotero.getZoteroDirectory();
	cacheFile.append("recognizePDFcache.txt");
	if(cacheFile.exists()) {
	cacheFile.remove(false);
	}

	// NOTE(review): this debug line omits the '-layout' flag that is actually passed below
	Zotero.debug('Running pdftotext -enc UTF-8 -nopgbrk '
	+ '-l ' + MAX_PAGES + ' "' + file.path + '" "'
	+ cacheFile.path + '"');

	var proc = Components.classes["@mozilla.org/process/util;1"].
	createInstance(Components.interfaces.nsIProcess);
	var exec = Zotero.getZoteroDirectory();
	exec.append(Zotero.Fulltext.pdfConverterFileName);
	proc.init(exec);

	var args = ['-enc', 'UTF-8', '-nopgbrk', '-layout', '-l', MAX_PAGES];
	args.push(file.path, cacheFile.path);
	// presumably the first argument makes run() block until the converter exits — confirm against nsIProcess
	proc.run(true, args, args.length);

	// no output file means the conversion failed; report it and leave _lines unset
	if(!cacheFile.exists()) {
	this._callback(false, "recognizePDF.couldNotRead");
	return;
	}

	// read the output back as UTF-8, line by line
	var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
	.createInstance(Components.interfaces.nsIFileInputStream);
	inputStream.init(cacheFile, 0x01, 0664, 0);
	var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
	.createInstance(Components.interfaces.nsIConverterInputStream);
	intlStream.init(inputStream, "UTF-8", 65535,
	Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
	intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);

	// get the lines in this sample
	this._lines = [];
	this._lineLengths = [];
	var str = {};
	while(intlStream.readLine(str)) {
	var line = lineRe.exec(str.value);
	// lines that do not match lineRe are skipped
	if(line) {
	this._lines.push(line[1]);
	this._lineLengths.push(line[1].length);
	}
	}

	inputStream.close();
	cacheFile.remove(false);
	}
	/**
	* Tries to fetch an nsIFile pointer to the system's java binary.
	* Returns null if java cannot be found.
	* @private
	*/
	Zotero_RecognizePDF.Recognizer.prototype._getJava = function() {
		Zotero.debug("Getting java");

		var env = Components.classes["@mozilla.org/process/environment;1"].
			getService(Components.interfaces.nsIEnvironment);

		if (!env.exists("PATH")) {
			return null;
		}

		// PATH separator and executable name both differ on Windows
		var separator = Zotero.isWin ? ";" : ":";
		var binaryName = Zotero.isWin ? "java.exe" : "java";
		var path = env.get("PATH").split(separator);

		for (var i = 0; i < path.length; i++) {
			// empty entries (e.g. from "a::b" or a trailing separator) name no directory
			if (!path[i]) continue;
			try {
				var file = Components.classes["@mozilla.org/file/local;1"].
					createInstance(Components.interfaces.nsILocalFile);
				file.initWithPath(path[i]);
				file.append(binaryName);
				if (file.exists()) return file;
			}
			catch (e) {
				// initWithPath rejects entries that are not absolute paths; one bad
				// entry must not abort the search through the remaining ones
				Zotero.debug("Skipping unusable PATH entry '" + path[i] + "': " + e);
			}
		}
		return null;
	}
	/**
	* Queries citeseeing.com for details on the given pdf.
	* If details are found then they are stored in a file and imported using
	* the CiteLine translator. This file is then deleted.
	* Returns a Zotero.Item object reference if identified, null if not.
	* @private
	*/
	Zotero_RecognizePDF.Recognizer.prototype._queryCiteSeeing = function(pdf, libraryID) {
		var java = this._getJava();
		if (null == java) {
			// without java this strategy cannot run; hand over to the next one
			// instead of returning silently, which would leave the item pending forever
			Zotero.debug("No java binary found");
			this.recognize();
			return null;
		}
		Zotero.debug(java.path);

		// the jar writes its findings to this file; start from a clean slate
		var detailFile = Zotero.getZoteroDirectory();
		detailFile.append("pdfDetails.citeline");
		if(detailFile.exists()) {
			detailFile.remove(false);
		}

		var jarFile = Zotero.getZoteroDirectory();
		jarFile.append("citeline.jar");
		if (!jarFile.exists()) {
			Zotero.debug("No citeline.jar");
			this.recognize();
			return null;
		}
		Zotero.debug("Running java -jar " + jarFile.path + " " + pdf.path + " " + detailFile.path);
		var proc = Components.classes["@mozilla.org/process/util;1"].
			createInstance(Components.interfaces.nsIProcess);
		proc.init(java);
		var args = ["-jar", jarFile.path, pdf.path, detailFile.path];
		proc.run(true, args, args.length);
		Zotero.debug("Run");
		if (!detailFile.exists()) {
			// no output file: nothing was found for this PDF
			Zotero.debug("No details");
			this.recognize();
			return null;
		}
		Zotero.debug("Got details");

		// read the whole details file as UTF-8
		var data = "";
		var fstream = Components.classes["@mozilla.org/network/file-input-stream;1"].
			createInstance(Components.interfaces.nsIFileInputStream);
		var cstream = Components.classes["@mozilla.org/intl/converter-input-stream;1"].
			createInstance(Components.interfaces.nsIConverterInputStream);
		fstream.init(detailFile, -1, 0, 0);
		cstream.init(fstream, "UTF-8", 0, 0);

		try {
			var str = {};
			var read = 0;
			do {
				read = cstream.readString(0xffffffff, str);
				data += str.value;
			} while (read != 0);
		}
		finally {
			// release the file even if reading fails
			cstream.close();
		}

		Zotero.debug(data);

		// import the details with the CiteLine translator
		var me = this;
		var translate = new Zotero.Translate.Import;
		translate.setTranslator("17AF2C40-AAA5-4837-AB16-FDC54298AB42");
		var location = {"path":detailFile.path};
		translate.setLocation(location);
		translate.setString(data);
		translate.setHandler("itemDone", function(translate, item) {
			Zotero.debug("Item done");
			me._callback(item);
		});
		translate.setHandler("select", function(translate, items, callback) {
			return me._selectItems(translate, items, callback);
		});
		translate.setHandler("done", function(translate, success) {
			// import failed: fall through to the next strategy
			if(!success) me.recognize();
		});
		translate.translate(this._libraryID, false);

		detailFile.remove(false);
	}

	/**
	* Looks up the given DOI with the search translator and reports the resulting
	* item through the callback; on failure the next strategy is tried.
	* @param {String} doi The DOI to search for
	* @private
	*/
	Zotero_RecognizePDF.Recognizer.prototype._queryCrossRef = function(doi) {
		var self = this;
		var search = new Zotero.Translate("search");
		search.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
		search.setSearch({"itemType":"journalArticle", "DOI":doi});

		search.setHandler("itemDone", function(translate, item) {
			self._callback(item);
		});
		search.setHandler("select", function(translate, items, callback) {
			return self._selectItems(translate, items, callback);
		});
		search.setHandler("done", function(translate, success) {
			Zotero.debug("Finished search");
			// nothing found for this DOI: move on
			if(!success) self.recognize();
		});

		Zotero.debug("Searching crossref for " + doi);
		search.translate(this._libraryID, false);
	}

	/**
	* Queries Google Scholar for metadata for this PDF
	* @private
	*/
	Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
		var me = this;

		// give up after four queries or once the usable lines run out
		if(this._iteration > 3 || this._startLine >= this._goodLines.length) {
			try {
				if(this._hiddenBrowser) Zotero.Browser.deleteHiddenBrowser(this._hiddenBrowser);
			} catch(e) {
				// best effort: failing to dispose of the browser must not block the fallback
				Zotero.debug("RecognizePDF: could not delete hidden browser: " + e);
			}
			this.recognize();
			return;
		}
		this._iteration++;

		var queryString = "";
		// take the relevant parts of some lines (exclude hyphenated word)
		var queryStringWords = 0;
		while(queryStringWords < 25 && this._startLine < this._goodLines.length) {
			var words = this._goodLines[this._startLine].split(/\s+/);
			// get rid of first and last words
			words.shift();
			words.pop();
			// make sure there are no long words (probably OCR mistakes)
			var skipLine = false;
			for(var i=0; i<words.length; i++) {
				if(words[i].length > 20) {
					skipLine = true;
					break;
				}
			}
			// add words to query
			if(!skipLine && words.length) {
				queryStringWords += words.length;
				queryString += '"'+words.join(" ")+'" ';
			}
			this._startLine++;
		}

		Zotero.debug("RecognizePDF: Query string "+queryString);

		// pass query string to Google Scholar and translate
		var url = "http://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search";
		if(!this._hiddenBrowser) {
			this._hiddenBrowser = Zotero.Browser.createHiddenBrowser();
			this._hiddenBrowser.docShell.allowImages = false;
		}

		var translate = new Zotero.Translate("web");
		translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
		translate.setHandler("itemDone", function(translate, item) {
			Zotero.Browser.deleteHiddenBrowser(me._hiddenBrowser);
			me._callback(item);
		});
		translate.setHandler("select", function(translate, items, callback) {
			me._selectItems(translate, items, callback);
		});
		translate.setHandler("done", function(translate, success) {
			// no result for this query: try again with the next batch of lines
			if(!success) me._queryGoogle();
		});

		this._hiddenBrowser.addEventListener("pageshow", function() { me._scrape(translate) }, true);

		this._hiddenBrowser.loadURIWithFlags(url,
			Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY, null, null, null);
	}

	/**
	* To be executed when Google Scholar is loaded
	* @private
	*/
	Zotero_RecognizePDF.Recognizer.prototype._scrape = function(/**Zotero.Translate*/ translate) {
		// "pageshow" also fires for the initial blank page; wait for the real one
		if(this._hiddenBrowser.contentDocument.location.href == "about:blank") return;

		if(this._hiddenBrowser.contentDocument.title == "403 Forbidden") {
			// hit the captcha
			/*
			var forms = this._hiddenBrowser.contentDocument.getElementsByTagName("form");
			if(forms.length && forms[0].getAttribute("action") == "Captcha") {
				var captchaImage = forms[0].getElementsByTagName("img");
				var captchaBox = this._hiddenBrowser.contentDocument.getElementsByName("captcha");
				if(captchaImage.length && captchaBox.length && this._captchaCallback) {
					var text = this._captchaCallback(captchaImage[0].src);
					if(text) {
						captchaBox[0].value = text;
						forms[0].submit();
						return;
					}
				}
			}*/
			this._callback(false, "recognizePDF.limit");
			return;
		}

		// the caller is the "pageshow" listener that invoked us; detach it before translating
		this._hiddenBrowser.removeEventListener("pageshow", this._scrape.caller, true);
		translate.setDocument(this._hiddenBrowser.contentDocument);
		translate.translate(this._libraryID, false);
	}

	/**
	* Callback to pick first item in the Google Scholar item list
	* @private
	* @type Object
	*/
	Zotero_RecognizePDF.Recognizer.prototype._selectItems = function(/**Zotero.Translate*/ translate,
			/**Object*/ items, /**Function*/ callback) {
		// Select only the first enumerated entry: pass the callback an object
		// holding that one key with its own value. If items is empty the
		// callback is never invoked.
		for(var i in items) {
			var obj = {};
			obj[i] = items[i];
			callback(obj);
			return;
		}
	}