jhotmann/ScriptDocumentAssembler.java Secret

## classify.xml
<Rules>
    <!--<Rule>
        <Comment>Optional comment for logging</Comment>
        <NewDocType>Doc type to set document to if all rules pass</NewDocType>
        <SourceDocType>Optional document type filter, separate multiple doc types with a pipe '|'</SourceDocType>
        <Priority>Rule priority, higher numbers take precedence, in case of a tie whichever comes first wins</Priority>
        <Criteria>
            <Criterion>
                <Type>hocr (default), doctype, plf/kvpp, dlf/kve, file, path, email</Type>
                <Page>Page number within document, default is 1</Page>
                <FieldName>Name of page or document level field or email header</FieldName>
                <Operator>equals (default), not-equals, same, distance, matches, not-matches, contains, not-contains, has-value, not-has-value, starts-with, or ends-with</Operator>
                <Value>Value to compare criteria with (not to be used with same, has-value, or not-has-value)</Value>
            </Criterion>
        </Criteria>
    </Rule>-->
    <Rule>
        <Comment>Classify Based on HOCR</Comment>
        <NewDocType>ExampleDocType</NewDocType>
        <Priority>50</Priority>
        <Criteria>
            <Criterion>
                <Type>hocr</Type>
                <Operator>matches</Operator>
                <Value>(?i)^.*(some-domain.com|\bSome Other Key-phrase\b).*$</Value>
            </Criterion>
            <Criterion>
                <Type>hocr</Type>
                <Operator>not-matches</Operator>
                <Value>(?i)^.*(Page can't contain this text. Remove if not necessary).*$</Value>
            </Criterion>
        </Criteria>
    </Rule>
    <Rule>
        <Comment>Classify Based on Email Header</Comment>
        <NewDocType>ExampleDocType</NewDocType>
        <Priority>100</Priority>
        <Criteria>
            <Criterion>
                <Type>email</Type>
                <FieldName>From</FieldName>
                <Operator>matches</Operator>
                <Value>^(?i).*@some-domain.com.*$</Value>
            </Criterion>
        </Criteria>
    </Rule>
</Rules>

## merge.xml
<Rules>
    <!--<Rule>
        <Comment>Optional Comment for Logging</Comment>
        <FirstDoc>1 (default) or 2</FirstDoc>
        <Criteria>
            <Criterion>
                <Type>hocr (default), doctype, plf/kvpp, dlf/kve, file, path, email</Type>
                <DocumentNumber>1, 2, or blank for both</DocumentNumber>
                <FieldName>Name of page or document level field or email header</FieldName>
                <Operator>equals (default), not-equals, same, distance, matches, not-matches, contains, not-contains, has-value, not-has-value, starts-with, or ends-with</Operator>
                <Value>Value to compare criteria with (not to be used with same, has-value, or not-has-value)</Value>
            </Criterion>
        </Criteria>
    </Rule>-->
    <Rule>
        <Comment>Merge pages with little or no content to previous doc</Comment>
        <FirstDoc>1</FirstDoc>
        <Criteria>
            <Criterion>
                <Type>hocr</Type>
                <DocumentNumber>2</DocumentNumber>
                <Operator>matches</Operator>
                <Value>^.{0,200}$</Value>
            </Criterion>
        </Criteria>
    </Rule>
</Rules>

## ScriptDocumentAssembler.java
import com.ephesoft.dcma.script.IJDomScript;
import com.ephesoft.dcma.util.logger.EphesoftLogger;
import com.ephesoft.dcma.util.logger.ScriptLoggerFactory;
import org.apache.commons.io.FileUtils;
import org.jdom.*;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;

import javax.naming.Context;
import javax.naming.InitialContext;
import javax.sql.DataSource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.*;

public class ScriptDocumentAssembler implements IJDomScript {
	// Logger from the Ephesoft script logger factory; used for output whenever IS_DEBUG is false.
	private static EphesoftLogger LOGGER = ScriptLoggerFactory.getLogger(ScriptDocumentAssembler.class);

	// Simple class name; used in log banners and as the base name of the .properties file.
	private String SCRIPT_NAME = this.getClass().getSimpleName();
	// True when the os.name system property is exactly "Mac OS X" or "Windows 10" (case-insensitive).
	// In debug mode output goes to stdout, HOCR content and DB lookups are stubbed, and config is
	// read from <working dir>/src/script-config.
	private boolean IS_DEBUG = System.getProperty("os.name").matches("(?i)Mac OS X|Windows 10");
	// Batch-level values copied from children of the batch XML root element by init().
	private String BATCHNAME_VALUE;
	private String BATCHLOCALPATH_VALUE;
	private String BATCHUNCPATH_VALUE;
	private String BATCHINSTANCEID_VALUE;
	private String BATCHCLASSID_VALUE;
	// Directory holding classify.xml / merge.xml and the properties file; computed in init().
	private String PATH_TO_SCRIPT_CONFIG;
	// Full path of <SCRIPT_NAME>.properties inside PATH_TO_SCRIPT_CONFIG.
	private String SCRIPT_CONFIG_FILE;
	// Options loaded from SCRIPT_CONFIG_FILE; getPropValue() appends missing defaults to it.
	private Properties SCRIPT_PROPS;
	// Root element of the batch XML document passed to execute().
	private Element ROOT;
	// Lazily opened by the doc-type lookups; closed in execute()'s finally block.
	private Connection DB_CONNECTION = null;

	/**
	 * Script entry point: initialises state from the batch XML, optionally runs the classification
	 * and merge rules (controlled by the RunClassificationRules / RunMergeRules properties), then
	 * renumbers the document identifiers.
	 *
	 * @param documentFile       the batch XML document; a null value is reported as an error
	 * @param methodName         not used by this script
	 * @param documentIdentifier not used by this script
	 * @return the exception that aborted processing, or null when everything completed
	 */
	public Object execute(Document documentFile, String methodName, String documentIdentifier) {
		Exception exception = null;
		printDebug("*************  Start execution of the " + SCRIPT_NAME);
		try {
			if (null == documentFile) {
				throw (new Exception("Input document is null."));
			}

			init(documentFile);

			printDebug("*************  Batch " + BATCHINSTANCEID_VALUE + " - " + BATCHNAME_VALUE);

			if (Boolean.parseBoolean(getPropValue("RunClassificationRules", "true"))) {
				classify(getPropValue("ClassificationConfig", "classify.xml"));
			}

			if (Boolean.parseBoolean(getPropValue("RunMergeRules", "true"))) {
				merge(getPropValue("MergeConfig", "merge.xml"));
			}

			reassignDocIdentifier();

			if (IS_DEBUG) writeXmlFile(System.getProperty("user.home") + "/Downloads/" + SCRIPT_NAME + "-out.xml", documentFile);

		} catch (Exception e) {
			exception = e;
			if (IS_DEBUG) e.printStackTrace();
			else LOGGER.error("ERROR!!! - " + e.getMessage());
		} finally {
			if (DB_CONNECTION != null) {
				try {
					DB_CONNECTION.close();
				} catch (SQLException e) {
					// Report close failures the same way as every other error instead of only on stderr.
					if (IS_DEBUG) e.printStackTrace();
					else LOGGER.error("ERROR!!! - Failed to close DB connection: " + e.getMessage());
				} finally {
					// Drop the reference so a later execute() on this instance reconnects
					// instead of reusing a closed connection.
					DB_CONNECTION = null;
				}
			}
		}
		printDebug("*************  End execution of the " + SCRIPT_NAME);
		return exception;
	}

	/**
	 * Applies the classification rules from the given config file to every Document in the batch.
	 * For each document the passing rule with the highest priority wins (on a tie the earlier rule
	 * is kept) and its NewDocType is written to the document together with the type's description,
	 * confidence threshold and a confidence value.
	 *
	 * @param configName file name of the rules XML inside the script-config directory
	 * @throws JDOMException if an XPath expression or the rules XML cannot be processed
	 * @throws IOException   if the rules XML or the properties file cannot be read/written
	 * @throws SQLException  declared by the confidence-threshold lookup
	 */
	@SuppressWarnings("unchecked")
	private void classify(String configName) throws JDOMException, IOException, SQLException {
		printDebug("***** Begin Classification *****");
		String configPath = concatPath(PATH_TO_SCRIPT_CONFIG, configName);
		// Check for classification xml
		if (!fileExists(configPath)) {
			printDebug(configName, "not found!");
			return;
		}
		printDebug(configName, "found");
		// Parse xml for classification rules
		Element config = readXmlFile(configPath).getRootElement();
		// The rule list is the same for every document, so fetch it once.
		List<Element> classifyRules = config.getChildren("Rule");
		// Loop through documents and classify pages
		List<Element> docList = XPath.newInstance("//Document").selectNodes(ROOT);
		for (Element doc : docList) {
			String docId = doc.getChildText(XMLGenerics.IDENTIFIER);
			printDebug("Comparing", docId, "to classification rules");
			String docType = doc.getChildText(XMLGenerics.TYPE);
			String newDocType = "";
			int priority = 0;
			// Loop through classification rules
			for (Element rule : classifyRules) {
				// NewDocType
				String ruleNewDocType = getChildText(rule, "NewDocType");
				if (ruleNewDocType.isEmpty()) {
					printDebug("NewDocType is not set, skipping rule");
					continue;
				}
				String comment = getChildText(rule, "Comment");
				if (!comment.isEmpty()) printDebug("Rule:", comment);
				// SourceDocType (optional pipe-separated filter)
				String sourceDocTypeFilter = getChildText(rule, "SourceDocType");
				if (!sourceDocTypeFilter.isEmpty()) {
					List<String> sourceDocTypes = Arrays.asList(sourceDocTypeFilter.split("\\|"));
					if (!sourceDocTypes.contains(docType)) {
						printDebug("The document type", docType, "does not match the rule's doc type(s):", String.join(", ", sourceDocTypes));
						continue;
					}
				}
				// Priority (defaults to 1); trimmed so whitespace-padded values still parse
				String priorityText = getChildText(rule, "Priority");
				int rulePriority = (priorityText.isEmpty() ? 1 : Integer.parseInt(priorityText.trim()));
				if (rulePriority < priority) {
					printDebug("Rule priority lower than highest matching priority, skipping rule");
					continue;
				}
				boolean criteriaMet = true;
				// Loop through all criteria for classification rule
				List<Element> criteria = XPath.newInstance(".//Criterion").selectNodes(rule);
				for (Element criterion : criteria) {
					String criterionType = getChildText(criterion, "Type");
					String pageText = getChildText(criterion, "Page");
					String criterionPage = (pageText.isEmpty() ? "1" : pageText);
					String criterionField = getChildText(criterion, "FieldName");
					String criterionOperator = getChildText(criterion, "Operator");
					String criterionValue = getChildText(criterion, "Value");
					// Comparison source
					String source = getDocDetail(doc, criterionType, criterionField, criterionPage);
					// Check if criterion met
					criteriaMet = checkCriteria(criterionOperator, criterionValue, source);
					printDebug("Criterion:", docId, "\"" + source + "\"", criterionOperator,
							(!criterionOperator.contains("has-value") ? "\"" + criterionValue + "\"." : "-"),
							(criteriaMet ? "Passed!" : "Failed"));
					if (!criteriaMet) break;
				}
				if (!criteriaMet) {
					printDebug("Criteria not met.");
					continue;
				}
				if (rulePriority <= priority) {
					printDebug("Criteria met, but rule priority lower than previous rule(s)");
					continue;
				}
				printDebug("All criteria met and rule is highest priority so far: ", rulePriority);
				newDocType = ruleNewDocType;
				priority = rulePriority;
			}
			if (newDocType.isEmpty()) continue;
			// Look up description/threshold only for the winning doc type rather than for every
			// intermediate winner; each lookup may hit the database.
			String newDocTypeDesc = getDocTypeDescription(newDocType);
			int newDocTypeConfThresh = getDocTypeConfidenceThreshold(newDocType);
			// At least one classification rule passed, setting doc type to doc type of highest priority rule
			printDebug("Setting", docId, "to:", newDocType);
			doc.getChild(XMLGenerics.TYPE).setText(newDocType);
			doc.getChild(XMLGenerics.DESCRIPTION).setText(newDocTypeDesc);
			doc.getChild(XMLGenerics.CONFIDENCE_THRESHOLD).setText(newDocTypeConfThresh + ".0");
			if (Boolean.parseBoolean(getPropValue("SetConfidenceToPriority", "true"))) {
				setChildText(doc, XMLGenerics.CONFIDENCE, priority + ".00");
			} else {
				setChildText(doc, XMLGenerics.CONFIDENCE, "100.00");
			}
		}
	}

	/**
	 * Applies the merge rules from the given config file. For every rule each pair of adjacent
	 * documents is tested against the rule's criteria; when all criteria pass the two documents
	 * are merged and the same position is re-checked against the following document.
	 *
	 * @param configName file name of the rules XML inside the script-config directory
	 * @throws JDOMException if an XPath expression or the rules XML cannot be processed
	 * @throws IOException   if the rules XML or an HOCR file cannot be read
	 */
	@SuppressWarnings("unchecked")
	private void merge(String configName) throws JDOMException, IOException {
		printDebug("***** Begin Merges *****");
		String configPath = concatPath(PATH_TO_SCRIPT_CONFIG, configName);
		// Check for merge xml
		if (!fileExists(configPath)) {
			printDebug(configName, "not found!");
			return;
		}
		printDebug(configName, "found");
		// Parse xml for merge rules
		Element config = readXmlFile(configPath).getRootElement();
		// Loop through rules and merge pages
		List<Element> rules = config.getChildren("Rule");
		for (Element rule : rules) {
			String ruleComment = getChildText(rule, "Comment");
			String firstDoc = getChildText(rule, "FirstDoc");
			if (!ruleComment.isEmpty()) printDebug("--", ruleComment, "--");
			// A rule without a Criteria element previously caused a NullPointerException;
			// skip it instead of aborting the whole script.
			Element criteriaElement = rule.getChild("Criteria");
			if (criteriaElement == null) {
				printDebug("Rule has no Criteria element, skipping rule");
				continue;
			}
			// The criteria of a rule do not change while iterating documents, so fetch them once.
			List<Element> criteria = criteriaElement.getChildren("Criterion");
			List<Element> docList = XPath.newInstance("//Document").selectNodes(ROOT);
			// Loop through all documents for each rule
			for (int i = 0; i < docList.size() - 1; i++) {
				Element doc1 = docList.get(i);
				Element doc2 = docList.get(i + 1);
				String docId1 = doc1.getChildText(XMLGenerics.IDENTIFIER);
				String docId2 = doc2.getChildText(XMLGenerics.IDENTIFIER);
				boolean criteriaMet = true;
				// Check all criteria against the documents
				for (Element criterion : criteria) {
					String criterionType = getChildText(criterion, "Type");
					String criterionDocNumber = getChildText(criterion, "DocumentNumber");
					String criterionField = getChildText(criterion, "FieldName");
					String criterionOperator = getChildText(criterion, "Operator");
					String criterionValue = getChildText(criterion, "Value");
					boolean firstOnly = criterionDocNumber.equals("1");
					boolean secondOnly = criterionDocNumber.equals("2");
					// Get source value for each document
					String source1 = getDocDetail(doc1, criterionType, criterionField, null);
					String source2 = getDocDetail(doc2, criterionType, criterionField, null);
					// Check if criterion met
					if (criterionOperator.equals("same")) {
						criteriaMet = checkCriteria(criterionOperator, criterionValue, source1, source2);
					} else if (firstOnly) {
						criteriaMet = checkCriteria(criterionOperator, criterionValue, source1);
					} else if (secondOnly) {
						criteriaMet = checkCriteria(criterionOperator, criterionValue, source2);
					} else if (criterionValue.isEmpty()) {
						// No value configured: compare both documents against document 1's value
						criteriaMet = checkCriteria(criterionOperator, source1, source1, source2);
					} else {
						criteriaMet = checkCriteria(criterionOperator, criterionValue, source1, source2);
					}
					String verdict = (criteriaMet ? "Passed!" : "Failed");
					// Log results
					if (!criterionOperator.matches("same|.*has-value|distance")) {
						if (firstOnly) {
							printDebug("Criterion:", docId1, "\"" + source1 + "\"", criterionOperator, "\"" + criterionValue + "\".", verdict);
						} else if (secondOnly) {
							printDebug("Criterion:", docId2, "\"" + source2 + "\"", criterionOperator, "\"" + criterionValue + "\".", verdict);
						} else {
							printDebug("Criterion:", docId1, "\"" + source1 + "\"", "and", docId2, "\"" + source2 + "\"", criterionOperator, "\"" + criterionValue + "\".", verdict);
						}
					} else if (criterionOperator.equals("same")) {
						printDebug("Criterion:", docId1, "\"" + source1 + "\"", "and", docId2, "\"" + source2 + "\"", "are the same.", verdict);
					} else if (criterionOperator.equals("distance")) {
						printDebug("Criterion:", docId1, "\"" + source1 + "\"", "and", docId2, "\"" + source2 + "\"", "distance less than or equal to", criterionValue + ".", verdict);
					} else if (criterionOperator.matches(".*has-value")) {
						if (firstOnly) {
							printDebug("Criterion:", docId1, "\"" + source1 + "\"", criterionOperator + ".", verdict);
						} else if (secondOnly) {
							printDebug("Criterion:", docId2, "\"" + source2 + "\"", criterionOperator + ".", verdict);
						} else {
							printDebug("Criterion:", docId1, "\"" + source1 + "\"", "and/or", docId2, "\"" + source2 + "\"", criterionOperator + ".", verdict);
						}
					}
					// Stop checking criteria if not met
					if (!criteriaMet) break;
				}
				// Move on to next document if criteria not met
				if (!criteriaMet) {
					printDebug("Criteria not met");
					continue;
				}
				// Merge documents if all criteria met
				printDebug("Criteria met!");
				mergeDocuments(doc1, doc2, (firstDoc.equals("2") ? 2 : 1));
				// One document was removed: refresh the list and revisit the same index
				docList = XPath.newInstance("//Document").selectNodes(ROOT);
				i--;
			}
		}
	}

	/**
	 * Resolves the value a criterion is compared against.
	 *
	 * Supported detail types (case-insensitive): "doctype" (the document's Type), "plf"/"kvpp"
	 * (page level field), "dlf"/"kve" (document level field), "file" (OldFileName of the page with
	 * a trailing "-dddd-dddd.ext" suffix removed), "path" (batch UNC folder path) and "email"
	 * (email header). Anything else is treated as "hocr" and returns the page's HOCR content.
	 *
	 * @param doc        the Document element to inspect
	 * @param detailType criterion type; must not be null
	 * @param fieldName  field or header name for the plf/dlf/email types
	 * @param pageNumber 1-based page position within the document, or null for page 1
	 * @return the resolved value; an empty string when the doc type, file name or page is absent
	 * @throws JDOMException if an XPath expression or HOCR file cannot be processed
	 * @throws IOException   if the HOCR file cannot be read
	 */
	private String getDocDetail(Element doc, String detailType, String fieldName, String pageNumber) throws JDOMException, IOException {
		String pagePosition = (pageNumber != null ? pageNumber : "1");
		String pageQuery = ".//" + XMLGenerics.PAGES + "/" + XMLGenerics.PAGE + "[position()=" + pagePosition + "]";
		// Locale.ROOT keeps type matching independent of the JVM default locale (e.g. Turkish dotless i).
		switch (detailType.toLowerCase(Locale.ROOT)) {
			// Null-safe: a document without a Type child yields "" rather than null.
			case "doctype": return getChildText(doc, XMLGenerics.TYPE);
			case "plf":
			case "kvpp": return getPlfValue(fieldName, doc);
			case "dlf":
			case "kve": return getDlfValue(doc, fieldName);
			case "file": {
				Text fileNameText = (Text) XPath.newInstance(pageQuery + "/" + XMLGenerics.OLD_FILE_NAME + "/text()").selectSingleNode(doc);
				return (fileNameText != null ? fileNameText.getValue().replaceAll("-\\d{4}-\\d{4}\\.[A-z]+$", "") : "");
			}
			case "path": return BATCHUNCPATH_VALUE;
			case "email": return getEmailHeader(fieldName);
			default: {
				Element page = (Element) XPath.newInstance(pageQuery).selectSingleNode(doc);
				// The requested page may not exist (e.g. Page=3 on a 2-page document); treat as no content.
				if (page == null) return "";
				return getHocrContent(page);
			}
		}
	}

	/**
	 * Evaluates one operator against one or more source strings.
	 *
	 * Positive operators (matches, contains, starts-with, ends-with, has-value, equals) require every
	 * source to satisfy the test; negative operators (not-equals, not-matches, not-contains) require
	 * that no source does; not-has-value requires every source to be empty. "same" and "distance"
	 * need at least two sources and compare the sources with each other. Unknown or empty operators
	 * fall back to "equals".
	 *
	 * @param operator operator name, matched case-insensitively
	 * @param value    comparison value; a regex for matches/not-matches, an integer for distance
	 * @param sources  the strings to test; must not contain null
	 * @return true when the criterion is satisfied
	 */
	private boolean checkCriteria(String operator, String value, String... sources) {
		List<String> sourceList = Arrays.asList(sources);
		// Locale.ROOT keeps operator matching independent of the JVM default locale.
		switch (operator.toLowerCase(Locale.ROOT)) {
			case "same": return (sourceList.size() > 1 && sourceList.stream().distinct().count() == 1);
			case "distance": {
				if (sourceList.size() <= 1) return false;
				// Parse the threshold once instead of once per source.
				int maxDistance = Integer.parseInt(value);
				String reference = sourceList.get(0);
				return sourceList.stream().allMatch(s -> distance(reference, s) <= maxDistance);
			}
			case "not-equals": return sourceList.stream().noneMatch(s -> s.equals(value));
			case "matches": return sourceList.stream().allMatch(s -> s.matches(value));
			case "not-matches": return sourceList.stream().noneMatch(s -> s.matches(value));
			case "contains": return sourceList.stream().allMatch(s -> s.contains(value));
			case "not-contains": return sourceList.stream().noneMatch(s -> s.contains(value));
			case "has-value": return sourceList.stream().noneMatch(String::isEmpty);
			case "not-has-value": return sourceList.stream().allMatch(String::isEmpty);
			case "starts-with": return sourceList.stream().allMatch(s -> s.startsWith(value));
			case "ends-with": return sourceList.stream().allMatch(s -> s.endsWith(value));
			default: return sourceList.stream().allMatch(s -> s.equals(value));
		}
	}

	/**
	 * Renumbers every Document's Identifier as DOC1, DOC2, ... in document order, so the
	 * identifiers are sequential again after merges removed documents.
	 *
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	@SuppressWarnings("unchecked")
	private void reassignDocIdentifier() throws JDOMException {
		printDebug("***** Updating document IDs to be sequential after merging *****");
		List<Element> documents = XPath.newInstance("//" + XMLGenerics.DOCUMENT).selectNodes(ROOT);
		int sequence = 0;
		for (Element document : documents) {
			sequence++;
			Element identifier = document.getChild(XMLGenerics.IDENTIFIER);
			identifier.setText("DOC" + sequence);
		}
	}

	// #####  HELPER METHODS  #####

	/**
	 * Looks up an email header by name under EmailHeaders/Email/Headers in the batch XML.
	 *
	 * @param headerName value of the Header's Name child to search for
	 * @return the text of the first matching header's Value child, or "" when the header or its Value is absent
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	private String getEmailHeader(String headerName) throws JDOMException {
		String query = "//EmailHeaders/Email/Headers/Header[Name='" + headerName + "']";
		Element header = (Element) XPath.newInstance(query).selectSingleNode(ROOT);
		// getChildText already maps a missing Value child to ""
		return (header != null ? getChildText(header, XMLGenerics.VALUE) : "");
	}

	/**
	 * Computes the case-insensitive Levenshtein edit distance between two strings using a single
	 * rolling cost row.
	 *
	 * @param a first string; must not be null
	 * @param b second string; must not be null
	 * @return the minimum number of single-character insertions, deletions and substitutions
	 */
	private int distance(String a, String b) {
		// Locale.ROOT so case folding does not depend on the JVM default locale
		// (under a Turkish locale "I" would otherwise lower-case to a dotless i).
		String left = a.toLowerCase(Locale.ROOT);
		String right = b.toLowerCase(Locale.ROOT);
		int[] costs = new int[right.length() + 1];
		for (int j = 0; j < costs.length; j++) {
			costs[j] = j;
		}
		for (int i = 1; i <= left.length(); i++) {
			costs[0] = i;
			// Value of the cell diagonally up-left of the one being computed
			int diagonal = i - 1;
			for (int j = 1; j <= right.length(); j++) {
				int insertOrDelete = 1 + Math.min(costs[j], costs[j - 1]);
				int substitute = (left.charAt(i - 1) == right.charAt(j - 1) ? diagonal : diagonal + 1);
				diagonal = costs[j];
				costs[j] = Math.min(insertOrDelete, substitute);
			}
		}
		return costs[right.length()];
	}

	/**
	 * Returns the HocrContent text of the HOCR file referenced by the page's HocrFileName child.
	 * In debug mode a fixed sample string is returned and no file is read.
	 *
	 * @param page the Page element whose HOCR content is wanted
	 * @return the HOCR content, or "" when the file has no HocrPage or HocrContent element
	 * @throws JDOMException if the HOCR file is not well-formed XML
	 * @throws IOException   if the HOCR file cannot be read
	 */
	private String getHocrContent(Element page) throws JDOMException, IOException {
		if (IS_DEBUG) return "Your HOCR Content HERE. Blah Blah Invoice Number 12345 Blah Blah";
		String hocrPath = concatPath(BATCHLOCALPATH_VALUE, BATCHINSTANCEID_VALUE, page.getChildText("HocrFileName"));
		Element hocrDoc = readXmlFile(hocrPath).getRootElement();
		Element hocrPage = hocrDoc.getChild("HocrPage");
		// An HOCR file without a HocrPage element has no content; avoid the NullPointerException.
		if (hocrPage == null) return "";
		return getChildText(hocrPage, "HocrContent");
	}

	/**
	 * Finds the value of the named page level field anywhere below the document, choosing the
	 * occurrence whose Confidence is highest. Only confidences strictly greater than 0 are considered.
	 *
	 * @param plfName Name of the PageLevelField to look for
	 * @param doc     the Document element to search
	 * @return the best value's text, or "" when no occurrence with a non-empty Value qualifies
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	@SuppressWarnings("unchecked")
	private String getPlfValue(String plfName, Element doc) throws JDOMException {
		String query = ".//PageLevelField[Name='" + plfName + "']/Value/text()";
		List<Text> candidates = XPath.newInstance(query).selectNodes(doc);
		String bestValue = "";
		double bestConfidence = 0.0;
		for (Text candidate : candidates) {
			// Text -> Value -> PageLevelField
			Element field = (Element) candidate.getParent().getParent();
			double confidence = Double.parseDouble(field.getChildText(XMLGenerics.CONFIDENCE));
			if (!(confidence > bestConfidence)) {
				continue;
			}
			bestConfidence = confidence;
			bestValue = candidate.getValue();
		}
		return bestValue;
	}

	/**
	 * Finds the first DocumentLevelField below the document whose Name child equals the given name.
	 *
	 * @param doc     the Document element to search
	 * @param dlfName field name to match
	 * @return the matching element, or null when there is none
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	private Element getDlf(Element doc, String dlfName) throws JDOMException {
		String query = ".//" + XMLGenerics.DOCUMENT_LEVEL_FIELD + "[" + XMLGenerics.NAME + "='" + dlfName + "']";
		Object match = XPath.newInstance(query).selectSingleNode(doc);
		return (Element) match;
	}

	/**
	 * Returns the Value text of the named document level field.
	 *
	 * @param doc     the Document element to search
	 * @param dlfName field name to match
	 * @return the field's Value text, or "" when the field or its Value child is missing
	 * @throws JDOMException if the XPath expression cannot be evaluated
	 */
	private String getDlfValue(Element doc, String dlfName) throws JDOMException {
		Element field = getDlf(doc, dlfName);
		if (field == null) {
			return "";
		}
		Element valueElement = field.getChild(XMLGenerics.VALUE);
		if (valueElement == null) {
			return "";
		}
		return field.getChildText(XMLGenerics.VALUE);
	}

	/**
	 * Null-safe variant of Element.getChildText.
	 *
	 * @param parent element whose child is read; must not be null
	 * @param child  name of the child element
	 * @return the child's text, or "" when the parent has no such child
	 */
	private String getChildText(Element parent, String child) {
		if (parent.getChild(child) == null) {
			return "";
		}
		return parent.getChildText(child);
	}

	/**
	 * Sets the text of the named child, appending a new child element to the parent first when
	 * it does not exist yet.
	 *
	 * @param parent     element that owns (or will own) the child
	 * @param childName  name of the child element
	 * @param childValue text to store in the child
	 */
	private void setChildText(Element parent, String childName, String childValue) {
		Element target = parent.getChild(childName);
		if (target != null) {
			target.setText(childValue);
			return;
		}
		Element created = new Element(childName);
		parent.addContent(created);
		created.setText(childValue);
	}

	/**
	 * Merges two adjacent documents, choosing which one survives.
	 *
	 * @param doc      the earlier document
	 * @param nextDoc  the later document
	 * @param firstDoc 2 to keep nextDoc and fold doc into it; any other value keeps doc
	 * @throws JDOMException if an XPath expression cannot be evaluated
	 */
	private void mergeDocuments(Element doc, Element nextDoc, int firstDoc) throws JDOMException {
		boolean keepSecond = (firstDoc == 2);
		Element survivor = (keepSecond ? nextDoc : doc);
		Element absorbed = (keepSecond ? doc : nextDoc);
		mergeDocuments(survivor, absorbed);
	}

	/**
	 * Folds nextDoc into doc: every document level field that is empty in doc is replaced by a
	 * copy of nextDoc's field of the same name when that one has a value, clones of nextDoc's
	 * pages are appended to doc's Pages, and nextDoc is then detached from the batch.
	 *
	 * @param doc     the document that survives the merge
	 * @param nextDoc the document that is absorbed and removed
	 * @throws JDOMException if an XPath expression cannot be evaluated
	 */
	@SuppressWarnings("unchecked")
	private void mergeDocuments(Element doc, Element nextDoc) throws JDOMException {
		String survivorId = doc.getChildText(XMLGenerics.IDENTIFIER);
		String absorbedId = nextDoc.getChildText(XMLGenerics.IDENTIFIER);
		printDebug("Merging", survivorId, "and", absorbedId);

		// Fields with no Value child, or a Value child without text, count as empty
		String blankFieldQuery = ".//" + XMLGenerics.DOCUMENT_LEVEL_FIELD + "[not(Value) or not(Value/text())]";
		List<Element> blankFields = XPath.newInstance(blankFieldQuery).selectNodes(doc);
		for (Element blankField : blankFields) {
			String fieldName = blankField.getChildText(XMLGenerics.NAME);
			printDebug(survivorId, fieldName, "is empty, checking", absorbedId, "to see if it has a value");
			String donorValue = getDlfValue(nextDoc, fieldName);
			if (donorValue.isEmpty()) {
				continue;
			}
			printDebug(absorbedId, fieldName, "has value:", donorValue);
			// Put a copy of the donor field where the empty one sits, then drop the empty one
			Element container = blankField.getParentElement();
			int position = container.indexOf(blankField);
			Content donorField = getDlf(nextDoc, fieldName).detach();
			container.addContent(position, (Content) donorField.clone());
			blankField.detach();
		}

		// Append copies of the absorbed document's pages
		Element survivorPages = doc.getChild(XMLGenerics.PAGES);
		List<Element> absorbedPages = nextDoc.getChild(XMLGenerics.PAGES).getChildren(XMLGenerics.PAGE);
		for (int p = 0; p < absorbedPages.size(); p++) {
			survivorPages.addContent((Content) absorbedPages.get(p).clone());
		}
		nextDoc.detach();
	}

	/**
	 * Looks up the description of a document type for the current batch class in the database.
	 * In debug mode, when no connection can be obtained, when no row matches, or when the query
	 * fails, the doc type name itself is returned.
	 *
	 * @param docTypeName name of the document type
	 * @return the document_type_description column of the first matching row, or docTypeName as fallback
	 */
	private String getDocTypeDescription(String docTypeName) {
		if (IS_DEBUG) return docTypeName;
		if (DB_CONNECTION == null) DB_CONNECTION = connectToJNDIDBConnection("jdbc/ephesoft");
		if (DB_CONNECTION == null) return docTypeName;
		// Bind the values as parameters: names containing an apostrophe no longer break the
		// statement and config/batch values cannot alter the SQL.
		String query = "SELECT dt.document_type_description FROM document_type dt WHERE dt.document_type_name = ? AND dt.id IN (SELECT bcdt.document_type_id FROM batch_class_document_type bcdt WHERE bcdt.batch_class_id = (SELECT bc.id FROM batch_class bc WHERE bc.identifier = ?))";
		try (PreparedStatement s = DB_CONNECTION.prepareStatement(query)) {
			s.setString(1, docTypeName);
			s.setString(2, BATCHCLASSID_VALUE);
			try (ResultSet rs = s.executeQuery()) {
				if (rs.next()) {
					return rs.getString(1);
				}
			}
		} catch (SQLException e) {
			LOGGER.error(e.getMessage());
		}
		return docTypeName;
	}

	/**
	 * Looks up the minimum confidence threshold of a document type for the current batch class.
	 * In debug mode, when no connection can be obtained, when no row matches, or when the query
	 * fails, 100 is returned.
	 *
	 * @param docTypeName name of the document type
	 * @return the min_confidence_threshold column of the first matching row, or 100 as fallback
	 * @throws SQLException kept in the signature for existing callers; query failures are logged, not thrown
	 */
	private int getDocTypeConfidenceThreshold(String docTypeName) throws SQLException {
		if (IS_DEBUG) return 100;
		if (DB_CONNECTION == null) DB_CONNECTION = connectToJNDIDBConnection("jdbc/ephesoft");
		if (DB_CONNECTION == null) return 100;
		// Bind the values as parameters instead of concatenating them into the SQL text.
		String query = "SELECT dt.min_confidence_threshold FROM document_type dt WHERE dt.document_type_name = ? AND dt.id IN (SELECT bcdt.document_type_id FROM batch_class_document_type bcdt WHERE bcdt.batch_class_id = (SELECT bc.id FROM batch_class bc WHERE bc.identifier = ?))";
		// try-with-resources closes statement and result set, replacing the old finally block
		// that relied on catching NullPointerException.
		try (PreparedStatement s = DB_CONNECTION.prepareStatement(query)) {
			s.setString(1, docTypeName);
			s.setString(2, BATCHCLASSID_VALUE);
			try (ResultSet rs = s.executeQuery()) {
				if (rs.next()) return rs.getInt(1);
			}
		} catch (Exception e) {
			// Best effort: any failure falls back to the default threshold below.
			LOGGER.error(e.getMessage());
		}
		return 100;
	}

	/**
	 * Obtains a connection from a JNDI data source registered under java:comp/env.
	 *
	 * @param dataSourceName JNDI name of the data source relative to java:comp/env
	 * @return an open connection, or null when the lookup or connection attempt fails
	 */
	private Connection connectToJNDIDBConnection(final String dataSourceName) {
		// Attempt to connect to the Ephesoft DB
		try {
			LOGGER.info("************ CONNECTING TO JNDI RESOURCE DB: " + dataSourceName);
			// Obtain our environment naming context
			Context initCtx = new InitialContext();
			Context envCtx = (Context) initCtx.lookup("java:comp/env");
			// Look up our data source
			DataSource ds = (DataSource) envCtx.lookup(dataSourceName);
			// Allocate and use a connection from the pool
			Connection conn = ds.getConnection();
			LOGGER.info("************ Connected to JNDI resource DB: " + dataSourceName);
			return conn;
		} catch (Exception e) {
			// Include the cause so a failed lookup can be diagnosed from the log.
			LOGGER.error("************ Error encountered whilst trying to connect to JNDI Resource Connection: " + dataSourceName + " - " + e.getMessage());
			return null;
		}
	}

	/**
	 * Tells whether the path points to something that exists and is not a directory.
	 *
	 * @param path file system path to check
	 * @return true when the path exists and is not a directory
	 */
	private boolean fileExists(String path) {
		File candidate = new File(path);
		if (!candidate.exists()) {
			return false;
		}
		return !candidate.isDirectory();
	}

	/**
	 * Joins the given path segments with the platform file separator.
	 *
	 * @param strings path segments, in order
	 * @return the segments joined by File.separator
	 */
	private String concatPath(String... strings) {
		return String.join(File.separator, strings);
	}

	/**
	 * Captures the batch root element and batch-level values, works out where the script's
	 * config directory and properties file live, and loads the properties. When the properties
	 * file does not exist yet it is created with a header comment and an empty set is used.
	 *
	 * @param documentFile the batch XML document
	 * @throws IOException if the properties file cannot be read or created
	 */
	private void init(Document documentFile) throws IOException {
		ROOT = documentFile.getRootElement();
		BATCHNAME_VALUE = getChildText(ROOT, XMLGenerics.BATCH_NAME);
		BATCHLOCALPATH_VALUE = getChildText(ROOT, XMLGenerics.BATCH_LOCAL_PATH);
		BATCHUNCPATH_VALUE = getChildText(ROOT, XMLGenerics.UNC_FOLDER_PATH);
		BATCHINSTANCEID_VALUE = getChildText(ROOT, XMLGenerics.BATCH_INSTANCE_ID);
		BATCHCLASSID_VALUE = getChildText(ROOT, XMLGenerics.BATCH_CLASS_ID);

		// Debug runs read config from <working dir>/src; otherwise from the batch class folder
		String configParent;
		if (IS_DEBUG) {
			configParent = concatPath(new File("").getAbsolutePath(), "src");
		} else {
			configParent = concatPath(BATCHLOCALPATH_VALUE.replaceFirst("ephesoft-system-folder", ""), BATCHCLASSID_VALUE);
		}
		PATH_TO_SCRIPT_CONFIG = concatPath(configParent, "script-config");
		SCRIPT_CONFIG_FILE = concatPath(PATH_TO_SCRIPT_CONFIG, SCRIPT_NAME + ".properties");

		if (!fileExists(SCRIPT_CONFIG_FILE)) {
			FileUtils.writeStringToFile(new File(SCRIPT_CONFIG_FILE), "# Properties file to control " + SCRIPT_NAME + " options\r\n", "UTF-8", true);
			SCRIPT_PROPS = new Properties();
			return;
		}
		SCRIPT_PROPS = readKeyWordProperties(SCRIPT_NAME + ".properties");
	}

	/**
	 * Loads a properties file from the script-config directory. Backslashes in the file are
	 * doubled before parsing so they are kept literally (e.g. in Windows paths and regexes)
	 * instead of being interpreted as Properties escape sequences.
	 *
	 * @param fileName name of the properties file inside PATH_TO_SCRIPT_CONFIG
	 * @return the loaded properties; empty when the file has no content
	 * @throws IOException if the file cannot be opened or parsed
	 */
	private Properties readKeyWordProperties(String fileName) throws IOException {
		Properties prop = new Properties();
		String propFileName = concatPath(PATH_TO_SCRIPT_CONFIG, fileName);
		String propFileContents = "";
		// Read as UTF-8 (the encoding this script writes the file in) and close the scanner
		// so the file handle is released.
		try (Scanner scanner = new Scanner(new File(propFileName), "UTF-8")) {
			scanner.useDelimiter("\\Z");
			// An empty file has no token; next() alone would throw NoSuchElementException.
			if (scanner.hasNext()) {
				propFileContents = scanner.next();
			}
		}
		prop.load(new StringReader(propFileContents.replace("\\", "\\\\")));
		return prop;
	}

	/**
	 * Appends a "name=value" line to the properties file and records the same pair in the
	 * in-memory properties.
	 *
	 * @param file         properties file to append to
	 * @param propertyName property key
	 * @param defaultValue value to store
	 * @throws IOException if the file cannot be written
	 */
	private void appendToPropsFile(File file, String propertyName, String defaultValue) throws IOException {
		String line = propertyName + '=' + defaultValue + "\r\n";
		FileUtils.writeStringToFile(file, line, "UTF-8", true);
		SCRIPT_PROPS.setProperty(propertyName, defaultValue);
	}

	/**
	 * Reads a script option. When the option is not set and a non-empty default is supplied,
	 * the default is appended to the properties file so it can be edited later.
	 *
	 * @param propertyName option key
	 * @param defaultValue value to fall back to; may be null or empty
	 * @return the configured value, or defaultValue when the option is not set
	 * @throws IOException if the default cannot be written to the properties file
	 */
	private String getPropValue(String propertyName, String defaultValue) throws IOException {
		String configured = SCRIPT_PROPS.getProperty(propertyName);
		if (configured != null) {
			return configured;
		}
		boolean hasUsableDefault = (defaultValue != null && !defaultValue.isEmpty());
		if (hasUsableDefault) {
			appendToPropsFile(new File(SCRIPT_CONFIG_FILE), propertyName, defaultValue);
		}
		return defaultValue;
	}

	/**
	 * Parses an XML file into a JDOM document.
	 *
	 * @param path file system path of the XML file
	 * @return the parsed document
	 * @throws JDOMException if the file is not well-formed XML
	 * @throws IOException   if the file cannot be read
	 */
	private static Document readXmlFile(String path) throws JDOMException, IOException {
		SAXBuilder sb = new SAXBuilder();
		// Pass a File rather than the raw string: build(String) treats its argument as a system
		// id/URI, which is fragile for file paths containing characters such as '#' or '%'.
		return sb.build(new File(path));
	}

	/**
	 * Writes a JDOM document to a file using pretty-print formatting.
	 *
	 * @param path destination file path; an existing file is overwritten
	 * @param doc  the document to serialise
	 * @throws IOException if the file cannot be written
	 */
	private void writeXmlFile(String path, Document doc) throws IOException {
		XMLOutputter xmlOutput = new XMLOutputter();
		xmlOutput.setFormat(Format.getPrettyFormat());
		// Write through an OutputStream so the bytes use the encoding named in the XML
		// declaration (a FileWriter would use the platform charset), and close the stream
		// so the output is flushed and the handle released.
		try (FileOutputStream out = new FileOutputStream(path)) {
			xmlOutput.output(doc, out);
		}
	}

	/**
	 * Joins the string form of all arguments with single spaces and emits the result to stdout
	 * in debug mode, or to the logger at info level otherwise.
	 *
	 * @param messages message parts; each is converted with String.valueOf
	 */
	private void printDebug(Object... messages) {
		StringJoiner joined = new StringJoiner(" ");
		for (Object part : messages) {
			joined.add(String.valueOf(part));
		}
		String message = joined.toString();
		if (!IS_DEBUG) {
			LOGGER.info(message);
			return;
		}
		System.out.println(message);
	}

	/**
	 * Local test harness: runs the script against a sample batch XML in the user's Downloads folder.
	 *
	 * @param args not used
	 */
	public static void main(String... args) {
		String filePath = System.getProperty("user.home") + "/Downloads/BI1_batch (1).xml";
		try {
			Document batchXml = readXmlFile(filePath);
			new ScriptDocumentAssembler().execute(batchXml, null, null);
		} catch (Exception x) {
			x.printStackTrace();
		}
	}

	// Element names used when navigating the batch XML. Not every constant is referenced by
	// this script, hence the "unused" suppression.
	@SuppressWarnings("unused")
	private static class XMLGenerics {
		// Batch-level children of the root element
		private static final String BATCH_LOCAL_PATH = "BatchLocalPath";
		private static final String BATCH_NAME = "BatchName";
		private static final String BATCH_INSTANCE_ID = "BatchInstanceIdentifier";
		private static final String BATCH_CLASS_ID = "BatchClassIdentifier";
		private static final String UNC_FOLDER_PATH = "UNCFolderPath";
		// Document / page structure
		private static final String PAGES = "Pages";
		private static final String PAGE = "Page";
		private static final String DOCUMENTS = "Documents";
		private static final String DOCUMENT = "Document";
		// Generic child element names
		private static final String NAME = "Name";
		private static final String TYPE = "Type";
		private static final String VALUE = "Value";
		private static final String VALID = "Valid";
		private static final String DESCRIPTION = "Description";
		private static final String IDENTIFIER = "Identifier";
		// Field containers
		private static final String DOCUMENT_LEVEL_FIELDS = "DocumentLevelFields";
		private static final String DOCUMENT_LEVEL_FIELD = "DocumentLevelField";
		private static final String PAGE_LEVEL_FIELDS = "PageLevelFields";
		private static final String PAGE_LEVEL_FIELD = "PageLevelField";
		private static final String ALTERNATE_VALUES = "AlternateValues";
		private static final String ALTERNATE_VALUE = "AlternateValue";
		private static final String FIELD_VALUE_OPTION_LIST = "FieldValueOptionList";
		private static final String OLD_FILE_NAME = "OldFileName";
		// Confidence and review related elements
		private static final String CONFIDENCE = "Confidence";
		private static final String CONFIDENCE_THRESHOLD = "ConfidenceThreshold";
		private static final String OCR_CONFIDENCE = "OcrConfidence";
		private static final String OCR_CONFIDENCE_THRESHOLD = "OcrConfidenceThreshold";
		private static final String FORCE_REVIEW = "ForceReview";
		private static final String REVIEWED = "Reviewed";
		private static final String COORDINATES_LIST = "CoordinatesList";
		private static final String BATCH_LEVEL_FIELDS = "BatchLevelFields";
		private static final String BATCH_LEVEL_FIELD = "BatchLevelField";
	}
}
	<Rules>
	<!--<Rule>
	<Comment>Optional comment for logging</Comment>
	<NewDocType>Doc type to set document to if all rules pass</NewDocType>
	<SourceDocType>Optional document type filter, separate multiple doc types with a pipe '|'</SourceDocType>
	<Priority>Rule priority, higher numbers take precedence, in case of a tie whichever comes first wins</Priority>
	<Criteria>
	<Criterion>
	<Type>hocr (default), doctype, plf/kvpp, dlf/kve, file, path, email</Type>
	<Page>Page number within document, default is 1</Page>
	<FieldName>Name of page or document level field or email header</FieldName>
	<Operator>equals (default), same, distance, matches, not-matches, contains, not-contains, has-value, not-has-value, starts-with, or ends-with</Operator>
	<Value>Value to compare criteria with (not to be used with same, has-value, or not-has-value)</Value>
	</Criterion>
	</Criteria>
	</Rule>-->
	<Rule>
	<Comment>Classify Based on HOCR</Comment>
	<NewDocType>ExampleDocType</NewDocType>
	<Priority>50</Priority>
	<Criteria>
	<Criterion>
	<Type>hocr</Type>
	<Operator>matches</Operator>
	<Value>(?i)^.*(some-domain.com|\bSome Other Key-phrase\b).*$</Value>
	</Criterion>
	<Criterion>
	<Type>hocr</Type>
	<Operator>not-matches</Operator>
	<Value>(?i)^.*(Page can't contain this text. Remove if not necessary).*$</Value>
	</Criterion>
	</Criteria>
	</Rule>
	<Rule>
	<Comment>Classify Based on Email Header</Comment>
	<NewDocType>ExampleDocType</NewDocType>
	<Priority>100</Priority>
	<Criteria>
	<Criterion>
	<Type>email</Type>
	<FieldName>From</FieldName>
	<Operator>matches</Operator>
	<Value>^(?i).*@some-domain.com.*$</Value>
	</Criterion>
	</Criteria>
	</Rule>
	</Rules>