Skip to content

Instantly share code, notes, and snippets.

@jhotmann
Last active January 13, 2023 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jhotmann/3af8bc2bd1cf367d213f4b1704d0e782 to your computer and use it in GitHub Desktop.
Save jhotmann/3af8bc2bd1cf367d213f4b1704d0e782 to your computer and use it in GitHub Desktop.
Classify/Merge Script
<Rules>
<!--<Rule>
<Comment>Optional comment for logging</Comment>
<NewDocType>Doc type to set document to if all rules pass</NewDocType>
<SourceDocType>Optional document type filter, separate multiple doc types with a pipe '|'</SourceDocType>
<Priority>Rule priority, higher numbers take precedence, in case of a tie whichever comes first wins</Priority>
<Criteria>
<Criterion>
<Type>hocr (default), doctype, plf/kvpp, dlf/kve, file, path, email</Type>
<Page>Page number within document, default is 1</Page>
<FieldName>Name of page or document level field or email header</FieldName>
<Operator>equals (default), not-equals, same, distance, matches, not-matches, contains, not-contains, has-value, not-has-value, starts-with, or ends-with</Operator>
<Value>Value to compare criteria with (not to be used with same, has-value, or not-has-value)</Value>
</Criterion>
</Criteria>
</Rule>-->
<Rule>
<Comment>Classify Based on HOCR</Comment>
<NewDocType>ExampleDocType</NewDocType>
<Priority>50</Priority>
<Criteria>
<Criterion>
<Type>hocr</Type>
<Operator>matches</Operator>
<Value>(?i)^.*(some-domain.com|\bSome Other Key-phrase\b).*$</Value>
</Criterion>
<Criterion>
<Type>hocr</Type>
<Operator>not-matches</Operator>
<Value>(?i)^.*(Page can't contain this text. Remove if not necessary).*$</Value>
</Criterion>
</Criteria>
</Rule>
<Rule>
<Comment>Classify Based on Email Header</Comment>
<NewDocType>ExampleDocType</NewDocType>
<Priority>100</Priority>
<Criteria>
<Criterion>
<Type>email</Type>
<FieldName>From</FieldName>
<Operator>matches</Operator>
<Value>^(?i).*@some-domain.com.*$</Value>
</Criterion>
</Criteria>
</Rule>
</Rules>
<Rules>
<!--<Rule>
<Comment>Optional Comment for Logging</Comment>
<FirstDoc>1 (default) or 2</FirstDoc>
<Criteria>
<Criterion>
<Type>hocr (default), doctype, plf/kvpp, dlf/kve, file, path, email</Type>
<DocumentNumber>1, 2, or blank for both</DocumentNumber>
<FieldName>Name of page or document level field or email header</FieldName>
<Operator>equals (default), not-equals, same, distance, matches, not-matches, contains, not-contains, has-value, not-has-value, starts-with, or ends-with</Operator>
<Value>Value to compare criteria with (not to be used with same, has-value, or not-has-value)</Value>
</Criterion>
</Criteria>
</Rule>-->
<Rule>
<Comment>Merge pages with little or no content to previous doc</Comment>
<FirstDoc>1</FirstDoc>
<Criteria>
<Criterion>
<Type>hocr</Type>
<DocumentNumber>2</DocumentNumber>
<Operator>matches</Operator>
<Value>^.{0,200}$</Value>
</Criterion>
</Criteria>
</Rule>
</Rules>
import com.ephesoft.dcma.script.IJDomScript;
import com.ephesoft.dcma.util.logger.EphesoftLogger;
import com.ephesoft.dcma.util.logger.ScriptLoggerFactory;
import org.apache.commons.io.FileUtils;
import org.jdom.*;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;
import javax.naming.Context;
import javax.naming.InitialContext;
import javax.sql.DataSource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringReader;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.*;
public class ScriptDocumentAssembler implements IJDomScript {
// Logger obtained from the Ephesoft script logger factory; used when not in debug mode.
private static EphesoftLogger LOGGER = ScriptLoggerFactory.getLogger(ScriptDocumentAssembler.class);
// Simple class name; used in log banners and to build the "<name>.properties" file name.
private String SCRIPT_NAME = this.getClass().getSimpleName();
// True when os.name is "Mac OS X" or "Windows 10" (case-insensitive). In debug mode the script
// prints to stdout, stubs the HOCR content and DB lookups, and writes a copy of the result XML.
private boolean IS_DEBUG = System.getProperty("os.name").matches("(?i)Mac OS X|Windows 10");
// Batch-level values read from the root element of the batch XML by init().
private String BATCHNAME_VALUE;
private String BATCHLOCALPATH_VALUE;
private String BATCHUNCPATH_VALUE;
private String BATCHINSTANCEID_VALUE;
private String BATCHCLASSID_VALUE;
// Folder ("script-config") holding the properties file and the rule XML files; set by init().
private String PATH_TO_SCRIPT_CONFIG;
// Full path of "<SCRIPT_NAME>.properties" inside PATH_TO_SCRIPT_CONFIG; set by init().
private String SCRIPT_CONFIG_FILE;
// Properties loaded from SCRIPT_CONFIG_FILE (or empty when the file had to be created).
private Properties SCRIPT_PROPS;
// Root element of the batch XML passed to execute().
private Element ROOT;
// Opened on demand by the doc-type lookups and closed at the end of execute().
private Connection DB_CONNECTION = null;
/**
 * Script entry point: initialises batch state from the batch XML, optionally runs the
 * classification rules and the merge rules (each controlled by a property that defaults
 * to "true"), and finally renumbers the document identifiers.
 *
 * @param documentFile       the batch XML document to process; a null value is reported as an error
 * @param methodName         not read by this method
 * @param documentIdentifier not read by this method
 * @return {@code null} when processing completed, otherwise the exception that aborted it
 */
public Object execute(Document documentFile, String methodName, String documentIdentifier) {
    Exception exception = null;
    printDebug("************* Start execution of the " + SCRIPT_NAME);
    try {
        if (null == documentFile) {
            throw (new Exception("Input document is null."));
        }
        init(documentFile);
        printDebug("************* Batch " + BATCHINSTANCEID_VALUE + " - " + BATCHNAME_VALUE);
        if (Boolean.parseBoolean(getPropValue("RunClassificationRules", "true"))) {
            classify(getPropValue("ClassificationConfig", "classify.xml"));
        }
        if (Boolean.parseBoolean(getPropValue("RunMergeRules", "true"))) {
            merge(getPropValue("MergeConfig", "merge.xml"));
        }
        reassignDocIdentifier();
        // In debug mode keep a copy of the resulting batch XML for inspection
        if (IS_DEBUG) writeXmlFile(System.getProperty("user.home") + "/Downloads/" + SCRIPT_NAME + "-out.xml", documentFile);
    } catch (Exception e) {
        exception = e;
        if (IS_DEBUG) e.printStackTrace();
        else LOGGER.error("ERROR!!! - " + e.getMessage());
    } finally {
        if (DB_CONNECTION != null) {
            try {
                DB_CONNECTION.close();
            } catch (SQLException e) {
                if (IS_DEBUG) e.printStackTrace();
                else LOGGER.error("ERROR!!! - " + e.getMessage());
            } finally {
                // Forget the closed connection so a later execute() on this instance
                // opens a fresh one instead of reusing a dead handle.
                DB_CONNECTION = null;
            }
        }
    }
    printDebug("************* End execution of the " + SCRIPT_NAME);
    return exception;
}
/**
 * Applies the classification rules from the given config file to every document in the batch.
 * For each document the matching rule with the highest priority wins (on a tie the earlier
 * rule wins); the document's type, description, confidence threshold and confidence are then
 * updated. Does nothing when the config file is not present.
 *
 * @param configName file name of the classification rules XML inside the script-config folder
 * @throws JDOMException if the rules XML or an XPath expression cannot be processed
 * @throws IOException   if a file needed for a criterion cannot be read
 * @throws SQLException  declared by the document type threshold lookup
 */
@SuppressWarnings("unchecked")
private void classify(String configName) throws JDOMException, IOException, SQLException {
    printDebug("***** Begin Classification *****");
    String configPath = concatPath(PATH_TO_SCRIPT_CONFIG, configName);
    // Check for classification xml
    if (!fileExists(configPath)) {
        printDebug(configName, "not found!");
        return;
    }
    printDebug(configName, "found");
    // Parse xml for classification rules
    Element config = readXmlFile(configPath).getRootElement();
    // The rule list and the criterion query are the same for every document: build them once
    List<Element> classifyRules = config.getChildren("Rule");
    XPath criterionQuery = XPath.newInstance(".//Criterion");
    // Loop through documents and classify pages
    List<Element> docList = XPath.newInstance("//Document").selectNodes(ROOT);
    for (Element doc : docList) {
        String docId = doc.getChildText(XMLGenerics.IDENTIFIER);
        printDebug("Comparing", docId, "to classification rules");
        String docType = doc.getChildText(XMLGenerics.TYPE);
        String newDocType = "";
        String newDocTypeDesc = "";
        int newDocTypeConfThresh = 100;
        int priority = 0;
        // Loop through classification rules
        for (Element rule : classifyRules) {
            // NewDocType
            String ruleNewDocType = getChildText(rule, "NewDocType");
            if (ruleNewDocType.isEmpty()) {
                printDebug("NewDocType is not set, skipping rule");
                continue;
            }
            String comment = getChildText(rule, "Comment");
            if (!comment.isEmpty()) printDebug("Rule:", comment);
            // SourceDocType
            String sourceDocTypeText = getChildText(rule, "SourceDocType");
            if (!sourceDocTypeText.isEmpty()) {
                List<String> sourceDocTypes = Arrays.asList(sourceDocTypeText.split("\\|"));
                if (!sourceDocTypes.contains(docType)) {
                    printDebug("The document type", docType, "does not match the rule's doc type(s):", String.join(", ", sourceDocTypes));
                    continue;
                }
            }
            // Priority (defaults to 1); trimmed so whitespace around the number is tolerated
            String priorityText = getChildText(rule, "Priority").trim();
            int rulePriority = (priorityText.isEmpty() ? 1 : Integer.parseInt(priorityText));
            if (rulePriority < priority) {
                printDebug("Rule priority lower than highest matching priority, skipping rule");
                continue;
            }
            boolean criteriaMet = true;
            // Loop through all criteria for classification rule
            List<Element> criteria = criterionQuery.selectNodes(rule);
            for (Element criterion : criteria) {
                String criterionType = getChildText(criterion, "Type");
                String pageText = getChildText(criterion, "Page");
                String criterionPage = (pageText.isEmpty() ? "1" : pageText);
                String criterionField = getChildText(criterion, "FieldName");
                String criterionOperator = getChildText(criterion, "Operator");
                String criterionValue = getChildText(criterion, "Value");
                // Comparison source
                String source = getDocDetail(doc, criterionType, criterionField, criterionPage);
                // Check if criterion met
                criteriaMet = checkCriteria(criterionOperator, criterionValue, source);
                printDebug("Criterion:", docId, "\"" + source + "\"", criterionOperator,
                        (!criterionOperator.contains("has-value") ? "\"" + criterionValue + "\"." : "-"),
                        (criteriaMet ? "Passed!" : "Failed"));
                // All criteria must pass, so stop at the first failure
                if (!criteriaMet) break;
            }
            if (!criteriaMet) {
                printDebug("Criteria not met.");
                continue;
            }
            // Equal priority does not replace an earlier match: first rule wins a tie
            if (rulePriority <= priority) {
                printDebug("Criteria met, but rule priority lower than previous rule(s)");
                continue;
            }
            printDebug("All criteria met and rule is highest priority so far: ", rulePriority);
            newDocType = ruleNewDocType;
            newDocTypeDesc = getDocTypeDescription(ruleNewDocType);
            newDocTypeConfThresh = getDocTypeConfidenceThreshold(ruleNewDocType);
            priority = rulePriority;
        }
        if (newDocType.isEmpty()) continue;
        // At least one classification rule passed, setting doc type to doc type of highest priority rule
        printDebug("Setting", docId, "to:", newDocType);
        doc.getChild(XMLGenerics.TYPE).setText(newDocType);
        doc.getChild(XMLGenerics.DESCRIPTION).setText(newDocTypeDesc);
        doc.getChild(XMLGenerics.CONFIDENCE_THRESHOLD).setText(newDocTypeConfThresh + ".0");
        if (Boolean.parseBoolean(getPropValue("SetConfidenceToPriority", "true"))) {
            setChildText(doc, XMLGenerics.CONFIDENCE, priority + ".00");
        } else {
            setChildText(doc, XMLGenerics.CONFIDENCE, "100.00");
        }
    }
}
/**
 * Applies the merge rules from the given config file. For every rule, each pair of adjacent
 * documents is tested against the rule's criteria; when all criteria pass the pair is merged
 * and the resulting document is compared with its new neighbour. Does nothing when the config
 * file is not present. A rule without a {@code <Criteria>} element is skipped.
 *
 * @param configName file name of the merge rules XML inside the script-config folder
 * @throws JDOMException if the rules XML or an XPath expression cannot be processed
 * @throws IOException   if a file needed for a criterion cannot be read
 */
@SuppressWarnings("unchecked")
private void merge(String configName) throws JDOMException, IOException {
    printDebug("***** Begin Merges *****");
    String configPath = concatPath(PATH_TO_SCRIPT_CONFIG, configName);
    // Check for merge xml
    if (!fileExists(configPath)) {
        printDebug(configName, "not found!");
        return;
    }
    printDebug(configName, "found");
    // Parse xml for merge rules
    Element config = readXmlFile(configPath).getRootElement();
    // Loop through rules and merge pages
    List<Element> rules = config.getChildren("Rule");
    for (Element rule : rules) {
        String ruleComment = getChildText(rule, "Comment");
        String firstDoc = getChildText(rule, "FirstDoc").trim();
        if (!ruleComment.isEmpty()) printDebug("--", ruleComment, "--");
        // The criteria belong to the rule, not to the document pair: fetch them once per rule
        Element criteriaParent = rule.getChild("Criteria");
        if (criteriaParent == null) {
            printDebug("Rule has no Criteria element, skipping rule");
            continue;
        }
        List<Element> criteria = criteriaParent.getChildren("Criterion");
        List<Element> docList = XPath.newInstance("//Document").selectNodes(ROOT);
        // Loop through all documents for each rule
        for (int i = 0; i < docList.size() - 1; i++) {
            Element doc1 = docList.get(i);
            Element doc2 = docList.get(i + 1);
            String docId1 = doc1.getChildText(XMLGenerics.IDENTIFIER);
            String docId2 = doc2.getChildText(XMLGenerics.IDENTIFIER);
            boolean criteriaMet = true;
            // Check all criteria against the documents
            for (Element criterion : criteria) {
                String criterionType = getChildText(criterion, "Type");
                String criterionDocNumber = getChildText(criterion, "DocumentNumber").trim();
                String criterionField = getChildText(criterion, "FieldName");
                String criterionOperator = getChildText(criterion, "Operator");
                String criterionValue = getChildText(criterion, "Value");
                // Normalised operator used for branching, so that e.g. "Same" is handled like "same"
                String operatorKey = criterionOperator.trim().toLowerCase(Locale.ROOT);
                String outcome;
                // Get source value for each document
                String source1 = getDocDetail(doc1, criterionType, criterionField, null);
                String source2 = getDocDetail(doc2, criterionType, criterionField, null);
                String quoted1 = "\"" + source1 + "\"";
                String quoted2 = "\"" + source2 + "\"";
                // Check if criterion met
                if (operatorKey.equals("same")) {
                    criteriaMet = checkCriteria(operatorKey, criterionValue, source1, source2);
                } else if (criterionDocNumber.equals("1")) {
                    criteriaMet = checkCriteria(operatorKey, criterionValue, source1);
                } else if (criterionDocNumber.equals("2")) {
                    criteriaMet = checkCriteria(operatorKey, criterionValue, source2);
                } else if (criterionValue.isEmpty()) {
                    // No value and no document number: compare both documents against document 1's value
                    criteriaMet = checkCriteria(operatorKey, source1, source1, source2);
                } else {
                    criteriaMet = checkCriteria(operatorKey, criterionValue, source1, source2);
                }
                outcome = (criteriaMet ? "Passed!" : "Failed");
                // Log results
                if (!operatorKey.matches("same|.*has-value|distance")) {
                    if (criterionDocNumber.equals("1")) {
                        printDebug("Criterion:", docId1, quoted1, criterionOperator, "\"" + criterionValue + "\".", outcome);
                    } else if (criterionDocNumber.equals("2")) {
                        printDebug("Criterion:", docId2, quoted2, criterionOperator, "\"" + criterionValue + "\".", outcome);
                    } else {
                        printDebug("Criterion:", docId1, quoted1, "and", docId2, quoted2, criterionOperator, "\"" + criterionValue + "\".", outcome);
                    }
                } else if (operatorKey.equals("same")) {
                    printDebug("Criterion:", docId1, quoted1, "and", docId2, quoted2, "are the same.", outcome);
                } else if (operatorKey.equals("distance")) {
                    printDebug("Criterion:", docId1, quoted1, "and", docId2, quoted2, "distance less than or equal to", criterionValue + ".", outcome);
                } else if (operatorKey.matches(".*has-value")) {
                    if (criterionDocNumber.equals("1")) {
                        printDebug("Criterion:", docId1, quoted1, criterionOperator + ".", outcome);
                    } else if (criterionDocNumber.equals("2")) {
                        printDebug("Criterion:", docId2, quoted2, criterionOperator + ".", outcome);
                    } else {
                        printDebug("Criterion:", docId1, quoted1, "and/or", docId2, quoted2, criterionOperator + ".", outcome);
                    }
                }
                // Stop checking criteria if not met
                if (!criteriaMet) break;
            }
            // Move on to next document if criteria not met
            if (!criteriaMet) {
                printDebug("Criteria not met");
                continue;
            }
            // Merge documents if all criteria met
            printDebug("Criteria met!");
            mergeDocuments(doc1, doc2, (firstDoc.equals("2") ? 2 : 1));
            // The document list changed; re-query it and revisit the same index so the merged
            // document is compared with its new neighbour
            docList = XPath.newInstance("//Document").selectNodes(ROOT);
            i--;
        }
    }
}
/**
 * Resolves the text a criterion is compared against.
 *
 * @param doc        the document element to read from
 * @param detailType one of doctype, plf/kvpp, dlf/kve, file, path, email (case-insensitive);
 *                   anything else is treated as hocr
 * @param fieldName  field or email header name; only used by the plf/kvpp, dlf/kve and email types
 * @param pageNumber 1-based page position within the document, or null for page 1; only used by
 *                   the file and hocr types
 * @return the resolved text (the doctype branch returns whatever the Type child holds)
 * @throws JDOMException if an XPath expression cannot be evaluated
 * @throws IOException   if the HOCR file cannot be read
 */
private String getDocDetail(Element doc, String detailType, String fieldName, String pageNumber) throws JDOMException, IOException {
    pageNumber = (pageNumber != null ? pageNumber : "1");
    String pageQuery = ".//" + XMLGenerics.PAGES + "/" + XMLGenerics.PAGE + "[position()=" + pageNumber + "]";
    // Locale.ROOT keeps the keyword match independent of the JVM default locale
    switch (detailType.trim().toLowerCase(Locale.ROOT)) {
        case "doctype": return doc.getChildText(XMLGenerics.TYPE);
        case "plf":
        case "kvpp": return getPlfValue(fieldName, doc);
        case "dlf":
        case "kve": return getDlfValue(doc, fieldName);
        case "file": {
            Text fileNameText = (Text) XPath.newInstance(pageQuery + "/" + XMLGenerics.OLD_FILE_NAME + "/text()").selectSingleNode(doc);
            // Strip the trailing "-dddd-dddd.<ext>" suffix. [A-Za-z] replaces [A-z], which also
            // matched the punctuation characters that sit between 'Z' and 'a'.
            return (fileNameText != null ? fileNameText.getValue().replaceAll("-\\d{4}-\\d{4}\\.[A-Za-z]+$", "") : "");
        }
        case "path": return BATCHUNCPATH_VALUE;
        case "email": return getEmailHeader(fieldName);
        default: {
            Element page = (Element) XPath.newInstance(pageQuery).selectSingleNode(doc);
            return getHocrContent(page);
        }
    }
}
/**
 * Evaluates one operator against one or more source strings.
 *
 * @param operator operator keyword (case-insensitive, surrounding whitespace ignored); an
 *                 unrecognised or empty keyword is treated as "equals"
 * @param value    comparison value; for "distance" it is the maximum allowed edit distance and
 *                 must parse as an integer; ignored by same, has-value and not-has-value
 * @param sources  the strings to test; null entries are treated as empty strings
 * @return true when the operator holds for the sources ("same" and "distance" need at least two)
 * @throws NumberFormatException if the operator is "distance", two or more sources are given
 *                               and {@code value} is not an integer
 */
private boolean checkCriteria(String operator, String value, String... sources) {
    // Null sources (e.g. a missing element) would otherwise cause a NullPointerException below
    List<String> sourceList = new ArrayList<>(sources.length);
    for (String source : sources) {
        sourceList.add(source != null ? source : "");
    }
    // Locale.ROOT keeps the keyword match independent of the JVM default locale
    switch (operator.trim().toLowerCase(Locale.ROOT)) {
        case "same": return (sourceList.size() > 1 && sourceList.stream().distinct().count() == 1);
        case "distance": {
            if (sourceList.size() < 2) return false;
            // Parse the limit once instead of once per source
            final int maxDistance = Integer.parseInt(value.trim());
            final String reference = sourceList.get(0);
            return sourceList.stream().allMatch(s -> distance(reference, s) <= maxDistance);
        }
        case "not-equals": return sourceList.stream().noneMatch(s -> s.equals(value));
        case "matches": return sourceList.stream().allMatch(s -> s.matches(value));
        case "not-matches": return sourceList.stream().noneMatch(s -> s.matches(value));
        case "contains": return sourceList.stream().allMatch(s -> s.contains(value));
        case "not-contains": return sourceList.stream().noneMatch(s -> s.contains(value));
        case "has-value": return sourceList.stream().noneMatch(String::isEmpty);
        case "not-has-value": return sourceList.stream().allMatch(String::isEmpty);
        case "starts-with": return sourceList.stream().allMatch(s -> s.startsWith(value));
        case "ends-with": return sourceList.stream().allMatch(s -> s.endsWith(value));
        default: return sourceList.stream().allMatch(s -> s.equals(value));
    }
}
/**
 * Renumbers the Identifier of every document under the batch root as DOC1, DOC2, ... in
 * document order.
 *
 * @throws JDOMException if the document XPath cannot be evaluated
 */
@SuppressWarnings("unchecked")
private void reassignDocIdentifier() throws JDOMException {
    printDebug("***** Updating document IDs to be sequential after merging *****");
    XPath documentQuery = XPath.newInstance("//" + XMLGenerics.DOCUMENT);
    List<Element> documents = documentQuery.selectNodes(ROOT);
    int sequence = 0;
    for (Element document : documents) {
        sequence++;
        Element identifier = document.getChild(XMLGenerics.IDENTIFIER);
        identifier.setText("DOC" + sequence);
    }
}
// ##### HELPER METHODS #####
/**
 * Looks up an email header by name under EmailHeaders/Email/Headers in the batch XML.
 *
 * @param headerName the header's Name value
 * @return the header's Value text, or "" when the header or its Value element is absent
 * @throws JDOMException if the XPath cannot be evaluated
 */
private String getEmailHeader(String headerName) throws JDOMException {
    String headerQuery = "//EmailHeaders/Email/Headers/Header[Name='" + headerName + "']";
    Element match = (Element) XPath.newInstance(headerQuery).selectSingleNode(ROOT);
    if (match != null && match.getChild(XMLGenerics.VALUE) != null) {
        return getChildText(match, XMLGenerics.VALUE);
    }
    return "";
}
/**
 * Case-insensitive edit distance between two strings: the minimum number of single-character
 * insertions, deletions and substitutions needed to turn one into the other.
 *
 * @param a first string
 * @param b second string
 * @return the edit distance after lower-casing both inputs
 */
private int distance(String a, String b) {
    final String left = a.toLowerCase();
    final String right = b.toLowerCase();
    final int width = right.length();
    // previousRow[col] = distance between the first (row - 1) chars of left and first col chars of right
    int[] previousRow = new int[width + 1];
    int[] currentRow = new int[width + 1];
    for (int col = 0; col <= width; col++) {
        previousRow[col] = col;
    }
    for (int row = 1; row <= left.length(); row++) {
        final char leftChar = left.charAt(row - 1);
        currentRow[0] = row;
        for (int col = 1; col <= width; col++) {
            int substitution = previousRow[col - 1] + (leftChar == right.charAt(col - 1) ? 0 : 1);
            int deletion = previousRow[col] + 1;
            int insertion = currentRow[col - 1] + 1;
            currentRow[col] = Math.min(Math.min(deletion, insertion), substitution);
        }
        // Reuse the two buffers instead of allocating a new row each time
        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;
    }
    return previousRow[width];
}
/**
 * Reads the HOCR text of a page from the HOCR XML file named in the page's HocrFileName child.
 * In debug mode a fixed sample string is returned instead.
 *
 * @param page the page element, or null when the requested page does not exist in the document
 * @return the HocrContent text; "" when the page, its HOCR file name, or the HocrPage element
 *         is missing
 * @throws JDOMException if the HOCR file is not well-formed XML
 * @throws IOException   if the HOCR file cannot be read
 */
private String getHocrContent(Element page) throws JDOMException, IOException {
    if (IS_DEBUG) return "Your HOCR Content HERE. Blah Blah Invoice Number 12345 Blah Blah";
    // A criterion may reference a page position the document does not have
    if (page == null) {
        printDebug("Requested page not found, treating HOCR content as empty");
        return "";
    }
    String hocrFileName = page.getChildText("HocrFileName");
    if (hocrFileName == null || hocrFileName.isEmpty()) {
        printDebug("Page has no HocrFileName, treating HOCR content as empty");
        return "";
    }
    String hocrPath = concatPath(BATCHLOCALPATH_VALUE, BATCHINSTANCEID_VALUE, hocrFileName);
    Element hocrDoc = readXmlFile(hocrPath).getRootElement();
    Element hocrPage = hocrDoc.getChild("HocrPage");
    if (hocrPage == null) return "";
    return getChildText(hocrPage, "HocrContent");
}
/**
 * Finds the value of the named page level field with the highest confidence within a document.
 *
 * @param plfName the page level field's Name
 * @param doc     the document element to search
 * @return the value text of the candidate whose Confidence is greatest, or "" when no candidate
 *         has a confidence above 0
 * @throws JDOMException if the XPath cannot be evaluated
 */
@SuppressWarnings("unchecked")
private String getPlfValue(String plfName, Element doc) throws JDOMException {
    XPath valueTextQuery = XPath.newInstance(".//PageLevelField[Name='" + plfName + "']/Value/text()");
    List<Text> candidates = valueTextQuery.selectNodes(doc);
    double bestConfidence = 0.0;
    String bestValue = "";
    for (Text candidate : candidates) {
        // text node -> Value element -> PageLevelField element
        Element field = (Element) candidate.getParent().getParent();
        String confidenceText = field.getChildText(XMLGenerics.CONFIDENCE);
        double candidateConfidence = Double.parseDouble(confidenceText);
        if (candidateConfidence > bestConfidence) {
            bestValue = candidate.getValue();
            bestConfidence = candidateConfidence;
        }
    }
    return bestValue;
}
/**
 * Finds the first document level field with the given name below a document element.
 *
 * @param doc     the document element to search
 * @param dlfName the field's Name
 * @return the matching field element, or null when there is none
 * @throws JDOMException if the XPath cannot be evaluated
 */
private Element getDlf(Element doc, String dlfName) throws JDOMException {
    String fieldQuery = ".//" + XMLGenerics.DOCUMENT_LEVEL_FIELD
            + "[" + XMLGenerics.NAME + "='" + dlfName + "']";
    Object match = XPath.newInstance(fieldQuery).selectSingleNode(doc);
    return (Element) match;
}
/**
 * Returns the Value text of the named document level field.
 *
 * @param doc     the document element to search
 * @param dlfName the field's Name
 * @return the field's Value text, or "" when the field or its Value element is absent
 * @throws JDOMException if the field lookup fails
 */
private String getDlfValue(Element doc, String dlfName) throws JDOMException {
    Element field = getDlf(doc, dlfName);
    if (field == null) {
        return "";
    }
    return (field.getChild(XMLGenerics.VALUE) == null ? "" : field.getChildText(XMLGenerics.VALUE));
}
/**
 * Null-safe child text lookup.
 *
 * @param parent the element whose child is read
 * @param child  the child element's name
 * @return the child's text, or "" when the child does not exist
 */
private String getChildText(Element parent, String child) {
    Element childElement = parent.getChild(child);
    if (childElement == null) {
        return "";
    }
    return parent.getChildText(child);
}
/**
 * Sets the text of a child element, appending the child to the parent first when it is missing.
 *
 * @param parent     the element that owns (or will own) the child
 * @param childName  the child element's name
 * @param childValue the text to store in the child
 */
private void setChildText(Element parent, String childName, String childValue) {
    Element target = parent.getChild(childName);
    if (target != null) {
        target.setText(childValue);
        return;
    }
    // Child does not exist yet: create it, attach it, then fill it
    target = new Element(childName);
    parent.addContent(target);
    target.setText(childValue);
}
/**
 * Merges two adjacent documents, choosing which one survives.
 *
 * @param doc      the earlier document
 * @param nextDoc  the later document
 * @param firstDoc 2 to keep {@code nextDoc} and fold {@code doc} into it; any other value keeps
 *                 {@code doc} and folds {@code nextDoc} into it
 * @throws JDOMException if a field lookup during the merge fails
 */
private void mergeDocuments(Element doc, Element nextDoc, int firstDoc) throws JDOMException {
    final boolean secondLeads = (firstDoc == 2);
    final Element kept = secondLeads ? nextDoc : doc;
    final Element absorbed = secondLeads ? doc : nextDoc;
    mergeDocuments(kept, absorbed);
}
/**
 * Folds {@code nextDoc} into {@code doc}: document level fields that are empty in {@code doc}
 * are replaced by the same-named field of {@code nextDoc} when that one has a value, the pages
 * of {@code nextDoc} are appended to {@code doc}, and {@code nextDoc} is removed from the tree.
 *
 * @param doc     the document that survives the merge
 * @param nextDoc the document that is absorbed and detached
 * @throws JDOMException if an XPath lookup fails
 */
@SuppressWarnings("unchecked")
private void mergeDocuments(Element doc, Element nextDoc) throws JDOMException {
    final String keptId = doc.getChildText(XMLGenerics.IDENTIFIER);
    final String absorbedId = nextDoc.getChildText(XMLGenerics.IDENTIFIER);
    printDebug("Merging", keptId, "and", absorbedId);

    // Fields of the surviving document that have no Value element or no Value text
    String blankFieldQuery = ".//" + XMLGenerics.DOCUMENT_LEVEL_FIELD + "[not(Value) or not(Value/text())]";
    List<Element> blankFields = XPath.newInstance(blankFieldQuery).selectNodes(doc);
    for (Element blankField : blankFields) {
        String fieldName = blankField.getChildText(XMLGenerics.NAME);
        printDebug(keptId, fieldName, "is empty, checking", absorbedId, "to see if it has a value");
        String donorValue = getDlfValue(nextDoc, fieldName);
        if (donorValue.isEmpty()) {
            continue;
        }
        printDebug(absorbedId, fieldName, "has value:", donorValue);
        // Put a copy of the donor field at the blank field's position, then drop the blank one
        Element container = blankField.getParentElement();
        int position = container.indexOf(blankField);
        Content replacement = (Content) getDlf(nextDoc, fieldName).detach().clone();
        container.addContent(position, replacement);
        blankField.detach();
    }

    // Append copies of the absorbed document's pages
    Element keptPages = doc.getChild(XMLGenerics.PAGES);
    List<Element> donorPages = nextDoc.getChild(XMLGenerics.PAGES).getChildren(XMLGenerics.PAGE);
    for (Element donorPage : donorPages) {
        Content pageCopy = (Content) donorPage.clone();
        keptPages.addContent(pageCopy);
    }
    nextDoc.detach();
}
/**
 * Looks up the description of a document type for the current batch class in the database
 * reached through the "jdbc/ephesoft" JNDI data source.
 *
 * @param docTypeName the document type name
 * @return the stored description; {@code docTypeName} itself in debug mode, when no connection
 *         is available, when no row (or a null description) is found, or when the query fails
 */
private String getDocTypeDescription(String docTypeName) {
    if (IS_DEBUG) return docTypeName;
    if (DB_CONNECTION == null) DB_CONNECTION = connectToJNDIDBConnection("jdbc/ephesoft");
    if (DB_CONNECTION == null) return docTypeName;
    // Bind the name and batch class id as parameters instead of concatenating them into the SQL
    String query = "SELECT dt.document_type_description FROM document_type dt WHERE dt.document_type_name = ? AND dt.id IN (SELECT bcdt.document_type_id FROM batch_class_document_type bcdt WHERE bcdt.batch_class_id = (SELECT bc.id FROM batch_class bc WHERE bc.identifier = ?))";
    try (PreparedStatement statement = DB_CONNECTION.prepareStatement(query)) {
        statement.setString(1, docTypeName);
        statement.setString(2, BATCHCLASSID_VALUE);
        try (ResultSet rs = statement.executeQuery()) {
            if (rs.next()) {
                String description = rs.getString(1);
                if (description != null) return description;
            }
        }
    } catch (SQLException e) {
        LOGGER.error(e.getMessage());
    }
    return docTypeName;
}
/**
 * Looks up the minimum confidence threshold of a document type for the current batch class in
 * the database reached through the "jdbc/ephesoft" JNDI data source.
 *
 * @param docTypeName the document type name
 * @return the stored threshold; 100 in debug mode, when no connection is available, when no
 *         row is found, or when the query fails
 * @throws SQLException kept in the signature for existing callers; query and close failures
 *                      are logged and answered with the default instead
 */
private int getDocTypeConfidenceThreshold(String docTypeName) throws SQLException {
    if (IS_DEBUG) return 100;
    if (DB_CONNECTION == null) DB_CONNECTION = connectToJNDIDBConnection("jdbc/ephesoft");
    if (DB_CONNECTION == null) return 100;
    // Bind the name and batch class id as parameters instead of concatenating them into the SQL
    String query = "SELECT dt.min_confidence_threshold FROM document_type dt WHERE dt.document_type_name = ? AND dt.id IN (SELECT bcdt.document_type_id FROM batch_class_document_type bcdt WHERE bcdt.batch_class_id = (SELECT bc.id FROM batch_class bc WHERE bc.identifier = ?))";
    // try-with-resources closes the statement and result set without a null check
    try (PreparedStatement statement = DB_CONNECTION.prepareStatement(query)) {
        statement.setString(1, docTypeName);
        statement.setString(2, BATCHCLASSID_VALUE);
        try (ResultSet rs = statement.executeQuery()) {
            if (rs.next()) return rs.getInt(1);
        }
    } catch (Exception e) {
        LOGGER.error(e.getMessage());
    }
    return 100;
}
/**
 * Obtains a connection from a JNDI data source registered under "java:comp/env".
 *
 * @param dataSourceName JNDI name of the data source relative to "java:comp/env"
 * @return an open connection, or null when the lookup or the connection attempt fails
 */
private Connection connectToJNDIDBConnection(final String dataSourceName) {
    // Attempt to connect to the Ephesoft DB
    try {
        LOGGER.info("************ CONNECTING TO JNDI RESOURCE DB: " + dataSourceName);
        // Obtain our environment naming context
        Context initCtx = new InitialContext();
        Context envCtx = (Context) initCtx.lookup("java:comp/env");
        // Look up our data source
        DataSource ds = (DataSource) envCtx.lookup(dataSourceName);
        // Allocate and use a connection from the pool
        Connection conn = ds.getConnection();
        LOGGER.info("************ Connected to JNDI resource DB: " + dataSourceName);
        return conn;
    } catch (Exception e) {
        // Include the cause so a failed lookup can be diagnosed from the log
        LOGGER.error("************ Error encountered whilst trying to connect to JNDI Resource Connection: " + dataSourceName + " - " + e);
        return null;
    }
}
/**
 * Tells whether a path points at something that exists and is not a directory.
 *
 * @param path the file system path to check
 * @return true when the path exists and is not a directory
 */
private boolean fileExists(String path) {
    final File candidate = new File(path);
    if (!candidate.exists()) {
        return false;
    }
    return !candidate.isDirectory();
}
/**
 * Joins path segments with the platform file separator.
 *
 * @param strings the segments, in order
 * @return the segments joined by {@link File#separator}
 */
private String concatPath(String... strings) {
    return String.join(File.separator, strings);
}
/**
 * Captures batch-level values from the batch XML, works out where the script configuration
 * lives, and loads the script properties (creating the properties file when it is missing).
 *
 * @param documentFile the batch XML document
 * @throws IOException if the properties file cannot be read or created
 */
private void init(Document documentFile) throws IOException {
    ROOT = documentFile.getRootElement();
    BATCHNAME_VALUE = getChildText(ROOT, XMLGenerics.BATCH_NAME);
    BATCHLOCALPATH_VALUE = getChildText(ROOT, XMLGenerics.BATCH_LOCAL_PATH);
    BATCHUNCPATH_VALUE = getChildText(ROOT, XMLGenerics.UNC_FOLDER_PATH);
    BATCHINSTANCEID_VALUE = getChildText(ROOT, XMLGenerics.BATCH_INSTANCE_ID);
    BATCHCLASSID_VALUE = getChildText(ROOT, XMLGenerics.BATCH_CLASS_ID);

    // Debug runs read config from <working dir>/src; otherwise from the batch class folder
    // next to (not inside) "ephesoft-system-folder"
    String configParent;
    if (IS_DEBUG) {
        configParent = concatPath(new File("").getAbsolutePath(), "src");
    } else {
        String sharedRoot = BATCHLOCALPATH_VALUE.replaceFirst("ephesoft-system-folder", "");
        configParent = concatPath(sharedRoot, BATCHCLASSID_VALUE);
    }
    PATH_TO_SCRIPT_CONFIG = concatPath(configParent, "script-config");

    String propsFileName = SCRIPT_NAME + ".properties";
    SCRIPT_CONFIG_FILE = concatPath(PATH_TO_SCRIPT_CONFIG, propsFileName);
    if (!fileExists(SCRIPT_CONFIG_FILE)) {
        // First run: start the properties file with a header comment
        FileUtils.writeStringToFile(new File(SCRIPT_CONFIG_FILE), "# Properties file to control " + SCRIPT_NAME + " options\r\n", "UTF-8", true);
        SCRIPT_PROPS = new Properties();
        return;
    }
    SCRIPT_PROPS = readKeyWordProperties(propsFileName);
}
/**
 * Loads a properties file from the script-config folder. Backslashes in the file are doubled
 * before parsing so that they survive {@link Properties#load} as literal characters.
 *
 * @param fileName name of the properties file inside the script-config folder
 * @return the loaded properties; empty when the file has no content
 * @throws IOException if the file cannot be opened or read
 */
private Properties readKeyWordProperties(String fileName) throws IOException {
    Properties prop = new Properties();
    String propFileName = concatPath(PATH_TO_SCRIPT_CONFIG, fileName);
    String propFileContents = "";
    // The file is written as UTF-8 elsewhere in this class, so read it the same way;
    // try-with-resources releases the file handle
    try (Scanner scanner = new Scanner(new File(propFileName), "UTF-8")) {
        scanner.useDelimiter("\\Z");
        // An empty file has no token; next() would throw NoSuchElementException
        if (scanner.hasNext()) {
            propFileContents = scanner.next();
        }
        // Scanner hides read errors; surface them to the caller
        IOException readFailure = scanner.ioException();
        if (readFailure != null) throw readFailure;
    }
    prop.load(new StringReader(propFileContents.replace("\\", "\\\\")));
    return prop;
}
/**
 * Appends a "name=value" line to the properties file and records the same pair in the
 * in-memory properties.
 *
 * @param file         the properties file to append to
 * @param propertyName the property key
 * @param defaultValue the value to store
 * @throws IOException if the file cannot be written
 */
private void appendToPropsFile(File file, String propertyName, String defaultValue) throws IOException {
    final String entryLine = propertyName + '=' + defaultValue + "\r\n";
    FileUtils.writeStringToFile(file, entryLine, "UTF-8", true);
    SCRIPT_PROPS.setProperty(propertyName, defaultValue);
}
/**
 * Reads a script property. When the property is not set and a non-empty default is given, the
 * default is also appended to the properties file so it shows up for later editing.
 *
 * @param propertyName the property key
 * @param defaultValue value to fall back to (may be null or empty, in which case nothing is written)
 * @return the configured value, or {@code defaultValue} when the property is not set
 * @throws IOException if the default cannot be written to the properties file
 */
private String getPropValue(String propertyName, String defaultValue) throws IOException {
    final String configured = SCRIPT_PROPS.getProperty(propertyName);
    if (configured == null) {
        boolean hasUsableDefault = (defaultValue != null && !defaultValue.isEmpty());
        if (hasUsableDefault) {
            appendToPropsFile(new File(SCRIPT_CONFIG_FILE), propertyName, defaultValue);
        }
        return defaultValue;
    }
    return configured;
}
/**
 * Parses an XML file into a JDOM document.
 *
 * @param path file system path of the XML file
 * @return the parsed document
 * @throws JDOMException if the file is not well-formed XML
 * @throws IOException   if the file cannot be read
 */
private static Document readXmlFile(String path) throws JDOMException, IOException {
    SAXBuilder sb = new SAXBuilder();
    // build(String) interprets its argument as a system id (URI); wrapping the path in a File
    // lets JDOM handle spaces and other characters that are not valid in a URI
    return sb.build(new File(path));
}
/**
 * Writes a JDOM document to a file using the pretty-print format.
 *
 * @param path destination file path; an existing file is overwritten
 * @param doc  the document to write
 * @throws IOException if the file cannot be created or written
 */
private void writeXmlFile(String path, Document doc) throws IOException {
    XMLOutputter xmlOutput = new XMLOutputter();
    xmlOutput.setFormat(Format.getPrettyFormat());
    // Writing to a stream lets the outputter encode with the format's own encoding instead of
    // the platform default, and try-with-resources closes the file
    try (OutputStream out = new FileOutputStream(path)) {
        xmlOutput.output(doc, out);
    }
}
/**
 * Joins the given values with single spaces and emits the result: to stdout in debug mode,
 * otherwise to the logger at info level.
 *
 * @param messages the parts of the message; each is converted with {@code String.valueOf}
 */
private void printDebug(Object... messages) {
    StringBuilder joined = new StringBuilder();
    for (int idx = 0; idx < messages.length; idx++) {
        if (idx > 0) {
            joined.append(' ');
        }
        joined.append(String.valueOf(messages[idx]));
    }
    String line = joined.toString();
    if (!IS_DEBUG) {
        LOGGER.info(line);
        return;
    }
    System.out.println(line);
}
/**
 * Local test harness: runs the script against a sample batch XML in the user's Downloads folder.
 *
 * @param args not used
 */
public static void main(String... args) {
    final String sampleBatchPath = System.getProperty("user.home") + "/Downloads/BI1_batch (1).xml";
    try {
        Document sampleBatch = readXmlFile(sampleBatchPath);
        new ScriptDocumentAssembler().execute(sampleBatch, null, null);
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
/** Element names used when reading and writing the batch XML. */
@SuppressWarnings("unused")
private static class XMLGenerics {
    // Batch-level elements
    private static final String BATCH_NAME = "BatchName";
    private static final String BATCH_LOCAL_PATH = "BatchLocalPath";
    private static final String BATCH_INSTANCE_ID = "BatchInstanceIdentifier";
    private static final String BATCH_CLASS_ID = "BatchClassIdentifier";
    private static final String UNC_FOLDER_PATH = "UNCFolderPath";
    private static final String BATCH_LEVEL_FIELDS = "BatchLevelFields";
    private static final String BATCH_LEVEL_FIELD = "BatchLevelField";

    // Document and page structure
    private static final String DOCUMENTS = "Documents";
    private static final String DOCUMENT = "Document";
    private static final String PAGES = "Pages";
    private static final String PAGE = "Page";
    private static final String OLD_FILE_NAME = "OldFileName";

    // Field containers
    private static final String DOCUMENT_LEVEL_FIELDS = "DocumentLevelFields";
    private static final String DOCUMENT_LEVEL_FIELD = "DocumentLevelField";
    private static final String PAGE_LEVEL_FIELDS = "PageLevelFields";
    private static final String PAGE_LEVEL_FIELD = "PageLevelField";
    private static final String ALTERNATE_VALUES = "AlternateValues";
    private static final String ALTERNATE_VALUE = "AlternateValue";
    private static final String FIELD_VALUE_OPTION_LIST = "FieldValueOptionList";
    private static final String COORDINATES_LIST = "CoordinatesList";

    // Common child elements
    private static final String IDENTIFIER = "Identifier";
    private static final String NAME = "Name";
    private static final String TYPE = "Type";
    private static final String VALUE = "Value";
    private static final String VALID = "Valid";
    private static final String DESCRIPTION = "Description";

    // Confidence and review state
    private static final String CONFIDENCE = "Confidence";
    private static final String CONFIDENCE_THRESHOLD = "ConfidenceThreshold";
    private static final String OCR_CONFIDENCE = "OcrConfidence";
    private static final String OCR_CONFIDENCE_THRESHOLD = "OcrConfidenceThreshold";
    private static final String FORCE_REVIEW = "ForceReview";
    private static final String REVIEWED = "Reviewed";
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment