Skip to content

Instantly share code, notes, and snippets.

@GroupDocsGists
Last active June 27, 2019 15:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save GroupDocsGists/c580d6a015cf388b7762428f01735aa8 to your computer and use it in GitHub Desktop.
Save GroupDocsGists/c580d6a015cf388b7762428f01735aa8 to your computer and use it in GitHub Desktop.
// For complete examples and data files, please go to https://github.com/groupdocs-parser/GroupDocs.Parser-for-.NET
// Example: parse "invoice.pdf" against a template of fields and print every
// extracted field returned for the name "Address".

// Create a collection of template fields
TemplateField[] templateFields = new TemplateField[]
{
    // Fields located by a fixed rectangle
    new TemplateField("FromCompany", TemplateFieldPosition.CreateFixed(new Rectangle(35, 135, 100, 10))),
    new TemplateField("FromAddress", TemplateFieldPosition.CreateFixed(new Rectangle(35, 150, 100, 35))),
    new TemplateField("FromEmail", TemplateFieldPosition.CreateFixed(new Rectangle(35, 190, 150, 2))),
    new TemplateField("ToCompany", TemplateFieldPosition.CreateFixed(new Rectangle(35, 250, 100, 2))),
    new TemplateField("ToAddress", TemplateFieldPosition.CreateFixed(new Rectangle(35, 260, 100, 15))),
    new TemplateField("ToEmail", TemplateFieldPosition.CreateFixed(new Rectangle(35, 290, 150, 2))),

    // Each label field is located by a regular expression; its "...Value" companion
    // is declared as related to that label (position type Right) with a fixed size
    new TemplateField("InvoiceNumber", TemplateFieldPosition.CreateRegex("Invoice Number")),
    new TemplateField("InvoiceNumberValue", TemplateFieldPosition.CreateRelated("InvoiceNumber", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("InvoiceOrder", TemplateFieldPosition.CreateRegex("Order Number")),
    new TemplateField("InvoiceOrderValue", TemplateFieldPosition.CreateRelated("InvoiceOrder", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("InvoiceDate", TemplateFieldPosition.CreateRegex("Invoice Date")),
    new TemplateField("InvoiceDateValue", TemplateFieldPosition.CreateRelated("InvoiceDate", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("DueDate", TemplateFieldPosition.CreateRegex("Due Date")),
    new TemplateField("DueDateValue", TemplateFieldPosition.CreateRelated("DueDate", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("TotalDue", TemplateFieldPosition.CreateRegex("Total Due")),
    new TemplateField("TotalDueValue", TemplateFieldPosition.CreateRelated("TotalDue", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
};

// Create a document template
DocumentTemplate template = new DocumentTemplate(templateFields);

// Extract data from PDF
DocumentData data = DocumentParser.Default.ParseByTemplate("invoice.pdf", template);

// Get all the fields with "Address" name
// NOTE(review): the template above declares "FromAddress" and "ToAddress" but no field
// named exactly "Address" - confirm how GetDataFieldsByName matches names.
IList<DocumentDataField> addressFields = data.GetDataFieldsByName("Address");
if (addressFields.Count == 0)
{
    Console.WriteLine("Address not found");
}
else
{
    Console.WriteLine("Address");

    // Iterate over the fields collection
    foreach (DocumentDataField addressField in addressFields)
    {
        Console.WriteLine(addressField.Value);

        // If it's a related field, also print the value of the field it is linked to
        if (addressField.RelatedDataField != null)
        {
            Console.Write("Linked to ");
            Console.WriteLine(addressField.RelatedDataField.Value);
        }
    }
}
// For complete examples and data files, please go to https://github.com/groupdocs-parser/GroupDocs.Parser-for-Java
// Example: parse "invoice.pdf" against a template of fields and print every
// extracted field returned for the name "Address".

// Create a collection of template fields
TemplateField[] templateFields = new TemplateField[]
{
    // Fields located by a fixed rectangle
    new TemplateField("FromCompany", TemplateFieldPosition.createFixed(new Rectangle(35, 135, 100, 10))),
    new TemplateField("FromAddress", TemplateFieldPosition.createFixed(new Rectangle(35, 150, 100, 35))),
    new TemplateField("FromEmail", TemplateFieldPosition.createFixed(new Rectangle(35, 190, 150, 2))),
    new TemplateField("ToCompany", TemplateFieldPosition.createFixed(new Rectangle(35, 250, 100, 2))),
    new TemplateField("ToAddress", TemplateFieldPosition.createFixed(new Rectangle(35, 260, 100, 15))),
    new TemplateField("ToEmail", TemplateFieldPosition.createFixed(new Rectangle(35, 290, 150, 2))),

    // Each label field is located by a regular expression; its "...Value" companion
    // is declared as related to that label (position type Right) with a fixed size
    new TemplateField("InvoiceNumber", TemplateFieldPosition.createRegex("Invoice Number")),
    new TemplateField("InvoiceNumberValue", TemplateFieldPosition.createRelated(
        "InvoiceNumber",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("InvoiceOrder", TemplateFieldPosition.createRegex("Order Number")),
    new TemplateField("InvoiceOrderValue", TemplateFieldPosition.createRelated(
        "InvoiceOrder",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("InvoiceDate", TemplateFieldPosition.createRegex("Invoice Date")),
    new TemplateField("InvoiceDateValue", TemplateFieldPosition.createRelated(
        "InvoiceDate",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("DueDate", TemplateFieldPosition.createRegex("Due Date")),
    new TemplateField("DueDateValue", TemplateFieldPosition.createRelated(
        "DueDate",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("TotalDue", TemplateFieldPosition.createRegex("Total Due")),
    new TemplateField("TotalDueValue", TemplateFieldPosition.createRelated(
        "TotalDue",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
};

// Create a document template
DocumentTemplate template = new DocumentTemplate(Arrays.asList(templateFields));

// Extract data from PDF
DocumentData data = DocumentParser.DEFAULT.parseByTemplate("invoice.pdf", template);

// Get all the fields with "Address" name
// NOTE(review): the template above declares "FromAddress" and "ToAddress" but no field
// named exactly "Address" - confirm how getDataFieldsByName matches names.
List<DocumentDataField> addressFields = data.getDataFieldsByName("Address");
if (addressFields.isEmpty()) {
    System.out.println("Address not found");
} else {
    System.out.println("Address");

    // Iterate over the fields collection
    for (DocumentDataField addressField : addressFields) {
        System.out.println(addressField.getValue());

        // If it's a related field, also print the value of the field it is linked to
        if (addressField.getRelatedDataField() != null) {
            System.out.print("Linked to ");
            System.out.println(addressField.getRelatedDataField().getValue());
        }
    }
}
// For complete examples and data files, please go to https://github.com/groupdocs-parser/GroupDocs.Parser-for-.NET
// Example: parse "invoice.pdf" against a template of fields and two tables
// ("details" and "summary"), then print every extracted table cell by cell.

// Create a collection of template fields
TemplateField[] templateFields = new TemplateField[]
{
    // Fields located by a fixed rectangle
    new TemplateField("FromCompany", TemplateFieldPosition.CreateFixed(new Rectangle(35, 135, 100, 10))),
    new TemplateField("FromAddress", TemplateFieldPosition.CreateFixed(new Rectangle(35, 150, 100, 35))),
    new TemplateField("FromEmail", TemplateFieldPosition.CreateFixed(new Rectangle(35, 190, 150, 2))),
    new TemplateField("ToCompany", TemplateFieldPosition.CreateFixed(new Rectangle(35, 250, 100, 2))),
    new TemplateField("ToAddress", TemplateFieldPosition.CreateFixed(new Rectangle(35, 260, 100, 15))),
    new TemplateField("ToEmail", TemplateFieldPosition.CreateFixed(new Rectangle(35, 290, 150, 2))),

    // Each label field is located by a regular expression; its "...Value" companion
    // is declared as related to that label (position type Right) with a fixed size
    new TemplateField("InvoiceNumber", TemplateFieldPosition.CreateRegex("Invoice Number")),
    new TemplateField("InvoiceNumberValue", TemplateFieldPosition.CreateRelated("InvoiceNumber", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("InvoiceOrder", TemplateFieldPosition.CreateRegex("Order Number")),
    new TemplateField("InvoiceOrderValue", TemplateFieldPosition.CreateRelated("InvoiceOrder", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("InvoiceDate", TemplateFieldPosition.CreateRegex("Invoice Date")),
    new TemplateField("InvoiceDateValue", TemplateFieldPosition.CreateRelated("InvoiceDate", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("DueDate", TemplateFieldPosition.CreateRegex("Due Date")),
    new TemplateField("DueDateValue", TemplateFieldPosition.CreateRelated("DueDate", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
    new TemplateField("TotalDue", TemplateFieldPosition.CreateRegex("Total Due")),
    new TemplateField("TotalDueValue", TemplateFieldPosition.CreateRelated("TotalDue", TemplateFieldRelatedPositionType.Right, new Size(200, 15))),
};

// Create detector parameters for "Details" table
TableAreaDetectorParameters detailsTableParameters = new TableAreaDetectorParameters
{
    Rectangle = new Rectangle(35, 320, 530, 55)
};

// Create detector parameters for "Summary" table
TableAreaDetectorParameters summaryTableParameters = new TableAreaDetectorParameters
{
    Rectangle = new Rectangle(330, 385, 220, 65)
};

// Create a collection of template tables
TemplateTable[] templateTables = new TemplateTable[]
{
    new TemplateTable("details", detailsTableParameters),
    new TemplateTable("summary", summaryTableParameters)
};

// Create a document template
DocumentTemplate template = new DocumentTemplate(templateFields, templateTables);

// Extract data from PDF
DocumentData data = DocumentParser.Default.ParseByTemplate("invoice.pdf", template);

// Get all the tables
IList<DocumentDataTable> dataTables = data.GetDataTables();

// Iterate over tables
foreach (DocumentDataTable table in dataTables)
{
    // Print a table name
    Console.WriteLine(table.TableName);

    // Iterate over rows; each row is printed on its own line
    for (int r = 0; r < table.RowCount; r++)
    {
        // Iterate over columns
        for (int c = 0; c < table.ColumnCount; c++)
        {
            // Print a value of the cell followed by a single-space separator
            Console.Write(table[r, c]);
            Console.Write(" ");
        }

        Console.WriteLine();
    }
}
// For complete examples and data files, please go to https://github.com/groupdocs-parser/GroupDocs.Parser-for-Java
// Example: parse "invoice.pdf" against a template of fields and two tables
// ("details" and "summary"), then print every extracted table cell by cell.

// Create a collection of template fields
TemplateField[] templateFields = new TemplateField[]
{
    // Fields located by a fixed rectangle
    new TemplateField("FromCompany", TemplateFieldPosition.createFixed(new Rectangle(35, 135, 100, 10))),
    new TemplateField("FromAddress", TemplateFieldPosition.createFixed(new Rectangle(35, 150, 100, 35))),
    new TemplateField("FromEmail", TemplateFieldPosition.createFixed(new Rectangle(35, 190, 150, 2))),
    new TemplateField("ToCompany", TemplateFieldPosition.createFixed(new Rectangle(35, 250, 100, 2))),
    new TemplateField("ToAddress", TemplateFieldPosition.createFixed(new Rectangle(35, 260, 100, 15))),
    new TemplateField("ToEmail", TemplateFieldPosition.createFixed(new Rectangle(35, 290, 150, 2))),

    // Each label field is located by a regular expression; its "...Value" companion
    // is declared as related to that label (position type Right) with a fixed size
    new TemplateField("InvoiceNumber", TemplateFieldPosition.createRegex("Invoice Number")),
    new TemplateField("InvoiceNumberValue", TemplateFieldPosition.createRelated(
        "InvoiceNumber",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("InvoiceOrder", TemplateFieldPosition.createRegex("Order Number")),
    new TemplateField("InvoiceOrderValue", TemplateFieldPosition.createRelated(
        "InvoiceOrder",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("InvoiceDate", TemplateFieldPosition.createRegex("Invoice Date")),
    new TemplateField("InvoiceDateValue", TemplateFieldPosition.createRelated(
        "InvoiceDate",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("DueDate", TemplateFieldPosition.createRegex("Due Date")),
    new TemplateField("DueDateValue", TemplateFieldPosition.createRelated(
        "DueDate",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
    new TemplateField("TotalDue", TemplateFieldPosition.createRegex("Total Due")),
    new TemplateField("TotalDueValue", TemplateFieldPosition.createRelated(
        "TotalDue",
        TemplateFieldRelatedPositionType.Right,
        new Size(200, 15))),
};

// Create detector parameters for "Details" table
TableAreaDetectorParameters detailsTableParameters = new TableAreaDetectorParameters();
detailsTableParameters.setRectangle(new Rectangle(35, 320, 530, 55));

// Create detector parameters for "Summary" table
TableAreaDetectorParameters summaryTableParameters = new TableAreaDetectorParameters();
summaryTableParameters.setRectangle(new Rectangle(330, 385, 220, 65));

// Create a collection of template tables
TemplateTable[] templateTables = new TemplateTable[]
{
    new TemplateTable("details", detailsTableParameters),
    new TemplateTable("summary", summaryTableParameters)
};

// Create a document template
DocumentTemplate template = new DocumentTemplate(Arrays.asList(templateFields), Arrays.asList(templateTables));

// Extract data from PDF
// The lower-case file name matters on case-sensitive file systems.
DocumentData data = DocumentParser.DEFAULT.parseByTemplate("invoice.pdf", template);

// Get all the tables
List<DocumentDataTable> dataTables = data.getDataTables();

// Iterate over tables
for (DocumentDataTable table : dataTables) {
    // Print a table name
    System.out.println(table.getTableName());

    // Iterate over rows; each row is printed on its own line
    for (int r = 0; r < table.getRowCount(); r++) {
        // Iterate over columns
        for (int c = 0; c < table.getColumnCount(); c++) {
            // Print a value of the cell followed by a single-space separator
            System.out.print(table.get_Item(r, c));
            System.out.print(" ");
        }
        System.out.println();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment