Last active
January 27, 2021 07:31
-
-
Save GroupDocsGists/c9cde916cd2a125b834c8f2c13dacc82 to your computer and use it in GitHub Desktop.
Create Template and Extract Data from Invoice or Receipt in C#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Builds the document template used to extract data from the invoice.
// The template combines three kinds of items:
//   * fixed-position fields  - text read from a hard-coded rectangle;
//   * regex + linked fields  - a label located by regular expression, plus a
//                              value field positioned relative to that label;
//   * tables                 - detected inside a hard-coded rectangle.
// All coordinates and sizes below are specific to this invoice layout.

// Create detector parameters for "Details" table.
// NOTE(review): the second argument is passed as null; presumably this means
// "no explicit column separators, detect them automatically" - confirm against the API docs.
TemplateTableParameters detailsTableParameters = new TemplateTableParameters(
    new Rectangle(new Point(35, 320), new Size(530, 55)),
    null);

// Create detector parameters for "Summary" table (same null second argument as above).
TemplateTableParameters summaryTableParameters = new TemplateTableParameters(
    new Rectangle(new Point(330, 385), new Size(220, 65)),
    null);

// Create a collection of template items
TemplateItem[] templateItems = new TemplateItem[]
{
    // Sender ("From") details, read from fixed rectangles.
    new TemplateField(new TemplateFixedPosition(new Rectangle(new Point(35, 135), new Size(100, 10))), "FromCompany"),
    new TemplateField(new TemplateFixedPosition(new Rectangle(new Point(35, 150), new Size(100, 35))), "FromAddress"),
    new TemplateField(new TemplateFixedPosition(new Rectangle(new Point(35, 190), new Size(150, 2))), "FromEmail"),

    // Recipient ("To") details, read from fixed rectangles.
    new TemplateField(new TemplateFixedPosition(new Rectangle(new Point(35, 250), new Size(100, 2))), "ToCompany"),
    new TemplateField(new TemplateFixedPosition(new Rectangle(new Point(35, 260), new Size(100, 15))), "ToAddress"),
    new TemplateField(new TemplateFixedPosition(new Rectangle(new Point(35, 290), new Size(150, 2))), "ToEmail"),

    // Label/value pairs. Each label is located by a regular expression and the
    // matching "...Value" field is linked to it by the label's field name.
    // NOTE(review): the edges (false, false, true, false) presumably select the
    // area to the right of the label - confirm the parameter order in the API docs.
    new TemplateField(new TemplateRegexPosition("Invoice Number"), "InvoiceNumber"),
    new TemplateField(
        new TemplateLinkedPosition(
            "InvoiceNumber",
            new Size(200, 15),
            new TemplateLinkedPositionEdges(false, false, true, false)),
        "InvoiceNumberValue"),

    new TemplateField(new TemplateRegexPosition("Order Number"), "InvoiceOrder"),
    new TemplateField(
        new TemplateLinkedPosition(
            "InvoiceOrder",
            new Size(200, 15),
            new TemplateLinkedPositionEdges(false, false, true, false)),
        "InvoiceOrderValue"),

    new TemplateField(new TemplateRegexPosition("Invoice Date"), "InvoiceDate"),
    new TemplateField(
        new TemplateLinkedPosition(
            "InvoiceDate",
            new Size(200, 15),
            new TemplateLinkedPositionEdges(false, false, true, false)),
        "InvoiceDateValue"),

    new TemplateField(new TemplateRegexPosition("Due Date"), "DueDate"),
    new TemplateField(
        new TemplateLinkedPosition(
            "DueDate",
            new Size(200, 15),
            new TemplateLinkedPositionEdges(false, false, true, false)),
        "DueDateValue"),

    new TemplateField(new TemplateRegexPosition("Total Due"), "TotalDue"),
    new TemplateField(
        new TemplateLinkedPosition(
            "TotalDue",
            new Size(200, 15),
            new TemplateLinkedPositionEdges(false, false, true, false)),
        "TotalDueValue"),

    // Tables, using the detector parameters created above.
    // NOTE(review): the trailing null is presumably an optional page index - confirm.
    new TemplateTable(detailsTableParameters, "details", null),
    new TemplateTable(summaryTableParameters, "summary", null)
};

// Create a document template
Template template = new Template(templateItems);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Parses "invoice.pdf" with the previously built `template` and prints every
// extracted item to the console as "<name>: <value>". Table items are printed
// row by row with tab-separated cells.

// Create an instance of Parser class; `using` disposes it when the block ends.
using (Parser parser = new Parser("invoice.pdf"))
{
    // Parse the document by the template
    DocumentData data = parser.ParseByTemplate(template);

    // Guard against a null result before dereferencing it.
    // NOTE(review): the vendor examples treat null as "parsing by template is
    // not supported for this document" - confirm against the API docs.
    if (data == null)
    {
        Console.WriteLine("Parsing by template isn't supported for this document.");
    }
    else
    {
        // Print all extracted data
        for (int i = 0; i < data.Count; i++)
        {
            Console.Write(data[i].Name + ": ");

            // Check if the field is a table
            PageTableArea area = data[i].PageArea as PageTableArea;
            if (area == null)
            {
                // Not a table: print the area's string representation.
                PageArea pageArea = data[i].PageArea;
                Console.WriteLine(pageArea.ToString());
            }
            else
            {
                // Iterate via table rows; the first row continues on the same
                // line as the item name written above.
                for (int row = 0; row < area.RowCount; row++)
                {
                    // Iterate via table columns
                    for (int column = 0; column < area.ColumnCount; column++)
                    {
                        // Get the cell value; null when the cell is not a text area.
                        PageTextArea cellValue = area[row, column].PageArea as PageTextArea;

                        // Print the space between columns
                        if (column > 0)
                        {
                            Console.Write("\t");
                        }

                        // Print the cell value (empty string for non-text cells)
                        Console.Write(cellValue == null ? "" : cellValue.Text);
                    }

                    // Print new line
                    Console.WriteLine();
                }
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment