Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aspose-com-gists/97a90eacc17f509deafc22fd5721a59a to your computer and use it in GitHub Desktop.
Save aspose-com-gists/97a90eacc17f509deafc22fd5721a59a to your computer and use it in GitHub Desktop.
Extract Data from Tables in PDF using Java
// Prints the text of every table on page 1 of "sample.pdf" that lies strictly
// inside the first square annotation found on that page. Each table row is
// printed on its own line, with "|" appended after every text fragment.

// Load source PDF document
com.aspose.pdf.Document pdfDocument = new com.aspose.pdf.Document("sample.pdf");
try {
    // Get reference of the desired page
    com.aspose.pdf.Page page = pdfDocument.getPages().get_Item(1);

    // Create a selector that matches square annotations; the SquareAnnotation
    // passed here only serves as a type template for the selector
    com.aspose.pdf.AnnotationSelector annotationSelector = new com.aspose.pdf.AnnotationSelector(
            new com.aspose.pdf.SquareAnnotation(page, com.aspose.pdf.Rectangle.getTrivial()));

    // The selector is a visitor: it collects nothing until the page accepts it.
    // Without this call the selected list is always empty.
    page.accept(annotationSelector);

    // Get list of matching annotations
    java.util.List<com.aspose.pdf.Annotation> list = annotationSelector.getSelected();
    if (list.isEmpty()) {
        System.out.println("Marked tables not found..");
        return;
    }

    // Use the first square annotation as the region that marks the table
    com.aspose.pdf.SquareAnnotation squareAnnotation = (com.aspose.pdf.SquareAnnotation) list.get(0);
    com.aspose.pdf.Rectangle region = squareAnnotation.getRect();

    // Create table absorber and visit page
    com.aspose.pdf.TableAbsorber absorber = new com.aspose.pdf.TableAbsorber();
    absorber.visit(page);

    // Iterate through tables
    for (com.aspose.pdf.AbsorbedTable table : absorber.getTableList()) {
        // Check whether the table lies strictly inside the marked region
        com.aspose.pdf.Rectangle bounds = table.getRectangle();
        boolean isInRegion = (region.getLLX() < bounds.getLLX())
                && (region.getLLY() < bounds.getLLY())
                && (region.getURX() > bounds.getURX())
                && (region.getURY() > bounds.getURY());
        if (!isInRegion) {
            continue;
        }

        for (com.aspose.pdf.AbsorbedRow row : table.getRowList()) {
            for (com.aspose.pdf.AbsorbedCell cell : row.getCellList()) {
                for (com.aspose.pdf.TextFragment fragment : cell.getTextFragments()) {
                    // A fragment may be split into several segments; join them
                    StringBuilder sb = new StringBuilder();
                    for (com.aspose.pdf.TextSegment seg : fragment.getSegments()) {
                        sb.append(seg.getText());
                    }
                    System.out.print(sb.toString() + "|");
                }
            }
            // End of row
            System.out.println();
        }
    }
} finally {
    // Release the resources held by the document, including on the early return
    pdfDocument.close();
}
// Prints the text of every table found on every page of "sample.pdf".
// Each table is introduced by a "Table" line; each row is printed on its own
// line, with "|" appended after every text fragment.

// Load source PDF document
com.aspose.pdf.Document pdfDocument = new com.aspose.pdf.Document("sample.pdf");
try {
    // Scan pages
    for (com.aspose.pdf.Page page : pdfDocument.getPages()) {
        // Create a fresh table absorber for each page so that getTableList()
        // can only contain this page's tables.
        // NOTE(review): whether visit() resets a reused absorber's table list
        // is not visible here; a per-page absorber is correct either way.
        com.aspose.pdf.TableAbsorber absorber = new com.aspose.pdf.TableAbsorber();
        // Visit page
        absorber.visit(page);

        // Iterate through tables
        for (com.aspose.pdf.AbsorbedTable table : absorber.getTableList()) {
            System.out.println("Table");
            // Iterate through list of rows
            for (com.aspose.pdf.AbsorbedRow row : table.getRowList()) {
                // Iterate through list of cells
                for (com.aspose.pdf.AbsorbedCell cell : row.getCellList()) {
                    // Iterate through text fragments
                    for (com.aspose.pdf.TextFragment fragment : cell.getTextFragments()) {
                        // A fragment may be split into several segments; join them
                        StringBuilder sb = new StringBuilder();
                        for (com.aspose.pdf.TextSegment seg : fragment.getSegments()) {
                            sb.append(seg.getText());
                        }
                        System.out.print(sb.toString() + "|");
                    }
                }
                // End of row
                System.out.println();
            }
        }
    }
} finally {
    // Release the resources held by the document
    pdfDocument.close();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment