Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aspose-com-gists/0fbf54fdf5406ad739b76e02e3a2f00e to your computer and use it in GitHub Desktop.
Save aspose-com-gists/0fbf54fdf5406ad739b76e02e3a2f00e to your computer and use it in GitHub Desktop.
Read the complete article about extracting data from tables in PDF files using C++ by visiting the following link.
https://blog.aspose.com/2021/07/14/extract-data-from-tables-in-pdf-files-using-cpp/
// Extract the text of every table found on every page of a PDF document
// and print it to the console, one text fragment per line.

// Open the input PDF
auto pdfDocument = MakeObject<Document>(u"SourceDirectory\\PDF\\Table_input3.pdf");

// Walk the document page by page
for (const auto& currentPage : pdfDocument->get_Pages())
{
    // Run a fresh TableAbsorber over this page, then read back its table list
    auto tableAbsorber = MakeObject<TableAbsorber>();
    tableAbsorber->Visit(currentPage);

    for (const auto& currentTable : tableAbsorber->get_TableList())
    {
        // Header line printed once per table
        Console::WriteLine(u"Table");

        for (const auto& tableRow : currentTable->get_RowList())
        {
            for (const auto& tableCell : tableRow->get_CellList())
            {
                for (const auto& textFragment : tableCell->get_TextFragments())
                {
                    // Join all segments of the fragment into a single string
                    String fragmentText = u"";
                    for (const auto& textSegment : textFragment->get_Segments())
                    {
                        fragmentText = String::Concat(fragmentText, textSegment->get_Text());
                    }

                    // Emit the assembled fragment text on its own line
                    Console::WriteLine(fragmentText);
                }
            }

            // Blank line after each row
            Console::WriteLine();
        }
    }
}
// Extract the text of the tables that lie inside the region marked by the
// first Square annotation on the first page, printing one text fragment per line.

// Open the input PDF
auto pdfDocument = MakeObject<Document>(u"SourceDirectory\\PDF\\Table_input4.pdf");

// Fetch the page at index 1 of the page collection
auto page = pdfDocument->get_Pages()->idx_get(1);

// Look through the page's annotations for a Square one
for (const auto& pageAnnotation : page->get_Annotations())
{
    // Anything that is not a Square annotation is skipped
    if (pageAnnotation->get_AnnotationType() != Annotations::AnnotationType::Square)
    {
        continue;
    }

    System::SharedPtr<SquareAnnotation> squareAnnotation = DynamicCast<SquareAnnotation>(pageAnnotation);

    // Run a TableAbsorber over the page, then read back its table list
    auto tableAbsorber = MakeObject<TableAbsorber>();
    tableAbsorber->Visit(page);

    for (const auto& currentTable : tableAbsorber->get_TableList())
    {
        const auto regionRect = squareAnnotation->get_Rect();
        const auto tableRect = currentTable->get_Rectangle();

        // The table qualifies only when it is strictly inside the annotation
        // rectangle on all four sides
        const bool tableInsideRegion =
            (regionRect->get_LLX() < tableRect->get_LLX()) &&
            (regionRect->get_LLY() < tableRect->get_LLY()) &&
            (regionRect->get_URX() > tableRect->get_URX()) &&
            (regionRect->get_URY() > tableRect->get_URY());
        if (!tableInsideRegion)
        {
            continue;
        }

        for (const auto& tableRow : currentTable->get_RowList())
        {
            for (const auto& tableCell : tableRow->get_CellList())
            {
                for (const auto& textFragment : tableCell->get_TextFragments())
                {
                    // Join all segments of the fragment into a single string
                    String fragmentText = u"";
                    for (const auto& textSegment : textFragment->get_Segments())
                    {
                        fragmentText = String::Concat(fragmentText, textSegment->get_Text());
                    }

                    // Emit the assembled fragment text on its own line
                    Console::WriteLine(fragmentText);
                }
            }

            // Blank line after each row
            Console::WriteLine();
        }
    }

    // Only the first Square annotation is processed
    break;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment