Skip to content

Instantly share code, notes, and snippets.

@JerryNixon
Created September 10, 2019 17:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JerryNixon/b86792004c4a2dd7a74c54a0645b9e3c to your computer and use it in GitHub Desktop.
Save JerryNixon/b86792004c4a2dd7a74c54a0645b9e3c to your computer and use it in GitHub Desktop.
A test that we tried against artwork. It sort of works, but we abandoned it.
/// <summary>
/// Loads "artwork1.pdf" from the current directory, runs a TableAbsorber over the
/// first page, keeps the detected tables whose rectangle is narrower than 100 and
/// taller than 100, outlines each of them on the page, prints a normalized form of
/// the text found inside each rectangle, and finally saves the document.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    SetLicense();
    var path = Path.Combine(Environment.CurrentDirectory, "artwork1.pdf");
    using (var doc = new Document(path))
    {
        var tableOptions = new TextSearchOptions(false)
        {
            IgnoreShadowText = true,
            SearchForTextRelatedGraphics = false,
            UseFontEngineEncoding = false
        };
        var tableAbsorber = new TableAbsorber(tableOptions);

        Console.WriteLine("Starting the TableAbsorber");
        var page = doc.Pages.First();
        tableAbsorber.Visit(page);
        Console.WriteLine("Done with the TableAbsorber");

        // Materialize the filtered set once. Left as a deferred query, every
        // Count() call (including the one per loop iteration below) would
        // re-run both filters over the whole table list.
        var tables = tableAbsorber.TableList
            .Where(x => x.Rectangle.Width < 100)
            .Where(x => x.Rectangle.Height > 100)
            .ToList();

        Console.WriteLine($"There are {tables.Count} tables out of {tableAbsorber.TableList.Count()}.");

        // Wait for console input before walking through the individual tables.
        Console.Read();

        var index = 0;
        foreach (var table in tables)
        {
            Console.WriteLine($"{++index} of {tables.Count}");
            Console.WriteLine($"{table.Rectangle.Width} by {table.Rectangle.Height}");
            DrawRectangleOnPage(table.Rectangle, page);

            var text = ExtractText(table.Rectangle, page);
            if (!string.IsNullOrEmpty(text))
            {
                // Lower-case with the invariant culture so the ASCII-only filter
                // below behaves the same regardless of the machine's locale, then
                // drop everything except a-z, 0-9 and ':'.
                var search = Regex.Replace(text.ToLowerInvariant(), "[^a-z0-9:]", string.Empty);
                Console.WriteLine(search);
            }
            else
            {
                Console.WriteLine("No text.");
            }
        }

        SaveDoc(doc);
    }
}
/// <summary>
/// Runs a TextAbsorber over <paramref name="page"/> using search options built
/// from <paramref name="rectangle"/> and returns the absorbed text.
/// </summary>
/// <param name="rectangle">Rectangle passed to the TextSearchOptions constructor.</param>
/// <param name="page">Page that accepts the absorber.</param>
/// <returns>The absorber's Text property after the page has been visited.</returns>
private static string ExtractText(Aspose.Pdf.Rectangle rectangle, Page page)
{
    // Same settings as before, applied in the same order.
    var searchOptions = new TextSearchOptions(rectangle);
    searchOptions.LimitToPageBounds = true;
    searchOptions.IsRegularExpressionUsed = false;
    searchOptions.IgnoreShadowText = false;

    var textAbsorber = new TextAbsorber(searchOptions);
    page.Accept(textAbsorber);

    var extracted = textAbsorber.Text;
    return extracted;
}
/// <summary>
/// Appends operators to the page contents that stroke the outline of
/// <paramref name="rectangle"/> with RGB stroke colour (0, 1, 0) and line width 2.
/// </summary>
/// <param name="rectangle">Rectangle whose LLX, LLY, Width and Height define the outline.</param>
/// <param name="page">Page whose Contents collection receives the operators.</param>
private static void DrawRectangleOnPage(Rectangle rectangle, Page page)
{
    var contents = page.Contents;

    // Save the graphics state so the stroke settings below do not leak out.
    contents.Add(new Aspose.Pdf.Operators.GSave());

    // Matrix values 1,0,0,1,0,0 are the identity transform.
    contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
    contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 1, 0));
    contents.Add(new Aspose.Pdf.Operators.SetLineWidth(2));

    var left = rectangle.LLX;
    var bottom = rectangle.LLY;
    var width = rectangle.Width;
    var height = rectangle.Height;
    contents.Add(new Aspose.Pdf.Operators.Re(left, bottom, width, height));

    contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());

    // Restore the state saved at the top.
    contents.Add(new Aspose.Pdf.Operators.GRestore());
}
/// <summary>
/// Appends operators to the page contents that stroke a closed outline through the
/// given points with RGB stroke colour (0, 0, 1) and line width 1.
/// </summary>
/// <param name="polygon">Vertices of the outline, in drawing order.</param>
/// <param name="page">Page whose Contents collection receives the operators.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="polygon"/> is null.
/// </exception>
private static void DrawPolygonOnPage(Point[] polygon, Page page)
{
    // Validate before touching the page: failing on polygon[0] after GSave has
    // already been appended would leave an unbalanced save in the page contents.
    if (polygon == null)
    {
        throw new System.ArgumentNullException(nameof(polygon));
    }

    if (polygon.Length == 0)
    {
        // Nothing to draw.
        return;
    }

    page.Contents.Add(new Aspose.Pdf.Operators.GSave());
    // Matrix values 1,0,0,1,0,0 are the identity transform.
    page.Contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
    page.Contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 0, 1));
    page.Contents.Add(new Aspose.Pdf.Operators.SetLineWidth(1));

    // Start at the first vertex, then connect each following vertex in order.
    page.Contents.Add(new Aspose.Pdf.Operators.MoveTo(polygon[0].X, polygon[0].Y));
    for (var i = 1; i < polygon.Length; i++)
    {
        page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[i].X, polygon[i].Y));
    }

    // Explicit segment back to the first vertex, kept from the original so the
    // emitted operator sequence is unchanged.
    page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[0].X, polygon[0].Y));
    page.Contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());
    page.Contents.Add(new Aspose.Pdf.Operators.GRestore());
}
/// <summary>
/// Creates an Aspose.Pdf.License and applies the license named "Aspose.Pdf.lic".
/// </summary>
private static void SetLicense()
{
    new Aspose.Pdf.License().SetLicense("Aspose.Pdf.lic");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment