Created
September 10, 2019 17:35
-
-
Save JerryNixon/b86792004c4a2dd7a74c54a0645b9e3c to your computer and use it in GitHub Desktop.
A test that we tried against artwork. It sort of works, but it has been abandoned.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Entry point. Opens "artwork1.pdf" from the current directory, runs a
/// <c>TableAbsorber</c> over the first page, keeps the absorbed tables whose
/// <c>Rectangle.Width</c> is below 100 and whose <c>Rectangle.Height</c> is
/// above 100, outlines each of them on the page, prints the text found inside
/// each one (lower-cased and stripped to <c>a-z</c>, <c>0-9</c> and <c>:</c>),
/// and finally hands the document to <c>SaveDoc</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    SetLicense();

    var path = Path.Combine(Environment.CurrentDirectory, "artwork1.pdf");
    using (var doc = new Document(path))
    {
        var tableOptions = new TextSearchOptions(false)
        {
            IgnoreShadowText = true,
            SearchForTextRelatedGraphics = false,
            UseFontEngineEncoding = false
        };
        var tableAbsorber = new TableAbsorber(tableOptions);

        Console.WriteLine("Starting the TableAbsorber");
        var page = doc.Pages.First();
        tableAbsorber.Visit(page);
        Console.WriteLine("Done with the TableAbsorber");

        // Materialize the filter once: the result is counted and iterated
        // several times below, and a deferred query would re-run the filter
        // on every Count() call (including once per loop iteration).
        var tables = tableAbsorber.TableList
            .Where(x => x.Rectangle.Width < 100 && x.Rectangle.Height > 100)
            .ToList();

        Console.WriteLine($"There are {tables.Count} tables out of {tableAbsorber.TableList.Count()}.");

        // Wait for a key press before walking the tables.
        Console.Read();

        var index = 0;
        foreach (var table in tables)
        {
            Console.WriteLine($"{++index} of {tables.Count}");
            Console.WriteLine($"{table.Rectangle.Width} by {table.Rectangle.Height}");

            DrawRectangleOnPage(table.Rectangle, page);

            var text = ExtractText(table.Rectangle, page);
            if (!string.IsNullOrEmpty(text))
            {
                // Invariant lower-casing keeps the ASCII-only filter stable
                // regardless of the current culture (e.g. Turkish 'I' -> 'ı'
                // would otherwise be dropped by the regex).
                var search = Regex.Replace(text.ToLowerInvariant(), "[^a-z0-9:]", string.Empty);
                Console.WriteLine(search);
            }
            else
            {
                Console.WriteLine("No text.");
            }
        }

        SaveDoc(doc);
    }
}
/// <summary>
/// Runs a <c>TextAbsorber</c> over <paramref name="page"/> with search options
/// built from <paramref name="rectangle"/> and returns the absorbed text.
/// </summary>
/// <param name="rectangle">Rectangle passed to the <c>TextSearchOptions</c> constructor.</param>
/// <param name="page">Page that accepts the absorber.</param>
/// <returns>The absorber's <c>Text</c> after the page has been visited.</returns>
private static string ExtractText(Aspose.Pdf.Rectangle rectangle, Page page)
{
    var searchOptions = new TextSearchOptions(rectangle);
    searchOptions.LimitToPageBounds = true;
    searchOptions.IsRegularExpressionUsed = false;
    searchOptions.IgnoreShadowText = false;

    var textAbsorber = new TextAbsorber(searchOptions);
    page.Accept(textAbsorber);

    return textAbsorber.Text;
}
/// <summary>
/// Appends operators to the page's content that stroke the outline of
/// <paramref name="rectangle"/> with RGB colour (0, 1, 0) and a line width of 2.
/// The drawing is wrapped in a GSave/GRestore pair.
/// </summary>
/// <param name="rectangle">Rectangle to outline; its LLX, LLY, Width and Height are used.</param>
/// <param name="page">Page whose <c>Contents</c> receives the operators.</param>
private static void DrawRectangleOnPage(Rectangle rectangle, Page page)
{
    var contents = page.Contents;

    // Save graphics state and set up the stroke (identity matrix, colour, width).
    contents.Add(new Aspose.Pdf.Operators.GSave());
    contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
    contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 1, 0));
    contents.Add(new Aspose.Pdf.Operators.SetLineWidth(2));

    // Rectangle path starting at the lower-left corner, then stroke it.
    var outline = new Aspose.Pdf.Operators.Re(
        rectangle.LLX,
        rectangle.LLY,
        rectangle.Width,
        rectangle.Height);
    contents.Add(outline);
    contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());

    // Restore the graphics state saved above.
    contents.Add(new Aspose.Pdf.Operators.GRestore());
}
/// <summary>
/// Appends operators to the page's content that stroke a closed polygon
/// through the given points with RGB colour (0, 0, 1) and a line width of 1.
/// The drawing is wrapped in a GSave/GRestore pair.
/// </summary>
/// <param name="polygon">Vertices of the polygon, in drawing order.</param>
/// <param name="page">Page whose <c>Contents</c> receives the operators.</param>
/// <exception cref="ArgumentNullException"><paramref name="polygon"/> is null.</exception>
/// <remarks>
/// An empty <paramref name="polygon"/> draws nothing and leaves the page untouched.
/// </remarks>
private static void DrawPolygonOnPage(Point[] polygon, Page page)
{
    // Validate before touching the page: failing after GSave has been added
    // would leave an unbalanced graphics-state operator in the content.
    if (polygon == null)
    {
        throw new ArgumentNullException(nameof(polygon));
    }

    if (polygon.Length == 0)
    {
        return;
    }

    page.Contents.Add(new Aspose.Pdf.Operators.GSave());
    page.Contents.Add(new Aspose.Pdf.Operators.ConcatenateMatrix(1, 0, 0, 1, 0, 0));
    page.Contents.Add(new Aspose.Pdf.Operators.SetRGBColorStroke(0, 0, 1));
    page.Contents.Add(new Aspose.Pdf.Operators.SetLineWidth(1));

    // Start at the first vertex, then connect the remaining ones in order.
    page.Contents.Add(new Aspose.Pdf.Operators.MoveTo(polygon[0].X, polygon[0].Y));
    for (var i = 1; i < polygon.Length; i++)
    {
        page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[i].X, polygon[i].Y));
    }

    // Explicit segment back to the first vertex, kept from the original
    // operator sequence, before the path is closed and stroked.
    page.Contents.Add(new Aspose.Pdf.Operators.LineTo(polygon[0].X, polygon[0].Y));
    page.Contents.Add(new Aspose.Pdf.Operators.ClosePathStroke());
    page.Contents.Add(new Aspose.Pdf.Operators.GRestore());
}
/// <summary>
/// Creates an <c>Aspose.Pdf.License</c> and calls <c>SetLicense</c> on it with
/// the file name "Aspose.Pdf.lic".
/// </summary>
private static void SetLicense()
{
    new Aspose.Pdf.License().SetLicense("Aspose.Pdf.lic");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment