-
-
Save jvymazal/c2a3b04c1b2f3adb2d60293c1b0d83c1 to your computer and use it in GitHub Desktop.
Sample app to parse file using GroupDocs.Parser API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using GroupDocs.Parser; | |
using GroupDocs.Parser.Options; | |
using System.Xml; | |
using System.Linq; | |
/// <summary>
/// Sample that dumps to the console every kind of content GroupDocs.Parser
/// reports as extractable from a document.
/// </summary>
public class parserPDF
{
    /// <summary>
    /// Opens <paramref name="target"/> with GroupDocs.Parser, prints basic document
    /// information, and then prints (or, for images, saves) each content type whose
    /// flag is set in <see cref="Parser.Features"/>.
    /// </summary>
    /// <param name="target">
    /// Value passed to the <see cref="Parser"/> constructor. It is also used as the
    /// prefix of the file names under which extracted images are saved.
    /// </param>
    public static void Parse(string target)
    {
        // Parser holds the opened document; dispose it so the file is released
        // even when one of the extraction calls throws.
        using (Parser parser = new Parser(target))
        {
            Features features = parser.Features;
            IDocumentInfo info = parser.GetDocumentInfo();

            Console.WriteLine("FileType: {0}", info.FileType);
            Console.WriteLine("PageCount: {0}", info.PageCount);
            Console.WriteLine("Size: {0}", info.Size);
            Console.WriteLine(" ----");

            if (features.Text)
            {
                Console.WriteLine("Text:");
                using (var textReader = parser.GetText())
                {
                    Console.WriteLine(textReader.ReadToEnd());
                }
            }

            if (features.Container)
            {
                // Materialize once: asking the parser twice (count + loop) would
                // run the extraction twice.
                var containerItems = parser.GetContainer().ToList();
                Console.WriteLine("Container (" + containerItems.Count + "):");
                foreach (GroupDocs.Parser.Data.ContainerItem item in containerItems)
                {
                    Console.WriteLine(" Name: " + item.Name);
                    Console.WriteLine(" Size: " + item.Size);
                    Console.WriteLine(" ?: " + item.ToString());
                }
            }

            if (features.Toc)
            {
                var tocItems = parser.GetToc().ToList();
                Console.WriteLine("Toc (" + tocItems.Count + "):");
                foreach (GroupDocs.Parser.Data.TocItem tocItem in tocItems)
                {
                    using (var tocReader = tocItem.ExtractText())
                    {
                        Console.WriteLine(tocReader.ReadToEnd());
                    }
                }
            }

            if (features.Images)
            {
                var images = parser.GetImages().ToList();
                Console.WriteLine("Images (" + images.Count + "):");
                int imageNumber = 0;
                foreach (GroupDocs.Parser.Data.PageImageArea image in images)
                {
                    imageNumber++;
                    // NOTE(review): if FileType.Extension already starts with a dot,
                    // this yields a double dot in the file name - confirm against the API.
                    image.Save(target + "_extracted-" + imageNumber + "." + image.FileType.Extension);
                }
            }

            if (features.Metadata)
            {
                var metadataItems = parser.GetMetadata().ToList();
                Console.WriteLine("Metadata (" + metadataItems.Count + "):");
                foreach (GroupDocs.Parser.Data.MetadataItem metaItem in metadataItems)
                {
                    Console.WriteLine("Name: " + metaItem.Name + " || Value: " + metaItem.Value);
                }
            }

            if (features.Structure)
            {
                Console.WriteLine("Structure:");
                using (XmlReader reader = parser.GetStructure())
                {
                    // Walk the structure XML node by node and echo each node.
                    while (reader.Read())
                    {
                        switch (reader.NodeType)
                        {
                            case XmlNodeType.Element:
                                Console.WriteLine("Start Element {0}", reader.Name);
                                break;
                            case XmlNodeType.Text:
                                Console.WriteLine("Text Node: {0}", reader.Value);
                                break;
                            case XmlNodeType.EndElement:
                                Console.WriteLine("End Element {0}", reader.Name);
                                break;
                            default:
                                Console.WriteLine("Other node {0} with value {1}",
                                    reader.NodeType, reader.Value);
                                break;
                        }
                    }
                }
            }

            if (features.TextAreas)
            {
                var textAreas = parser.GetTextAreas().ToList();
                Console.WriteLine("TextAreas (" + textAreas.Count + "):");
                foreach (GroupDocs.Parser.Data.PageTextArea area in textAreas)
                {
                    Console.WriteLine(area.Text);
                }
            }

            // The remaining features are only reported, not exercised.
            if (features.TextPage)
            {
                Console.WriteLine("TextPage");
            }

            if (features.Search)
            {
                Console.WriteLine("Search");
            }

            if (features.Tables)
            {
                Console.WriteLine("Tables");
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment