Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Extract Images from Word, Excel, PowerPoint, PDF Documents Programmatically
// Extract Images from Word, Excel, PPT, PDF in C# using GroupDocs.Parser for .NET
// Opens the document, enumerates every embedded image, prints its page index,
// rectangle and file type, and saves each image as a sequentially numbered PNG file.
using (Parser parser = new Parser("path/document.pdf"))
{
    // Per the GroupDocs.Parser API reference, GetImages() returns null when image
    // extraction is not supported for the document format, so guard before iterating.
    IEnumerable<PageImageArea> images = parser.GetImages();
    if (images == null)
    {
        Console.WriteLine("Image extraction isn't supported for this document.");
    }
    else
    {
        // Save every image in PNG format regardless of its original type
        ImageOptions options = new ImageOptions(ImageFormat.Png);
        int imageNumber = 0;

        // Iterate over retrieved images
        foreach (PageImageArea image in images)
        {
            // Print page index, rectangle and image type, then save the image
            Console.WriteLine("Page: {0}, R: {1}, Type: {2}", image.Page.Index, image.Rectangle, image.FileType);
            image.Save("imageFilePath/image-" + imageNumber.ToString() + ".png", options);
            imageNumber++;
        }
    }
}
// Extract Images from Word, Excel, PowerPoint, PDF Documents Programmatically using GroupDocs.Parser for Java
// Opens the document, enumerates every embedded image, prints its page index,
// rectangle and file type, and saves each image as a sequentially numbered PNG file.
try (Parser parser = new Parser("path/document.pdf")) {
    // Extract images. Per the GroupDocs.Parser API reference, getImages() returns null
    // when image extraction is not supported for the document format, so guard before iterating.
    Iterable<PageImageArea> images = parser.getImages();
    if (images == null) {
        System.out.println("Image extraction isn't supported for this document.");
    } else {
        // Create the options to save images in PNG format
        ImageOptions options = new ImageOptions(ImageFormat.Png);
        int imageNumber = 0;

        // Iterate over images and save each one
        for (PageImageArea image : images) {
            // Print the page index, rectangle and image file type
            System.out.println(String.format("Page: %d, R: %s, Type: %s",
                    image.getPage().getIndex(), image.getRectangle(), image.getFileType()));
            image.save(String.format("filesPath/image_%d.png", imageNumber), options);
            imageNumber++;
        }
    }
}
// Extract Images from specific page of Word, Excel, PowerPoint, PDF in C# using GroupDocs.Parser for .NET
// Walks the document page by page, prints a "Page i/N" header for each page, then prints
// the rectangle and file type of every image on that page and saves it as a numbered PNG.
using (Parser parser = new Parser("path/document.pdf"))
{
    // Get the document info (used for the page count)
    IDocumentInfo documentInfo = parser.GetDocumentInfo();
    ImageOptions options = new ImageOptions(ImageFormat.Png);

    // Numbering runs across the whole document, not per page, so file names never collide
    int imageNumber = 0;

    // Iterate over pages
    for (int pageIndex = 0; pageIndex < documentInfo.PageCount; pageIndex++)
    {
        // Print a 1-based page number
        Console.WriteLine("Page {0}/{1}", pageIndex + 1, documentInfo.PageCount);

        // Per the GroupDocs.Parser API reference, GetImages(pageIndex) returns null when
        // image extraction is not supported; skip the page instead of throwing in foreach.
        var pageImages = parser.GetImages(pageIndex);
        if (pageImages == null)
        {
            continue;
        }

        // Iterate over the images on this page
        foreach (PageImageArea image in pageImages)
        {
            // Print the rectangle and image type (the second value is FileType, so label it "Type")
            Console.WriteLine("R: {0}, Type: {1}", image.Rectangle, image.FileType);
            image.Save("imageFilePath/image-" + imageNumber.ToString() + ".png", options);
            imageNumber++;
        }
    }
}
// Extract Images from specific page of Word, Excel, PowerPoint, PDF in Java using GroupDocs.Parser
// Walks the document page by page, prints a "Page i/N" header for each page, then prints
// the rectangle and file type of every image on that page and saves it as a numbered JPEG.
try (Parser parser = new Parser("path/document.pdf")) {
    // Get the document info (used for the page count)
    IDocumentInfo documentInfo = parser.getDocumentInfo();
    // Create the options to save images in JPEG format (matches the .jpeg file names below)
    ImageOptions options = new ImageOptions(ImageFormat.Jpeg);

    // Numbering runs across the whole document, not per page, so file names never collide
    int imageNumber = 0;

    // Iterate over pages
    for (int pageIndex = 0; pageIndex < documentInfo.getPageCount(); pageIndex++) {
        // Print a 1-based page number
        System.out.println(String.format("Page %d/%d", pageIndex + 1, documentInfo.getPageCount()));

        // Per the GroupDocs.Parser API reference, getImages(pageIndex) returns null when
        // image extraction is not supported; skip the page instead of throwing in the loop.
        Iterable<PageImageArea> pageImages = parser.getImages(pageIndex);
        if (pageImages == null) {
            continue;
        }

        // Iterate over the images on this page
        for (PageImageArea image : pageImages) {
            // Print image information (the second value is the file type, so label it "Type") and save the file
            System.out.println(String.format("R: %s, Type: %s", image.getRectangle(), image.getFileType()));
            image.save(String.format("filesPath/image_%d.jpeg", imageNumber), options);
            imageNumber++;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.