Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
C# example to extract images from a PDF
/* -----------------------------------------------------------------------
* <copyright file="makoimageextract.cs" company="Global Graphics Software Ltd">
* Copyright (c) 2021 Global Graphics Software Ltd. All rights reserved.
* </copyright>
* <summary>
* This example is provided on an "as is" basis and without warranty of any kind.
* Global Graphics Software Ltd. does not warrant or make any representations
* regarding the use or results of use of this example.
* </summary>
* -----------------------------------------------------------------------
*/
using System;
using System.IO;
using JawsMako;
namespace MakoImageExtract
{
class Program
{
/// <summary>
/// Entry point: opens a PDF with Mako, extracts the images found on every page and
/// prints a summary of how many images were found.
/// </summary>
/// <param name="args">
/// Optional positional overrides, in order: input PDF path, output folder,
/// output base name, output extension (".png", ".jpg" or ".tif").
/// Any missing or blank entry falls back to the sample's built-in default.
/// </param>
static void Main(string[] args)
{
    // Get arguments (a missing or blank entry falls back to the sample default).
    string ArgOrDefault(int index, string fallback) =>
        args != null && args.Length > index && !string.IsNullOrWhiteSpace(args[index])
            ? args[index]
            : fallback;

    // Normalise the extension so "PNG" or "png" are accepted as well as ".png".
    string outputExt = ArgOrDefault(3, ".png").ToLowerInvariant();
    if (!outputExt.StartsWith(".", StringComparison.Ordinal))
    {
        outputExt = "." + outputExt;
    }

    var parameters = new Parameters(
        ArgOrDefault(0, "BP Solar information.pdf"),
        ArgOrDefault(1, string.Empty),
        ArgOrDefault(2, "image"),
        outputExt);

    // Fail early with a readable message instead of letting the extraction run and save nothing.
    string[] supportedExtensions = { ".jpg", ".tif", ".png" };
    if (Array.IndexOf(supportedExtensions, parameters.OutputExt) < 0)
    {
        Console.Error.WriteLine(
            $"Unsupported output extension '{parameters.OutputExt}'. Use one of: {string.Join(", ", supportedExtensions)}.");
        Environment.ExitCode = 1;
        return;
    }

    if (!File.Exists(parameters.InputFullPath))
    {
        Console.Error.WriteLine($"Input file {parameters.InputFullPath} was not found.");
        Environment.ExitCode = 1;
        return;
    }

    // An empty output path means "current directory"; otherwise make sure the folder exists.
    if (parameters.OutputPath.Length > 0)
    {
        Directory.CreateDirectory(parameters.OutputPath);
    }

    // Start Mako
    using var mako = IJawsMako.create();
    IJawsMako.enableAllFeatures(mako);

    // Open document
    using var document = IPDFInput.create(mako).open(parameters.InputFullPath).getDocument();

    // Process document (the page count is queried once rather than on every iteration).
    var pageCount = document.getNumPages();
    int documentImageCount = 0;
    for (uint pageIndex = 0; pageIndex < pageCount; pageIndex++)
    {
        findImagesOnPage(mako, parameters, document, pageIndex, out var pageImageCount);
        documentImageCount += pageImageCount;
    }

    Console.WriteLine($"File {parameters.InputFullPath} contains {documentImageCount} images on {pageCount} pages.");
}
/// <summary>
/// Scans one page for path nodes that are filled with an image brush (directly, or via a
/// masked brush) and writes each image found to a file named
/// <c>{OutputBasename}_p{page}_i{n}{OutputExt}</c> inside <c>OutputPath</c>.
/// </summary>
/// <param name="mako">The Mako instance used to create output streams and encode images.</param>
/// <param name="parameters">Output folder, base name and extension (".jpg", ".tif" or ".png").</param>
/// <param name="document">The document that owns the page.</param>
/// <param name="pageIndex">Zero-based page index; file names and messages use the one-based number.</param>
/// <param name="imageCount">Receives the number of images found on the page.</param>
public static void findImagesOnPage(IJawsMako mako, Parameters parameters, IDocument document,
    uint pageIndex, out int imageCount)
{
    imageCount = 0;

    // Get page
    using var page = document.getPage(pageIndex);

    // Find the path nodes (images are painted onto paths using an image brush).
    using var pathNodes = page.getContent().findChildrenOfType(eDOMNodeType.eDOMPathNode, true);
    var pathCount = pathNodes.size();
    Console.WriteLine($"-- Examining {pathCount} path nodes on page {pageIndex + 1}");

    for (uint i = 0; i < pathCount; i++)
    {
        using var path = IDOMPathNode.fromRCObject(pathNodes[i].toRCObject());

        // Get the brush used to fill this path
        using IDOMBrush brush = path.getFill();
        if (brush == null)
        {
            continue;
        }

        // Works for plain image brushes and for masked brushes whose content is an image.
        using var imageBrush = ResolveImageBrush(brush);
        if (imageBrush == null)
        {
            continue;
        }

        using var image = imageBrush.getImageSource();
        if (image == null)
        {
            continue;
        }

        // The counter is bumped here, so it counts images found even if saving is skipped below.
        var outputFilePath = Path.Combine(parameters.OutputPath,
            $"{parameters.OutputBasename}_p{pageIndex + 1}_i{++imageCount}{parameters.OutputExt}");

        // Dump the image. Each stream is created only for a supported format (so no empty
        // file is left behind) and is disposed as soon as encoding finishes.
        switch (parameters.OutputExt)
        {
            case ".jpg":
                using (var stream = IOutputStream.createToFile(mako, outputFilePath))
                {
                    IDOMJPEGImage.encode(mako, image, stream);
                }
                break;
            case ".tif":
                using (var stream = IOutputStream.createToFile(mako, outputFilePath))
                {
                    IDOMTIFFImage.encode(mako, image, stream);
                }
                break;
            case ".png":
                using (var stream = IOutputStream.createToFile(mako, outputFilePath))
                {
                    IDOMPNGImage.encode(mako, image, stream);
                }
                break;
            default:
                // Nothing was written, so do not claim that a file was saved.
                Console.Error.WriteLine(
                    $"Unsupported output extension '{parameters.OutputExt}'; image {outputFilePath} was not saved.");
                continue;
        }

        Console.WriteLine($"An image was saved as {outputFilePath}");
    }
}

/// <summary>
/// Returns the image brush that paints the content of <paramref name="brush"/>, or null
/// when the brush does not lead to an image. The caller owns (and should dispose) the result.
/// </summary>
private static IDOMImageBrush ResolveImageBrush(IDOMBrush brush)
{
    var imageBrush = IDOMImageBrush.fromRCObject(brush.toRCObject());

    // NOTE(review): assumes fromRCObject returns null when the object is not of the
    // requested type, as the null checks in the original sample imply.
    using var maskedBrush = IDOMMaskedBrush.fromRCObject(brush.toRCObject());
    if (maskedBrush == null)
    {
        return imageBrush;
    }

    // If it's a masked image, get the image from the brush that is used to paint the
    // content, rather than the mask.
    using IDOMBrush contentBrush = maskedBrush.getBrush();
    if (contentBrush == null)
    {
        return imageBrush;
    }

    // The content brush replaces any directly-resolved brush, so release that one first.
    imageBrush?.Dispose();
    return IDOMImageBrush.fromRCObject(contentBrush.toRCObject());
}
/// <summary>
/// Immutable bundle of the input and output settings used by the image extraction sample.
/// </summary>
public readonly struct Parameters
{
    /// <summary>Path of the PDF file to read.</summary>
    public string InputFullPath { get; }

    /// <summary>Folder that receives the extracted image files.</summary>
    public string OutputPath { get; }

    /// <summary>Leading part of every generated image file name.</summary>
    public string OutputBasename { get; }

    /// <summary>Extension appended to every generated image file name, e.g. ".png".</summary>
    public string OutputExt { get; }

    /// <summary>Creates a parameter set from its four components.</summary>
    /// <param name="ifp">Input PDF path.</param>
    /// <param name="op">Output folder.</param>
    /// <param name="ob">Output base name.</param>
    /// <param name="oe">Output extension.</param>
    public Parameters(string ifp, string op, string ob, string oe)
        => (InputFullPath, OutputPath, OutputBasename, OutputExt) = (ifp, op, ob, oe);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment