Skip to content

Instantly share code, notes, and snippets.

@marcgeld
Created February 25, 2016 10:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marcgeld/c4fddd5117fc36cafc1e to your computer and use it in GitHub Desktop.
Save marcgeld/c4fddd5117fc36cafc1e to your computer and use it in GitHub Desktop.
Groovy script that extracts images from a PDF file
#! /usr/bin/env groovy
//@GrabConfig(systemClassLoader=true)
@Grab(group='ch.qos.logback', module='logback-classic', version='1.1.2')
@Grab(group='org.apache.pdfbox', module='pdfbox', version='2.0.0-RC3')
import org.apache.pdfbox.pdfwriter.*
import org.apache.pdfbox.pdmodel.*
import org.apache.pdfbox.pdmodel.font.*
import org.apache.pdfbox.pdmodel.edit.*
import org.apache.pdfbox.pdmodel.graphics.*
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject
import java.awt.image.BufferedImage;
import javax.imageio.ImageIO;
//import org.apache.pdfbox.pdmodel.graphics.PDXObject
import org.apache.pdfbox.cos.*
// ---------------------------------------------------------------------------
// Command-line handling and derivation of the input/output locations.
// ---------------------------------------------------------------------------

// Script class name, shown in the usage banner.
def appName = this.getClass().getName()
// Image format handed to ImageIO; also used as the output file extension.
def fileType = "png"
def cli = new CliBuilder(usage:"${appName} --file <file>")
cli.with {
    f(longOpt: 'file', 'PDF file to extract images from', args: 1, required: true)
    h(longOpt: 'help', 'Print help', required: false)
}
def options = cli.parse(args)
// Nothing usable was parsed (e.g. the required --file option is missing): stop.
if (!options) return
if (options.h) {
    // Help was requested explicitly: show it and do not process any file.
    cli.usage()
    return
}
println "open file: '" + options.f +"'"
// Declared here so the extraction section can close it in its finally clause.
PDDocument doc = null
File inputFile = new File(options.f)
// getParentFile() is null for a bare file name such as "doc.pdf"; going through
// the absolute path makes the images land next to the input file in that case too.
File outputDir = inputFile.getAbsoluteFile().getParentFile()
// Output base name: input name without extension, spaces -> underscores,
// lower-cased, followed by "-img" (an index and extension are appended later).
String outfilenameTemplate = stripExt(inputFile.getName()).replaceAll(" ", "_").toLowerCase() + "-img"
// ---------------------------------------------------------------------------
// Walk every page of the PDF, and write each image XObject found in the page
// resources to "<outfilenameTemplate><index>.<fileType>" inside outputDir.
// The document is always closed, even when loading or writing fails.
// ---------------------------------------------------------------------------
try
{
    doc = PDDocument.load( inputFile )
    PDPageTree pageTree = doc.getDocumentCatalog().getPages()
    // Running index of written images; starts at 0 so the first file is "...-img0".
    int totalImages = 0
    for ( PDPage page : pageTree ) {
        PDResources pdResources = page.getResources()
        // A page without a resource dictionary has no XObjects to inspect.
        if ( pdResources == null ) {
            continue
        }
        for ( COSName xObjCosName : pdResources.getXObjectNames() ) {
            PDXObject pdxObj = pdResources.getXObject(xObjCosName)
            // Only image XObjects are exported; other XObject kinds are skipped.
            if ( !(pdxObj instanceof PDImageXObject) ) {
                continue
            }
            PDImageXObject pdImageXObject = (PDImageXObject) pdxObj
            BufferedImage bufferedImage = pdImageXObject.getImage()
            println(bufferedImage)
            File outFile = new File( outputDir, outfilenameTemplate + totalImages + "." + fileType)
            // ImageIO.write returns false when no writer handles the requested format.
            if ( !ImageIO.write( bufferedImage, fileType, outFile ) ) {
                System.err.println( "no image writer for format '" + fileType + "', skipped: " + outFile )
                continue
            }
            println( "written: " + outFile )
            totalImages++
        }
    }
}
finally
{
    if( doc != null )
    {
        doc.close()
    }
}
/**
 * Removes the extension from a file name.
 *
 * Everything from the last "." onwards is dropped. A name without any "."
 * is returned unchanged, and a null input yields null.
 *
 * @param str file name, may be null
 * @return the name up to (not including) its last ".", or null for null input
 */
static String stripExt (String str) {
    if (str == null) {
        return null;
    }
    int dotIndex = str.lastIndexOf(".");
    // lastIndexOf reports a missing "." with a negative index.
    return (dotIndex < 0) ? str : str.substring(0, dotIndex);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment