Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save JoelGeraci-Datalogics/fb53bb5509e1f7eaeaee to your computer and use it in GitHub Desktop.
Save JoelGeraci-Datalogics/fb53bb5509e1f7eaeaee to your computer and use it in GitHub Desktop.
"Splits" a document by creating a new set of PDF files by extracting pages from the source file based on a maximum number of pages in the new files. The original file is unmodified.
/*
* Copyright Datalogics, Inc. 2015
*/
package pdfjt.cookbook.document;
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.ByteWriter;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions;
import com.adobe.pdfjt.pdf.page.PDFPage;
import com.adobe.pdfjt.services.manipulations.PMMOptions;
import com.adobe.pdfjt.services.manipulations.PMMService;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import pdfjt.util.SampleFileServices;
/**
 * "Splits" a document by creating a new set of PDF files, each holding at most
 * {@code maxPageCount} consecutive pages extracted from the source file. The
 * original file is unmodified.
 *
 * What you need to know first:
 * Not much really. The PMMService class was designed to make extracting pages
 * super-simple.
 *
 * Output files are named {@code Extracted_Pages_<first>-<last>.pdf}, where
 * {@code <first>} and {@code <last>} are the 1-based, zero-padded page numbers
 * (in the source document) of the first and last page contained in that file.
 */
public class SplitDocumentBasedOnNumberOfPages {
    private static final String inputPDFURL = "http://dev.datalogics.com/cookbook/document/AcrobatDC_PDFCreationSettings.pdf";
    private static final String outputDir = "cookbook/Document/output/split/";
    private static final String baseFileName = "Extracted_Pages_";
    private static final int maxPageCount = 25;

    /**
     * Downloads the sample PDF, splits it into chunks of at most
     * {@code maxPageCount} pages and saves every chunk under {@code outputDir}.
     *
     * @param args
     *            Ignored.
     * @throws Exception
     *             If the download, the page extraction or the save fails.
     */
    public static void main(String[] args) throws Exception {
        /*
         * Read in PDF input file
         */
        URLConnection connection = new URL(inputPDFURL).openConnection();
        // NOTE(review): a browser User-Agent is sent explicitly, presumably
        // because the server rejects Java's default agent - confirm.
        connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
        connection.connect();
        InputStream fis = connection.getInputStream();
        try {
            ByteReader byteReader = new InputStreamByteReader(fis);
            PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
            try {
                // Set up the Page Manager Service
                PMMService pmmService = new PMMService(pdfDocument);
                PMMOptions pmmOptions = PMMOptions.newInstanceAll();
                PDFOpenOptions pdfOpenOptions = PDFOpenOptions.newInstance();

                // Get the total number of pages and derive how many output
                // files are needed (the last one may be only partially filled).
                int numPages = pdfDocument.requirePages().getNumPages();
                int numSubFiles = (int) Math.ceil(numPages / (double) maxPageCount);

                for (int i = 0; i < numSubFiles; i++) {
                    int firstPageIndex = i * maxPageCount;
                    // Never request more pages than are left: the last chunk
                    // is shorter whenever numPages is not a multiple of
                    // maxPageCount.
                    int pagesInChunk = Math.min(maxPageCount, numPages - firstPageIndex);

                    PDFPage startPage = pdfDocument.requirePages().getPage(firstPageIndex);
                    PDFDocument outputDocument = pmmService.extractPages(startPage, pagesInChunk, pmmOptions, pdfOpenOptions);
                    try {
                        // Construct the new filename
                        String outputFileName = constructOutputFileName(i, numSubFiles, numPages);

                        // Save it.
                        ByteWriter outputFile = SampleFileServices.getRAFByteWriter(outputDir + outputFileName);
                        outputDocument.save(outputFile, PDFSaveFullOptions.newInstance());
                        System.out.println("Created: " + outputFileName);
                    } finally {
                        // Release the extracted document once it is on disk.
                        outputDocument.close();
                    }
                }
            } finally {
                // Release the source document even when a chunk fails.
                pdfDocument.close();
            }
        } finally {
            // Always release the network stream.
            fis.close();
        }
        System.out.println("Done!");
    }

    /**
     * Constructs a filename for the output PDF file of the form
     * {@code Extracted_Pages_<first>-<last>.pdf}, using 1-based page numbers
     * zero-padded to at least three digits.
     *
     * @param i
     *            The loop iteration (0-based index of the output file).
     * @param numSubFiles
     *            The total number of iterations, i.e. of output files.
     * @param numPages
     *            The total number of pages in the source file.
     * @return The filename, e.g. {@code Extracted_Pages_001-025.pdf}.
     */
    private static String constructOutputFileName(int i, int numSubFiles, int numPages) {
        int firstPage = (i * maxPageCount) + 1;
        // The final file ends at the document's last page; every other file
        // is full and therefore ends exactly at a multiple of maxPageCount.
        int lastPage = (i == numSubFiles - 1) ? numPages : (i + 1) * maxPageCount;
        return baseFileName + String.format("%03d", firstPage) + "-" + String.format("%03d", lastPage) + ".pdf";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment