Last active
June 10, 2016 21:27
-
-
Save JoelGeraci-Datalogics/fb53bb5509e1f7eaeaee to your computer and use it in GitHub Desktop.
"Splits" a document by creating a new set of PDF files by extracting pages from the source file based on a maximum number of pages in the new files. The original file is unmodified.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright Datalogics, Inc. 2015 | |
*/ | |
package pdfjt.cookbook.document; | |
import com.adobe.internal.io.ByteReader; | |
import com.adobe.internal.io.ByteWriter; | |
import com.adobe.internal.io.InputStreamByteReader; | |
import com.adobe.pdfjt.pdf.document.PDFDocument; | |
import com.adobe.pdfjt.pdf.document.PDFOpenOptions; | |
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions; | |
import com.adobe.pdfjt.pdf.page.PDFPage; | |
import com.adobe.pdfjt.services.manipulations.PMMOptions; | |
import com.adobe.pdfjt.services.manipulations.PMMService; | |
import java.io.InputStream; | |
import java.net.URL; | |
import java.net.URLConnection; | |
import pdfjt.util.SampleFileServices; | |
/** | |
* "Splits" a document by creating a new set of PDF files by extracting pages | |
* from the source file based on a maximum number of pages in the new files. | |
* The original file is unmodified. | |
* | |
* What you need to know first: | |
* Not much really. The PMMService class was designed to make extracting pages | |
* super-simple. | |
* | |
*/ | |
public class SplitDocumentBasedOnNumberOfPages { | |
private static final String inputPDFURL = "http://dev.datalogics.com/cookbook/document/AcrobatDC_PDFCreationSettings.pdf"; | |
private static final String outputDir = "cookbook/Document/output/split/"; | |
private static final String baseFileName = "Extracted_Pages_"; | |
private static final int maxPageCount = 25; | |
public static void main(String[] args) throws Exception { | |
/* | |
* Read in PDF input file | |
*/ | |
URLConnection connection = new URL(inputPDFURL).openConnection(); | |
connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); | |
connection.connect(); | |
InputStream fis = connection.getInputStream(); | |
ByteReader byteReader = new InputStreamByteReader(fis); | |
PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance()); | |
// Set up the Page Manager Service | |
PMMService pmmService = new PMMService(pdfDocument); | |
PMMOptions pmmOptions = PMMOptions.newInstanceAll(); | |
PDFOpenOptions pdfOpenOptions = PDFOpenOptions.newInstance(); | |
// Get the total number of pages | |
int numPages = pdfDocument.requirePages().getNumPages(); | |
int numSubFiles = (int) Math.ceil(numPages/(double) maxPageCount); | |
for (int i = 0; i < numSubFiles; i++) { | |
PDFPage startPage = pdfDocument.requirePages().getPage(i*maxPageCount); | |
PDFDocument outputDocument = pmmService.extractPages(startPage, maxPageCount, pmmOptions, pdfOpenOptions); | |
// Construct the new filename | |
String outputFileName = constructOutputFileName(i, numSubFiles, numPages); | |
// Save it. | |
ByteWriter outputFile = SampleFileServices.getRAFByteWriter(outputDir + outputFileName); | |
outputDocument.save(outputFile, PDFSaveFullOptions.newInstance()); | |
System.out.println("Created: " + outputFileName); | |
} | |
System.out.println("Done!"); | |
} | |
/** | |
* Constructs a filename for the output PDF file. | |
* | |
* @param i | |
* The loop iteration. | |
* @param numSubFiles | |
* The number of iterations iteration. | |
* @param numPages | |
* The total number of pages in the file. | |
* @return String | |
*/ | |
private static String constructOutputFileName(int i, int numSubFiles, int numPages) { | |
String outputFileName = baseFileName + String.format("%03d", (i*maxPageCount) + 1)+"-"; | |
if (i == numSubFiles-1) { | |
outputFileName = outputFileName+String.format("%03d", numPages); | |
} | |
else { | |
outputFileName = outputFileName+String.format("%03d", (i*maxPageCount)+maxPageCount + 1); | |
} | |
outputFileName = outputFileName+".pdf"; | |
return outputFileName; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment