Last active
June 10, 2016 21:29
-
-
Save JoelGeraci-Datalogics/6800e3ab5e4e308ea4e3 to your computer and use it in GitHub Desktop.
Splits a document into multiple files based on a maximum file size
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright Datalogics, Inc. 2015 | |
*/ | |
package pdfjt.cookbook.document; | |
import com.adobe.internal.io.ByteReader; | |
import com.adobe.internal.io.ByteWriter; | |
import com.adobe.internal.io.InputStreamByteReader; | |
import com.adobe.pdfjt.pdf.document.PDFDocument; | |
import com.adobe.pdfjt.pdf.document.PDFOpenOptions; | |
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions; | |
import com.adobe.pdfjt.pdf.document.PDFSaveOptions; | |
import com.adobe.pdfjt.pdf.page.PDFPage; | |
import com.adobe.pdfjt.services.manipulations.PMMOptions; | |
import com.adobe.pdfjt.services.manipulations.PMMService; | |
import java.io.FileInputStream; | |
import java.io.InputStream; | |
import java.net.URL; | |
import java.net.URLConnection; | |
import pdfjt.util.SampleFileServices; | |
/** | |
* Splits a document based on number of pages. | |
* | |
* What you need to know first: In order to accurately calculate the file size, | |
* the PDF file must be saved to disk between each page append. | |
*/ | |
public class SplitDocumentBasedOnFileSize { | |
private static final String inputPDFURL = "http://dev.datalogics.com/cookbook/document/AcrobatDC_PDFCreationSettings.pdf"; | |
private static final String outputDir = "cookbook/Document/output/split/"; | |
private static final String baseFileName = "Extracted_Pages"; | |
private static final long MB = 1024L * 1024L; | |
private static double maxFileSizeinMB = .5; | |
public static void main(String[] args) throws Exception { | |
/* | |
* Read in PDF input file | |
*/ | |
URLConnection connection = new URL(inputPDFURL).openConnection(); | |
connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11"); | |
connection.connect(); | |
InputStream fis = connection.getInputStream(); | |
ByteReader byteReader = new InputStreamByteReader(fis); | |
PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance()); | |
// Set up the Page Manager Service | |
PMMService sourcePMMService = new PMMService(pdfDocument); | |
ByteWriter outputFile = null; | |
// calculate the maximum file size | |
long maxFileSizeInBytes = (long) (maxFileSizeinMB*MB); | |
// Get the total number of pages | |
int numPages = pdfDocument.requirePages().getNumPages(); | |
// Sequentially extract ranges of pages from beginning to end. | |
PDFSaveOptions pdfSaveOptions = PDFSaveFullOptions.newInstance(); | |
pdfSaveOptions.setForceCompress(true); | |
boolean startNewDocument = true; | |
/* | |
* The new documents will be sequentially numbered | |
*/ | |
int numExtractedDocuments = 0; | |
int i; | |
PDFPage startPage = pdfDocument.requirePages().getPage(0); | |
/* | |
* Iterate through the pages creating a new document every time the | |
* maximum file size is reached. | |
*/ | |
for (i = 0; i < numPages; i++) { | |
if (startNewDocument) { | |
startNewDocument = false; | |
numExtractedDocuments++; | |
String extractedFileName = baseFileName + "_"+String.format("%03d", numExtractedDocuments)+".pdf"; | |
outputFile = SampleFileServices.getRAFByteWriter(outputDir + extractedFileName); | |
/* | |
* Create a new, one page, document | |
*/ | |
PDFDocument outputDocument = sourcePMMService.extractPages(startPage, 1, PMMOptions.newInstanceAll(),PDFOpenOptions.newInstance()); | |
outputDocument.save(outputFile, pdfSaveOptions); | |
/* | |
* Add pages, one at a time until either the file size is less | |
* than the maximum or we run out of pages. | |
*/ | |
while (i <= numPages && outputDocument.getFileSize() <= maxFileSizeInBytes) { | |
FileInputStream fileInputStream = new FileInputStream(outputDir + extractedFileName); | |
byteReader = new InputStreamByteReader(fileInputStream); | |
outputDocument = PDFDocument.newInstance(byteReader,PDFOpenOptions.newInstance()); | |
PMMService targetPMMService = new PMMService(outputDocument); | |
targetPMMService.insertPages( | |
outputDocument.requirePages().getLastPage(), | |
pdfDocument.requirePages().getPage(i+1), | |
1, | |
null, null); | |
i++; | |
startPage = pdfDocument.requirePages().getPage(i+1); | |
outputDocument.save(outputFile, pdfSaveOptions); | |
} | |
startNewDocument = true; | |
} | |
} | |
System.out.println("Done!"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment