Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save JoelGeraci-Datalogics/6800e3ab5e4e308ea4e3 to your computer and use it in GitHub Desktop.
Save JoelGeraci-Datalogics/6800e3ab5e4e308ea4e3 to your computer and use it in GitHub Desktop.
Splits a document into multiple files based on file size
/*
* Copyright Datalogics, Inc. 2015
*/
package pdfjt.cookbook.document;
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.ByteWriter;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveOptions;
import com.adobe.pdfjt.pdf.page.PDFPage;
import com.adobe.pdfjt.services.manipulations.PMMOptions;
import com.adobe.pdfjt.services.manipulations.PMMService;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import pdfjt.util.SampleFileServices;
/**
 * Splits a document into several smaller documents based on file size.
 *
 * Pages are copied from the source document into an output document one at a
 * time. The size limit is checked after each save, so the page that pushes an
 * output file over the limit stays in that file and the following page starts
 * a new, sequentially numbered output document.
 *
 * What you need to know first: In order to accurately calculate the file size,
 * the PDF file must be saved to disk between each page append.
 */
public class SplitDocumentBasedOnFileSize {
    private static final String inputPDFURL = "http://dev.datalogics.com/cookbook/document/AcrobatDC_PDFCreationSettings.pdf";
    private static final String outputDir = "cookbook/Document/output/split/";
    private static final String baseFileName = "Extracted_Pages";
    private static final long MB = 1024L * 1024L;
    // Size limit for each output file, in megabytes.
    private static final double maxFileSizeinMB = .5;

    /**
     * Downloads the input PDF and writes it back out as a series of smaller
     * PDF files named {@code Extracted_Pages_NNN.pdf} in the output directory.
     *
     * @param args not used
     * @throws Exception if the download, the PDF processing, or the file
     *         output fails
     */
    public static void main(String[] args) throws Exception {
        /*
         * Read in PDF input file
         */
        URLConnection connection = new URL(inputPDFURL).openConnection();
        connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
        connection.connect();

        // The network stream stays open for the whole run because the source
        // document is read through it; it is closed when the block exits.
        try (InputStream fis = connection.getInputStream()) {
            ByteReader byteReader = new InputStreamByteReader(fis);
            PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());

            // Set up the Page Manager Service
            PMMService sourcePMMService = new PMMService(pdfDocument);

            // calculate the maximum file size
            long maxFileSizeInBytes = (long) (maxFileSizeinMB * MB);

            // Get the total number of pages
            int numPages = pdfDocument.requirePages().getNumPages();

            PDFSaveOptions pdfSaveOptions = PDFSaveFullOptions.newInstance();
            pdfSaveOptions.setForceCompress(true);

            /*
             * The new documents will be sequentially numbered
             */
            int numExtractedDocuments = 0;

            /*
             * Each pass of this loop produces one output document that starts
             * at source page i. The inner loop advances i past every page it
             * appends, so the loop increment lands on the first page of the
             * next output document.
             */
            for (int i = 0; i < numPages; i++) {
                numExtractedDocuments++;
                String extractedFileName = baseFileName + "_" + String.format("%03d", numExtractedDocuments) + ".pdf";
                String extractedFilePath = outputDir + extractedFileName;
                // NOTE(review): neither this writer nor the PDFDocument
                // instances are explicitly closed; confirm against the PDFJT
                // API whether save() releases them.
                ByteWriter outputFile = SampleFileServices.getRAFByteWriter(extractedFilePath);

                /*
                 * Create a new, one page, document
                 */
                PDFPage startPage = pdfDocument.requirePages().getPage(i);
                PDFDocument outputDocument = sourcePMMService.extractPages(startPage, 1, PMMOptions.newInstanceAll(), PDFOpenOptions.newInstance());
                outputDocument.save(outputFile, pdfSaveOptions);
                long savedSize = outputDocument.getFileSize();

                /*
                 * Add pages, one at a time, until either the file size
                 * exceeds the maximum or we run out of pages. The guard
                 * "i + 1 < numPages" ensures the next source page exists
                 * before it is requested.
                 */
                while (i + 1 < numPages && savedSize <= maxFileSizeInBytes) {
                    // Re-open the file that was just saved and append the
                    // next source page to it.
                    try (FileInputStream fileInputStream = new FileInputStream(extractedFilePath)) {
                        ByteReader savedReader = new InputStreamByteReader(fileInputStream);
                        outputDocument = PDFDocument.newInstance(savedReader, PDFOpenOptions.newInstance());
                        PMMService targetPMMService = new PMMService(outputDocument);
                        targetPMMService.insertPages(
                                outputDocument.requirePages().getLastPage(),
                                pdfDocument.requirePages().getPage(i + 1),
                                1,
                                null, null);
                        i++;
                        outputDocument.save(outputFile, pdfSaveOptions);
                        // Read the size while the backing stream is still open.
                        savedSize = outputDocument.getFileSize();
                    }
                }
            }
        }
        System.out.println("Done!");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment