JoelGeraci-Datalogics/SplitDocumentBasedOnFileSize.java

## SplitDocumentBasedOnFileSize.java
/*
 * Copyright Datalogics, Inc. 2015
 */

package pdfjt.cookbook.document;

import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.ByteWriter;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveOptions;
import com.adobe.pdfjt.pdf.page.PDFPage;
import com.adobe.pdfjt.services.manipulations.PMMOptions;
import com.adobe.pdfjt.services.manipulations.PMMService;

import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

import pdfjt.util.SampleFileServices;

/**
 * Splits a document into several smaller documents based on file size.
 *
 * The source PDF is downloaded from {@code inputPDFURL} and its pages are
 * copied, in order, into sequentially numbered files
 * ({@code Extracted_Pages_001.pdf}, {@code Extracted_Pages_002.pdf}, ...)
 * under {@code outputDir}. Each output file starts with a single page and
 * receives further pages one at a time until its reported size exceeds
 * {@code maxFileSizeinMB} or the source runs out of pages.
 *
 * What you need to know first: In order to accurately calculate the file size,
 * the PDF file must be saved to disk between each page append.
 *
 * Note: the size is checked after a page has been appended and saved, so an
 * output file can end up larger than the configured maximum.
 */
public class SplitDocumentBasedOnFileSize {

    private static final String inputPDFURL = "http://dev.datalogics.com/cookbook/document/AcrobatDC_PDFCreationSettings.pdf";
    private static final String outputDir = "cookbook/Document/output/split/";
    private static final String baseFileName = "Extracted_Pages";
    private static final long  MB = 1024L * 1024L;
    private static double maxFileSizeinMB = .5;

    /**
     * Downloads the input PDF and writes the size-limited pieces to
     * {@code outputDir}.
     *
     * @param args not used
     * @throws Exception if the download fails or any toolkit call fails
     */
    public static void main(String[] args) throws Exception {
        /*
         * Read in PDF input file
         */
        URLConnection connection = new URL(inputPDFURL).openConnection();
        connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
        connection.connect();
        InputStream fis = connection.getInputStream();
        ByteReader byteReader = new InputStreamByteReader(fis);
        PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
        try {
            // Set up the Page Manager Service
            PMMService sourcePMMService = new PMMService(pdfDocument);
            // calculate the maximum file size
            long maxFileSizeInBytes = (long) (maxFileSizeinMB * MB);
            // Get the total number of pages
            int numPages = pdfDocument.requirePages().getNumPages();
            PDFSaveOptions pdfSaveOptions = PDFSaveFullOptions.newInstance();
            pdfSaveOptions.setForceCompress(true);
            /*
             * The new documents will be sequentially numbered
             */
            int numExtractedDocuments = 0;
            /*
             * Index of the next source page that has not been written to any
             * output document yet.
             */
            int nextPageIndex = 0;
            /*
             * Walk through the pages, starting a new output document every
             * time the previous one has grown past the maximum file size.
             */
            while (nextPageIndex < numPages) {
                numExtractedDocuments++;
                String extractedFileName = baseFileName + "_" + String.format("%03d", numExtractedDocuments) + ".pdf";
                String extractedFilePath = outputDir + extractedFileName;
                ByteWriter outputFile = SampleFileServices.getRAFByteWriter(extractedFilePath);
                /*
                 * Create a new, one page, document. The first page is always
                 * looked up from the running index so that a page is never
                 * written twice, even when a single page is already larger
                 * than the limit.
                 */
                PDFPage startPage = pdfDocument.requirePages().getPage(nextPageIndex);
                PDFDocument outputDocument = sourcePMMService.extractPages(startPage, 1, PMMOptions.newInstanceAll(), PDFOpenOptions.newInstance());
                outputDocument.save(outputFile, pdfSaveOptions);
                nextPageIndex++;
                /*
                 * Add pages, one at a time, until either the file size
                 * exceeds the maximum or we run out of pages. The index is
                 * checked first so getPage() is never asked for a page past
                 * the end of the source document.
                 */
                while (nextPageIndex < numPages && outputDocument.getFileSize() <= maxFileSizeInBytes) {
                    // Re-open the file that was just saved and append to it.
                    // NOTE(review): these per-chunk streams/readers (and the
                    // ByteWriter above) are not closed here; confirm against
                    // the toolkit documentation who owns them after save().
                    FileInputStream fileInputStream = new FileInputStream(extractedFilePath);
                    ByteReader outputReader = new InputStreamByteReader(fileInputStream);
                    outputDocument = PDFDocument.newInstance(outputReader, PDFOpenOptions.newInstance());
                    PMMService targetPMMService = new PMMService(outputDocument);
                    targetPMMService.insertPages(
                            outputDocument.requirePages().getLastPage(),
                            pdfDocument.requirePages().getPage(nextPageIndex),
                            1,
                            null, null);
                    nextPageIndex++;
                    outputDocument.save(outputFile, pdfSaveOptions);
                }
            }
            System.out.println("Done!");
        } finally {
            // Release the source document whether or not the split succeeded.
            pdfDocument.close();
        }
    }
}
	/*
	* Copyright Datalogics, Inc. 2015
	*/

	package pdfjt.cookbook.document;

	import com.adobe.internal.io.ByteReader;
	import com.adobe.internal.io.ByteWriter;
	import com.adobe.internal.io.InputStreamByteReader;
	import com.adobe.pdfjt.pdf.document.PDFDocument;
	import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
	import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions;
	import com.adobe.pdfjt.pdf.document.PDFSaveOptions;
	import com.adobe.pdfjt.pdf.page.PDFPage;
	import com.adobe.pdfjt.services.manipulations.PMMOptions;
	import com.adobe.pdfjt.services.manipulations.PMMService;

	import java.io.FileInputStream;
	import java.io.InputStream;
	import java.net.URL;
	import java.net.URLConnection;

	import pdfjt.util.SampleFileServices;

	/**
	 * Splits a document into several smaller documents based on file size.
	 *
	 * The source PDF is downloaded from {@code inputPDFURL} and its pages are
	 * copied, in order, into sequentially numbered files
	 * ({@code Extracted_Pages_001.pdf}, {@code Extracted_Pages_002.pdf}, ...)
	 * under {@code outputDir}. Each output file starts with a single page and
	 * receives further pages one at a time until its reported size exceeds
	 * {@code maxFileSizeinMB} or the source runs out of pages.
	 *
	 * What you need to know first: In order to accurately calculate the file size,
	 * the PDF file must be saved to disk between each page append.
	 *
	 * Note: the size is checked after a page has been appended and saved, so an
	 * output file can end up larger than the configured maximum.
	 */
	public class SplitDocumentBasedOnFileSize {

		private static final String inputPDFURL = "http://dev.datalogics.com/cookbook/document/AcrobatDC_PDFCreationSettings.pdf";
		private static final String outputDir = "cookbook/Document/output/split/";
		private static final String baseFileName = "Extracted_Pages";
		private static final long MB = 1024L * 1024L;
		private static double maxFileSizeinMB = .5;

		/**
		 * Downloads the input PDF and writes the size-limited pieces to
		 * {@code outputDir}.
		 *
		 * @param args not used
		 * @throws Exception if the download fails or any toolkit call fails
		 */
		public static void main(String[] args) throws Exception {
			/*
			 * Read in PDF input file
			 */
			URLConnection connection = new URL(inputPDFURL).openConnection();
			connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
			connection.connect();
			InputStream fis = connection.getInputStream();
			ByteReader byteReader = new InputStreamByteReader(fis);
			PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
			try {
				// Set up the Page Manager Service
				PMMService sourcePMMService = new PMMService(pdfDocument);
				// calculate the maximum file size
				long maxFileSizeInBytes = (long) (maxFileSizeinMB * MB);
				// Get the total number of pages
				int numPages = pdfDocument.requirePages().getNumPages();
				PDFSaveOptions pdfSaveOptions = PDFSaveFullOptions.newInstance();
				pdfSaveOptions.setForceCompress(true);
				/*
				 * The new documents will be sequentially numbered
				 */
				int numExtractedDocuments = 0;
				/*
				 * Index of the next source page that has not been written to
				 * any output document yet.
				 */
				int nextPageIndex = 0;
				/*
				 * Walk through the pages, starting a new output document every
				 * time the previous one has grown past the maximum file size.
				 */
				while (nextPageIndex < numPages) {
					numExtractedDocuments++;
					String extractedFileName = baseFileName + "_" + String.format("%03d", numExtractedDocuments) + ".pdf";
					String extractedFilePath = outputDir + extractedFileName;
					ByteWriter outputFile = SampleFileServices.getRAFByteWriter(extractedFilePath);
					/*
					 * Create a new, one page, document. The first page is
					 * always looked up from the running index so that a page
					 * is never written twice, even when a single page is
					 * already larger than the limit.
					 */
					PDFPage startPage = pdfDocument.requirePages().getPage(nextPageIndex);
					PDFDocument outputDocument = sourcePMMService.extractPages(startPage, 1, PMMOptions.newInstanceAll(), PDFOpenOptions.newInstance());
					outputDocument.save(outputFile, pdfSaveOptions);
					nextPageIndex++;
					/*
					 * Add pages, one at a time, until either the file size
					 * exceeds the maximum or we run out of pages. The index is
					 * checked first so getPage() is never asked for a page
					 * past the end of the source document.
					 */
					while (nextPageIndex < numPages && outputDocument.getFileSize() <= maxFileSizeInBytes) {
						// Re-open the file that was just saved and append to it.
						// NOTE(review): these per-chunk streams/readers (and the
						// ByteWriter above) are not closed here; confirm against
						// the toolkit documentation who owns them after save().
						FileInputStream fileInputStream = new FileInputStream(extractedFilePath);
						ByteReader outputReader = new InputStreamByteReader(fileInputStream);
						outputDocument = PDFDocument.newInstance(outputReader, PDFOpenOptions.newInstance());
						PMMService targetPMMService = new PMMService(outputDocument);
						targetPMMService.insertPages(
								outputDocument.requirePages().getLastPage(),
								pdfDocument.requirePages().getPage(nextPageIndex),
								1,
								null, null);
						nextPageIndex++;
						outputDocument.save(outputFile, pdfSaveOptions);
					}
				}
				System.out.println("Done!");
			} finally {
				// Release the source document whether or not the split succeeded.
				pdfDocument.close();
			}
		}
	}