Skip to content

Instantly share code, notes, and snippets.

@archagon
Last active November 20, 2018 00:14
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save archagon/5737603 to your computer and use it in GitHub Desktop.
Save archagon/5737603 to your computer and use it in GitHub Desktop.
A quick bit of code to generate PDFs from scans, using the iText open-source library.
import java.io.IOException;
import java.io.FileOutputStream;
import java.io.File;
import java.io.FilenameFilter;
import java.util.Map;
import java.util.HashMap;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.PdfTemplate;
import com.itextpdf.text.pdf.PdfContentByte;
// A note to programming pedants: yes, I should have subclassed the "Normal" and "Zine"
// functionality, but I wanted to fit the whole thing onto one page for Gist compatibility.
// Hence the somewhat crappy logic.
// As with all my Gists, this was done quick & dirty for completing the task at hand.
/**
 * Generates a single PDF from a directory of scanned page images, using iText.
 *
 * Scans are looked up using the default Apple scan naming format: "Base", "Base 1",
 * "Base 2", etc., followed by the image extension. Each scan holds either one page
 * (Normal) or two facing pages (Zine); the output shows either one page per PDF page
 * or, when sideBySide is set, two-page spreads with separate front and back covers.
 */
public class PDFGenComplex
{
    /** How pages are laid out inside each scanned image. */
    public enum ScanType
    {
        Normal,
        Zine // zines have 2 pages per scan, with [last, first][first+1, last-1], [last-2, first+2][first+3, last-3], etc., ordering
    }

    /** Image format of the scans; the lower-cased constant name is used as the file extension. */
    public enum ImageType
    {
        PNG, // I might be wrong, but it looks like PDFs support JPG natively, but not PNG
        JPG
    }

    /** Which half of a scan (input) or of a spread (output) a page occupies. */
    public enum PageLocation
    {
        None, // default for full page scans
        Left,
        Right
    }

    // these get initialized on startup
    protected String inputDirectory;
    protected String baseInputFilename;
    protected String outputFilename;
    protected ScanType scanType;
    protected ImageType imageType;
    protected boolean sideBySide;

    // these get initialized when CreatePDF is called; I won't bother to throw exceptions if accessed before then
    protected Document document;
    protected PdfWriter writer;
    protected Map<String, Image> imageDictionary;

    /**
     * @param inputDirectory    directory containing the scanned images
     * @param baseInputFilename scan name without number suffix or extension (e.g. "ScannedPage")
     * @param outputFilename    path of the PDF to write
     * @param scanType          whether each scan holds one page or two
     * @param imageType         image format (and thus extension) of the scans
     * @param sideBySide        true to lay interior pages out as two-page spreads
     */
    public PDFGenComplex(String inputDirectory, String baseInputFilename, String outputFilename, ScanType scanType, ImageType imageType, boolean sideBySide)
    {
        this.inputDirectory = inputDirectory;
        this.baseInputFilename = baseInputFilename;
        this.outputFilename = outputFilename;
        this.scanType = scanType;
        this.imageType = imageType;
        this.sideBySide = sideBySide;
    }

    /**
     * Reads the scans from the input directory and writes them, in page order, to the output PDF.
     *
     * Pages whose scan file cannot be found (or is ambiguous) are reported on stderr and skipped.
     *
     * @throws IOException       if the input directory is not a directory, cannot be listed,
     *                           contains no matching scans, or an image/output file cannot be accessed
     * @throws DocumentException if iText rejects the document or one of the images
     */
    public void CreatePDF() throws IOException, DocumentException
    {
        // Validate the input before touching the output, so that a bad input path
        // does not leave a freshly created, page-less output file behind.
        File dir = new File(this.inputDirectory);
        if (!dir.isDirectory())
        {
            throw new IOException("Input directory is not a directory: " + this.inputDirectory);
        }

        final String extension = "." + this.imageType.toString().toLowerCase();

        // Only files that follow the scan naming scheme are counted: the page count drives
        // the zine page-to-scan mapping, so unrelated images must not inflate it.
        File[] files = dir.listFiles(new FilenameFilter()
        {
            @Override
            public boolean accept(File parent, String name)
            {
                return IsScanFilename(name, extension);
            }
        });
        if (files == null)
        {
            // File.listFiles returns null (rather than throwing) when the listing fails
            throw new IOException("Unable to list the contents of " + this.inputDirectory);
        }
        if (files.length == 0)
        {
            throw new IOException("No \"" + this.baseInputFilename + "\" scans with extension " + extension + " found in " + this.inputDirectory);
        }

        int numPages = files.length * (this.scanType == ScanType.Zine ? 2 : 1);

        this.imageDictionary = new HashMap<String, Image>();

        FileOutputStream output = new FileOutputStream(this.outputFilename);
        try
        {
            // PDF setup
            this.document = new Document();
            this.writer = PdfWriter.getInstance(this.document, output);
            this.document.open();

            System.out.println("Beginning file processing...");
            System.out.println("Number of pages: " + numPages);

            // for use with sideBySide: the left-hand page of a spread, waiting for its right-hand partner
            File pendingPage = null;
            int pendingPageNumber = 0;

            for (int i = 1; i <= numPages; i++)
            {
                String filename = GetBaseFilenameFromPageNumber(i, numPages) + extension;

                File pageFile = FindPageFile(files, filename);
                if (pageFile == null)
                {
                    continue;
                }
                System.out.println("Processing " + filename + "...");
                String pagePath = pageFile.getAbsolutePath();

                if (!this.sideBySide)
                {
                    AddPageToDocument(pagePath, GetInputLocationForPage(i));
                    continue;
                }

                // A spread is an even page followed directly by the next (odd) page;
                // the last page is the back cover and always stands alone.
                boolean completesSpread = pendingPage != null && pendingPageNumber == i - 1 && i != numPages;
                if (completesSpread)
                {
                    AddPagesToDocument(pendingPage.getAbsolutePath(), pagePath, GetInputLocationForPage(pendingPageNumber), GetInputLocationForPage(i));
                    pendingPage = null;
                    continue;
                }

                // The waiting page has no partner (its neighbour is missing, or the back
                // cover is next): emit it on its own instead of silently dropping it.
                if (pendingPage != null)
                {
                    AddPageToDocument(pendingPage.getAbsolutePath(), GetInputLocationForPage(pendingPageNumber));
                    pendingPage = null;
                }

                boolean startsSpread = i % 2 == 0 && i != numPages;
                if (startsSpread)
                {
                    pendingPage = pageFile;
                    pendingPageNumber = i;
                }
                else
                {
                    // front cover (page 1), back cover (last page), or a right-hand page whose partner is missing
                    AddPageToDocument(pagePath, GetInputLocationForPage(i));
                }
            }

            // the partner of the last waiting page was never found
            if (pendingPage != null)
            {
                AddPageToDocument(pendingPage.getAbsolutePath(), GetInputLocationForPage(pendingPageNumber));
            }

            System.out.println("File processing ended!");
            this.document.close();
        }
        finally
        {
            // Make sure the file handle is released even when processing fails half-way;
            // closing an already closed FileOutputStream has no effect.
            output.close();
        }
    }

    // Returns true if name is "<base><extension>" or "<base> <digits><extension>" (case-insensitively).
    private boolean IsScanFilename(String name, String extension)
    {
        int stemLength = name.length() - extension.length();
        if (stemLength < 0 || !name.regionMatches(true, stemLength, extension, 0, extension.length()))
        {
            return false;
        }

        String stem = name.substring(0, stemLength);
        String base = this.baseInputFilename;
        if (stem.equalsIgnoreCase(base))
        {
            return true;
        }

        // otherwise expect the base name, one space, and at least one digit
        int numberStart = base.length() + 1;
        if (stem.length() <= numberStart || !stem.regionMatches(true, 0, base, 0, base.length()) || stem.charAt(base.length()) != ' ')
        {
            return false;
        }
        for (int i = numberStart; i < stem.length(); i++)
        {
            if (!Character.isDigit(stem.charAt(i)))
            {
                return false;
            }
        }
        return true;
    }

    // Returns the one candidate whose name matches filename (ignoring case), or null
    // (after reporting the count on stderr) if there is not exactly one such candidate.
    private File FindPageFile(File[] candidates, String filename)
    {
        File match = null;
        int count = 0;
        for (File candidate : candidates)
        {
            if (candidate.getName().equalsIgnoreCase(filename))
            {
                match = candidate;
                count++;
            }
        }
        if (count != 1)
        {
            System.err.println("Count for " + filename + " is incorrect: " + count);
            return null;
        }
        return match;
    }

    // even pages are to the left of a zine scan, odd pages are to the right
    private PageLocation GetInputLocationForPage(int pageNumber)
    {
        return (pageNumber % 2 == 0 ? PageLocation.Left : PageLocation.Right);
    }

    /**
     * Starts a new single-width PDF page and draws one page of the given scan on it.
     *
     * @param imageFilename path of the scan to draw
     * @param pageLocation  which half of the scan to draw; ignored if the scanType is not Zine
     */
    protected void AddPageToDocument(String imageFilename, PageLocation pageLocation) throws IOException, DocumentException
    {
        Image img = GetImageForFilename(imageFilename);
        SetupNewPage(img, false);
        AddImageToCurrentPage(img, pageLocation, PageLocation.None, false);
    }

    /**
     * Starts a new double-width PDF page and draws two pages on it, the first on the left
     * and the second on the right.
     *
     * @param imageFilename1 path of the scan holding the left-hand output page
     * @param imageFilename2 path of the scan holding the right-hand output page
     * @param pageLocation1  which half of the first scan to draw; ignored if the scanType is not Zine
     * @param pageLocation2  which half of the second scan to draw; ignored if the scanType is not Zine
     */
    protected void AddPagesToDocument(String imageFilename1, String imageFilename2, PageLocation pageLocation1, PageLocation pageLocation2) throws IOException, DocumentException
    {
        Image img1 = GetImageForFilename(imageFilename1);
        Image img2 = GetImageForFilename(imageFilename2);
        SetupNewPage(img1, true); //assuming img1 and img2 are the same size
        AddImageToCurrentPage(img1, pageLocation1, PageLocation.Left, true);
        AddImageToCurrentPage(img2, pageLocation2, PageLocation.Right, true);
    }

    /**
     * Sets the page size from the given image and starts a new page.
     *
     * @param image      image whose single-page width and height define the page size
     * @param sideBySide true to make the page two single-page widths wide
     */
    protected void SetupNewPage(Image image, boolean sideBySide)
    {
        float width = GetSinglePageWidth(image);
        if (sideBySide)
        {
            width *= 2;
        }
        this.document.setPageSize(new Rectangle(0, 0, width, image.getHeight()));
        this.document.newPage();
    }

    /**
     * Draws one page of the given scan onto the current PDF page.
     *
     * @param image              the scan to draw from
     * @param inputPageLocation  which half of the scan to draw; ignored if scanType is not Zine
     * @param outputPageLocation which half of the PDF page to draw on; ignored if sideBySide is false
     * @param sideBySide         whether the current PDF page is a two-page spread
     */
    protected void AddImageToCurrentPage(Image image, PageLocation inputPageLocation, PageLocation outputPageLocation, boolean sideBySide) throws DocumentException
    {
        // shift the image left by one page width so that its right half falls inside the template
        float imgX = 0;
        if (this.scanType == ScanType.Zine && inputPageLocation == PageLocation.Right)
        {
            imgX = -GetSinglePageWidth(image);
        }
        image.setAbsolutePosition(imgX, 0);

        float outputX = 0;
        if (sideBySide && outputPageLocation == PageLocation.Right)
        {
            outputX = GetSinglePageWidth(image);
        }

        // this stuff allows you to "crop" images (which really means that a certain part of the image, which is included in full, is rendered)
        PdfContentByte contentByte = this.writer.getDirectContent();
        PdfTemplate template = contentByte.createTemplate(GetSinglePageWidth(image), image.getHeight());
        template.addImage(image);
        contentByte.addTemplate(template, outputX, 0);
    }

    /** Returns the width of one page within the given scan: half the image width for zines, the full width otherwise. */
    protected float GetSinglePageWidth(Image image)
    {
        if (this.scanType == ScanType.Zine)
        {
            return image.getWidth()/2.0f;
        }
        return image.getWidth();
    }

    // Caching the images ensures that each scanned double-page of a zine gets added to the PDF only once, halving the file size.
    protected Image GetImageForFilename(String filename) throws IOException, DocumentException
    {
        Image image = this.imageDictionary.get(filename);
        if (image == null)
        {
            image = Image.getInstance(filename);
            this.imageDictionary.put(filename, image);
        }
        return image;
    }

    /**
     * Returns the scan name (without extension) that holds the given page.
     * Pages are 1-indexed. Assumes default Apple scan naming format: BaseFilename, BaseFilename 1, BaseFilename 2, etc.
     *
     * For zines, the first half of the pages map to scans 0, 1, 2, ... and the second
     * half map back down again, so the last page shares scan 0 with the first page.
     *
     * @param pageNumber 1-indexed page number
     * @param maxPages   total number of pages in the document
     */
    protected String GetBaseFilenameFromPageNumber(int pageNumber, int maxPages)
    {
        int actualPage;
        if (this.scanType == ScanType.Zine && pageNumber > maxPages/2)
        {
            int negativePageNumber = maxPages/2 - pageNumber;
            actualPage = maxPages/2 + negativePageNumber;
        }
        else
        {
            actualPage = pageNumber - 1;
        }

        // the very first scan has no number suffix
        if (actualPage == 0)
        {
            return this.baseInputFilename;
        }
        return this.baseInputFilename + " " + actualPage;
    }

    public static void main(String[] args) throws IOException, DocumentException
    {
        PDFGenComplex pdfGen = new PDFGenComplex(
            "/Directory/To/Scanned/Pages",
            "ScannedPage",
            "/Directory/To/Scanned/Pages/Output.pdf",
            ScanType.Zine,
            ImageType.PNG,
            true);
        pdfGen.CreatePDF();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment