Skip to content

Instantly share code, notes, and snippets.

@dyllanwli
Created July 23, 2019 07:39
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save dyllanwli/58fe0db862c823ebfc2fde1f8678d14c to your computer and use it in GitHub Desktop.
Save dyllanwli/58fe0db862c823ebfc2fde1f8678d14c to your computer and use it in GitHub Desktop.
Convert PDF to JPEG, multi-page TIFF, or image-only PDF files
package org.doc2pdf.lambda;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import javax.imageio.ImageIO;
// import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
/**
 * Renders every page of a PDF document to its own JPEG image.
 */
public class Pdf2JpegConverter {

    /**
     * Converts each page of the given PDF into a JPEG image.
     *
     * <p>The PDF bytes are spooled to a uniquely named temporary file, rendered page by page,
     * and the temporary file is removed again before this method returns. A unique file is used
     * (instead of one fixed path) so that overlapping or repeated invocations can never read
     * each other's data.
     *
     * @param baos stream holding the complete PDF document; must not be {@code null}
     * @param DPI  resolution, in dots per inch, passed to the PDF renderer
     * @return one stream per page, in page order, each holding the JPEG-encoded page
     * @throws IllegalArgumentException if {@code baos} is {@code null}
     * @throws Exception if the PDF cannot be staged, parsed, rendered or encoded; the
     *         originating {@link IOException} is attached as the cause
     */
    public ByteArrayOutputStream[] converter2jpeg(ByteArrayOutputStream baos, float DPI) throws Exception {
        System.out.println("Needs to convert pdf to jpeg...");
        if (baos == null) {
            throw new IllegalArgumentException("baos must not be null");
        }

        File tmpFile = null;
        try {
            tmpFile = writeToTempFile(baos);
            // try-with-resources closes the document; no explicit close() is needed.
            try (PDDocument document = PDDocument.load(tmpFile)) {
                PDFRenderer pdfRenderer = new PDFRenderer(document);
                int pageCount = document.getNumberOfPages();
                ByteArrayOutputStream[] imageBaos = new ByteArrayOutputStream[pageCount];
                for (int page = 0; page < pageCount; page++) {
                    BufferedImage image = pdfRenderer.renderImageWithDPI(page, DPI, ImageType.RGB);
                    ByteArrayOutputStream stream = new ByteArrayOutputStream();
                    // ImageIO.write returns false (without throwing) when no writer accepts the
                    // image; treat that as a failure rather than returning an empty stream.
                    if (!ImageIO.write(image, "jpeg", stream)) {
                        throw new IOException("No JPEG writer available for page " + page);
                    }
                    imageBaos[page] = stream;
                }
                return imageBaos;
            }
        } catch (IOException e) {
            throw new Exception("Failed to convert PDF to JPEG", e);
        } finally {
            deleteQuietly(tmpFile);
        }
    }

    /** Writes the PDF bytes to a fresh temporary file and returns it. */
    private static File writeToTempFile(ByteArrayOutputStream baos) throws IOException {
        File tmpFile = File.createTempFile("pdf2jpeg-", ".pdf");
        try (OutputStream outputStream = new FileOutputStream(tmpFile)) {
            baos.writeTo(outputStream);
        } catch (IOException e) {
            // Do not leave a partially written file behind.
            deleteQuietly(tmpFile);
            throw e;
        }
        return tmpFile;
    }

    /** Best-effort removal of the temporary file; falls back to deletion at JVM exit. */
    private static void deleteQuietly(File file) {
        if (file != null && file.exists() && !file.delete()) {
            file.deleteOnExit();
        }
    }
}
package org.doc2pdf.lambda;
import java.io.File;
import java.awt.AlphaComposite;
import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.RenderingHints;
import java.awt.image.BufferedImage;
import java.io.FileOutputStream;
import java.io.IOException;
// import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
// import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import java.io.ByteArrayOutputStream;
import java.io.OutputStream;
/**
 * Rebuilds a PDF as an image-only PDF: every page is rendered to a bitmap and that bitmap
 * becomes the sole content of the corresponding page in the new document.
 */
public class Pdf2SamplePdfConverter {

    /** Rendering resolution used when the caller supplies a non-positive (or NaN) DPI. */
    private static final float DEFAULT_DPI = 300;

    /**
     * Converts the given PDF into a PDF whose pages each contain a single rendered image.
     *
     * <p>The input is spooled to a uniquely named temporary file that is deleted before this
     * method returns. Both the source and the generated document are closed even when
     * rendering or saving fails.
     *
     * @param baos stream holding the complete source PDF; must not be {@code null}
     * @param DPI  resolution, in dots per inch, used to render each page; values that are not
     *             greater than zero fall back to {@value #DEFAULT_DPI}
     * @return a stream holding the newly built PDF
     * @throws IllegalArgumentException if {@code baos} is {@code null}
     * @throws Exception if the PDF cannot be staged, parsed, rendered or saved; the
     *         originating {@link IOException} is attached as the cause
     */
    public ByteArrayOutputStream converter2samplepdf(ByteArrayOutputStream baos, float DPI) throws Exception {
        System.out.println("Needs to convert pdf to sample pdf...");
        if (baos == null) {
            throw new IllegalArgumentException("baos must not be null");
        }
        // The DPI is passed down explicitly instead of being stored in a field, so the
        // converter keeps no state between calls.
        float dpi = DPI > 0 ? DPI : DEFAULT_DPI;

        ByteArrayOutputStream newBaos = new ByteArrayOutputStream();
        File tmpFile = null;
        try {
            tmpFile = writeToTempFile(baos);
            try (PDDocument newDoc = new PDDocument();
                    PDDocument oldDoc = PDDocument.load(tmpFile)) {
                PDFRenderer pdfRenderer = new PDFRenderer(oldDoc);
                int pageCount = oldDoc.getNumberOfPages();
                for (int pageIdx = 0; pageIdx < pageCount; pageIdx++) {
                    appendEachPage(newDoc, oldDoc, pageIdx, pdfRenderer, dpi);
                }
                newDoc.save(newBaos);
            }
        } catch (IOException e) {
            throw new Exception("Failed to convert PDF to image-only PDF", e);
        } finally {
            deleteQuietly(tmpFile);
        }
        return newBaos;
    }

    /** Writes the PDF bytes to a fresh temporary file and returns it. */
    private static File writeToTempFile(ByteArrayOutputStream baos) throws IOException {
        File tmpFile = File.createTempFile("pdf2samplepdf-", ".pdf");
        try (OutputStream outputStream = new FileOutputStream(tmpFile)) {
            baos.writeTo(outputStream);
        } catch (IOException e) {
            // Do not leave a partially written file behind.
            deleteQuietly(tmpFile);
            throw e;
        }
        return tmpFile;
    }

    /** Best-effort removal of the temporary file; falls back to deletion at JVM exit. */
    private static void deleteQuietly(File file) {
        if (file != null && file.exists() && !file.delete()) {
            file.deleteOnExit();
        }
    }

    /**
     * Shrinks {@code imgSize} so that it fits inside {@code boundary} while keeping the aspect
     * ratio. A size that already fits is returned unchanged; nothing is ever scaled up.
     * Each resulting side is at least 1 so the result is always a valid image size.
     */
    private static Dimension getScaledDimension(Dimension imgSize, Dimension boundary) {
        int originalWidth = imgSize.width;
        int originalHeight = imgSize.height;
        int newWidth = originalWidth;
        int newHeight = originalHeight;

        // First limit the width, deriving the height from the aspect ratio.
        if (originalWidth > boundary.width) {
            newWidth = boundary.width;
            // long arithmetic avoids int overflow in the intermediate product.
            newHeight = (int) (((long) newWidth * originalHeight) / originalWidth);
        }
        // If the height is still too large, limit it and derive the width instead.
        if (newHeight > boundary.height) {
            newHeight = boundary.height;
            newWidth = (int) (((long) newHeight * originalWidth) / originalHeight);
        }
        return new Dimension(Math.max(1, newWidth), Math.max(1, newHeight));
    }

    /**
     * Returns a copy of {@code img} scaled to {@code newW} x {@code newH} using bilinear
     * interpolation, keeping the source image type.
     */
    private static BufferedImage resizeWithoutScaledInstance(BufferedImage img, int newW, int newH) {
        BufferedImage dimg = new BufferedImage(newW, newH, img.getType());
        Graphics2D g2d = dimg.createGraphics();
        try {
            // Src replaces destination pixels outright instead of blending with them.
            g2d.setComposite(AlphaComposite.Src);
            g2d.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BILINEAR);
            g2d.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
            g2d.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
            g2d.drawImage(img, 0, 0, newW, newH, null);
        } finally {
            g2d.dispose();
        }
        return dimg;
    }

    /**
     * Renders page {@code pageIdx} of {@code oldDoc} at {@code dpi}, shrinks the bitmap to fit
     * within the page's media box (measured in whole PDF units), and appends it to
     * {@code newDoc} as a new page exactly as large as the bitmap.
     */
    private static void appendEachPage(PDDocument newDoc, PDDocument oldDoc, int pageIdx,
            PDFRenderer pdfRenderer, float dpi) throws IOException {
        PDRectangle mediaBox = oldDoc.getPage(pageIdx).getMediaBox();
        // Fractional page sizes are truncated, as in the original implementation.
        Dimension pageSize = new Dimension((int) mediaBox.getWidth(), (int) mediaBox.getHeight());

        BufferedImage image = pdfRenderer.renderImageWithDPI(pageIdx, dpi, ImageType.RGB);
        Dimension imgSize = new Dimension(image.getWidth(), image.getHeight());
        Dimension scaled = getScaledDimension(imgSize, pageSize);

        BufferedImage resizedImg = image;
        if (scaled.equals(imgSize)) {
            // Already fits the page: skip the (identity) resize and its extra image copy.
            System.out.println("No need to resize image. Continue.");
        } else {
            System.out.println("Resizing rendered image page to " + scaled);
            resizedImg = resizeWithoutScaledInstance(image, scaled.width, scaled.height);
        }

        PDPage newPage = new PDPage(new PDRectangle(scaled.width, scaled.height));
        newDoc.addPage(newPage);
        // Embed the bitmap with lossless compression and draw it at the page origin.
        PDImageXObject pdImage = LosslessFactory.createFromImage(newDoc, resizedImg);
        try (PDPageContentStream contents = new PDPageContentStream(newDoc, newPage)) {
            contents.drawImage(pdImage, 0, 0);
        }
    }
}
package org.doc2pdf.lambda;
import java.io.File;
import java.awt.image.BufferedImage;
import java.io.FileOutputStream;
import java.io.IOException;
// import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
// import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.OutputStream;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.stream.ImageOutputStream;
/**
 * Renders every page of a PDF document into a single multi-page TIFF.
 */
public class Pdf2TiffConverter {

    /**
     * Converts the given PDF into one multi-page, Deflate-compressed TIFF.
     *
     * <p>The input is spooled to a uniquely named temporary file that is deleted before this
     * method returns. Pages are rendered and handed to the TIFF writer one at a time, so only
     * the page currently being written is held as a bitmap.
     *
     * @param baos stream holding the complete PDF document; must not be {@code null}
     * @param DPI  resolution, in dots per inch, passed to the PDF renderer
     * @return a stream holding the complete TIFF file
     * @throws IllegalArgumentException if {@code baos} is {@code null}
     * @throws IllegalStateException if the runtime provides no ImageIO writer for TIFF
     * @throws Exception if the PDF cannot be staged, parsed, rendered or encoded; the
     *         originating {@link IOException} is attached as the cause
     */
    public ByteArrayOutputStream converter2tiff(ByteArrayOutputStream baos, float DPI) throws Exception {
        System.out.println("Needs to convert pdf to tiff...");
        if (baos == null) {
            throw new IllegalArgumentException("baos must not be null");
        }

        // Fail with a clear message instead of NoSuchElementException when no TIFF plugin exists.
        java.util.Iterator<ImageWriter> writers = ImageIO.getImageWritersByFormatName("TIFF");
        if (!writers.hasNext()) {
            throw new IllegalStateException("No ImageIO TIFF writer is available on this runtime");
        }
        ImageWriter writer = writers.next();

        ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
        File tmpFile = null;
        try {
            tmpFile = writeToTempFile(baos);
            // Resources close in reverse order: the image stream first, then the document.
            // Closing the image stream is what pushes the buffered TIFF data into imageBaos,
            // so imageBaos is only read after this try block has completed.
            try (PDDocument document = PDDocument.load(tmpFile);
                    ImageOutputStream output = ImageIO.createImageOutputStream(imageBaos)) {
                if (output == null) {
                    throw new IOException("Unable to create an ImageOutputStream for the TIFF output");
                }
                PDFRenderer pdfRenderer = new PDFRenderer(document);
                int pageCount = document.getNumberOfPages();

                writer.setOutput(output);
                ImageWriteParam params = writer.getDefaultWriteParam();
                params.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
                // Deflate is lossless; uncompressed TIFF output was found to be too large.
                params.setCompressionType("Deflate");

                writer.prepareWriteSequence(null);
                for (int page = 0; page < pageCount; page++) {
                    BufferedImage image = pdfRenderer.renderImageWithDPI(page, DPI, ImageType.RGB);
                    IIOMetadata metadata = writer.getDefaultImageMetadata(new ImageTypeSpecifier(image), params);
                    writer.writeToSequence(new IIOImage(image, null, metadata), params);
                }
                writer.endWriteSequence();
            }
        } catch (IOException e) {
            throw new Exception("Failed to convert PDF to TIFF", e);
        } finally {
            // Release the writer's native/plugin resources and remove the staged PDF.
            writer.dispose();
            deleteQuietly(tmpFile);
        }

        System.out.println("imageBaos size: " + imageBaos.size());
        return imageBaos;
    }

    /** Writes the PDF bytes to a fresh temporary file and returns it. */
    private static File writeToTempFile(ByteArrayOutputStream baos) throws IOException {
        File tmpFile = File.createTempFile("pdf2tiff-", ".pdf");
        try (OutputStream outputStream = new FileOutputStream(tmpFile)) {
            baos.writeTo(outputStream);
        } catch (IOException e) {
            // Do not leave a partially written file behind.
            deleteQuietly(tmpFile);
            throw e;
        }
        return tmpFile;
    }

    /** Best-effort removal of the temporary file; falls back to deletion at JVM exit. */
    private static void deleteQuietly(File file) {
        if (file != null && file.exists() && !file.delete()) {
            file.deleteOnExit();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment