Skip to content

Instantly share code, notes, and snippets.

@JoelGeraci-Datalogics
Created June 2, 2016 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoelGeraci-Datalogics/d7fc3faa1112345201b6ed1e62aeb049 to your computer and use it in GitHub Desktop.
Save JoelGeraci-Datalogics/d7fc3faa1112345201b6ed1e62aeb049 to your computer and use it in GitHub Desktop.
This sample locates words and adds Polygon annotations over the word quads.
/*
* Copyright Datalogics, Inc. 2015
*/
package pdfjt.cookbook.document;
import com.adobe.fontengine.font.Font;
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.pdfjt.core.types.ASName;
import com.adobe.pdfjt.core.types.ASQuad;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.graphics.font.PDFFont;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFAnnotationEnum;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFAnnotationPolygon;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFBorder;
import com.adobe.pdfjt.pdf.page.PDFPage;
import com.adobe.pdfjt.services.ap.AppearanceService;
import com.adobe.pdfjt.services.ap.spi.APContext;
import com.adobe.pdfjt.services.ap.spi.APResources;
import com.adobe.pdfjt.services.textextraction.TextExtractor;
import com.adobe.pdfjt.services.textextraction.Word;
import com.adobe.pdfjt.services.textextraction.WordsIterator;
import com.datalogics.pdf.document.DocumentHelper;
import com.datalogics.pdf.document.FontSetLoader;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.lang3.ArrayUtils;
/**
 * This sample locates words and adds Polygon annotations over the word quads.
 *
 * <p>The input PDF is downloaded from {@link #INPUT_PDF_URL}, every word found by the
 * {@link TextExtractor} is printed to standard output, one Polygon annotation is added to the
 * first page for each bounding quad of each word, appearances are generated for the Polygon
 * annotations, and the result is saved under {@link #OUTPUT_DIR}.
 */
public class Quadrilaterals {

    /** Location of the sample PDF that is downloaded and annotated. */
    private static final String INPUT_PDF_URL = "http://dev.datalogics.com/cookbook/document/Quadrilaterals.pdf";

    /** Directory (relative to the working directory) that receives the annotated PDF. */
    private static final String OUTPUT_DIR = "cookbook/Document/output/";

    /** Annotation color: RGB red. */
    private static final double[] RED = { 1.0, 0, 0 };

    /**
     * Runs the sample: download, extract words, annotate, generate appearances, save.
     *
     * @param args command-line arguments; not used
     * @throws Exception if downloading, parsing, annotating or saving the document fails
     */
    public static void main(String[] args) throws Exception {
        // First read in the PDF file
        URLConnection connection = new URL(INPUT_PDF_URL).openConnection();
        connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
        connection.connect();

        // The stream is kept open until the document has been saved and closed, because the
        // ByteReader built on top of it may read lazily; try-with-resources then releases it.
        try (InputStream fis = connection.getInputStream()) {
            ByteReader byteReader = new InputStreamByteReader(fis);
            PDFDocument pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());

            // Then get the first (and only) page in the file. We'll need this object in order to
            // add annotations to it.
            PDFPage pdfPageOne = pdfDocument.requirePages().getPage(0);

            TextExtractor textExtractor = TextExtractor.newInstance(pdfDocument, FontSetLoader.newInstance().getFontSet());
            WordsIterator wordsIterator = textExtractor.getWordsIterator();

            System.out.println("Words Found:");
            while (wordsIterator.hasNext()) {
                /*
                 * A Word object contains Unicode string representing the word
                 * along with a list of bounding boxes as needed to enclose all
                 * of the characters. If the characters of the word are
                 * collinear, then there might only be one bounding box covering
                 * the entire word. If the characters follow a curved path, then
                 * a list of bounding boxes, potentially one for each character,
                 * will be returned.
                 */
                Word word = wordsIterator.next();
                System.out.println(word.toString());
                List<ASQuad> quads = word.getBoundingQuads();

                /*
                 * Now we iterate over each word quad and create a Polygon annotation using the
                 * same coordinates.
                 */
                for (ASQuad quad : quads) {
                    // Append the first point again so the vertex list ends where it started.
                    // ArrayUtils.addAll returns a NEW array and leaves its argument untouched,
                    // so the result has to be captured for the extra point to take effect.
                    double[] vertices = ArrayUtils.addAll(quad.getValues(), quad.p1().x(), quad.p1().y());

                    PDFAnnotationPolygon pdfAnnotationPolygon = PDFAnnotationPolygon.newInstance(pdfDocument);
                    pdfAnnotationPolygon.setDictionaryArrayValue(ASName.k_Vertices, vertices);
                    pdfAnnotationPolygon.setColor(RED);

                    PDFBorder pdfBorder = PDFBorder.newInstance(pdfDocument);
                    pdfBorder.setWidth(0);
                    pdfAnnotationPolygon.setBorder(pdfBorder);

                    pdfPageOne.addAnnotation(pdfAnnotationPolygon);
                }
            }

            // Now create the appearances of the Polygon annotations
            APResources apResources = new APResources(pdfDocument.getCosDocument().getOptions().getFontSet(),
                    pdfDocument.getCosDocument().getOptions().getDocLocale(),
                    new HashMap<Font, PDFFont>());
            APContext apContext = new APContext(apResources, true, null);
            // Restrict appearance generation to the Polygon annotations.
            apContext.setAnnotationsToBeProcessed(EnumSet.of(PDFAnnotationEnum.Polygon));
            AppearanceService.generateAppearances(pdfDocument, apContext, null);

            // Save and close
            DocumentHelper.saveFullAndClose(pdfDocument, OUTPUT_DIR + "Quadrilaterals.pdf");
        }

        System.out.println("Done!");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment