Skip to content

Instantly share code, notes, and snippets.

@JoelGeraci-Datalogics
Created August 18, 2015 23:39
Show Gist options
  • Save JoelGeraci-Datalogics/8417b555ae604a92a1e5 to your computer and use it in GitHub Desktop.
Save JoelGeraci-Datalogics/8417b555ae604a92a1e5 to your computer and use it in GitHub Desktop.
This sample will find all the words on the first page of the document and add links over them. The links will use the quads of the word rather than the rectangular bounding box. This allows for the creation of links that are not parallel to the edges of the page.
/*
* Copyright Datalogics, Inc. 2015
*/
package pdfjt.cookbook.document;
import com.adobe.internal.io.ByteReader;
import com.adobe.internal.io.ByteWriter;
import com.adobe.internal.io.InputStreamByteReader;
import com.adobe.pdfjt.core.fontset.PDFFontSet;
import com.adobe.pdfjt.core.types.ASQuad;
import com.adobe.pdfjt.pdf.document.PDFDocument;
import com.adobe.pdfjt.pdf.document.PDFOpenOptions;
import com.adobe.pdfjt.pdf.document.PDFSaveFullOptions;
import com.adobe.pdfjt.pdf.document.PDFText;
import com.adobe.pdfjt.pdf.interactive.action.PDFActionJavaScript;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFAnnotationLink;
import com.adobe.pdfjt.pdf.interactive.annotation.PDFBorder;
import com.adobe.pdfjt.pdf.page.PDFPage;
import com.adobe.pdfjt.services.fontresources.PDFFontSetUtil;
import com.adobe.pdfjt.services.textextraction.TextExtractor;
import com.adobe.pdfjt.services.textextraction.Word;
import com.adobe.pdfjt.services.textextraction.WordsIterator;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import pdfjt.util.SampleFileServices;
import pdfjt.util.SampleFontLoaderUtil;
/**
 * This sample finds words in the input document and adds invisible link
 * annotations over them on the first page. Each link uses the quadrilateral
 * (QuadPoints) of the word rather than only its rectangular bounding box,
 * which allows the creation of links whose hit area is not parallel to the
 * edges of the page.
 * <p>
 * Clicking a link runs a small JavaScript action that shows the word in an
 * alert box, so the hit area can be checked interactively in a viewer that
 * supports JavaScript actions.
 */
public class SetLinkHitArea {
    /** Location of the PDF that is downloaded and annotated. */
    private static final String inputPDF = "http://dev.datalogics.com/cookbook/document/LinksInput.pdf";

    /** Directory (created if necessary) that receives the annotated PDF. */
    private static final String outputDir = "cookbook/Document/output/";

    /**
     * Downloads the input PDF, adds one link annotation per word quadrilateral
     * to the first page, and saves the result into {@link #outputDir}.
     *
     * @param args command line arguments; not used
     * @throws Exception if the input cannot be read, the document cannot be
     *             processed, or the output cannot be written
     */
    static public void main(String[] args) throws Exception {
        // Declared outside the try so the finally block can release them even
        // when opening or processing fails part-way through.
        InputStream fis = null;
        PDFDocument pdfDocument = null;
        try {
            /*
             * Read in the input file and get the first page
             */
            fis = new URL(inputPDF).openStream();
            ByteReader byteReader = new InputStreamByteReader(fis);
            pdfDocument = PDFDocument.newInstance(byteReader, PDFOpenOptions.newInstance());
            PDFPage pdfPage = pdfDocument.requirePages().getPage(0);

            /*
             * Create a fontset and set up the text extractor.
             */
            PDFFontSet sysFontSet = SampleFontLoaderUtil.loadSampleFontSet();
            PDFFontSet fontset = PDFFontSetUtil.buildWorkingFontSet(pdfDocument, sysFontSet,
                    pdfDocument.getDocumentLocale(), null);
            TextExtractor textExtractor = TextExtractor.newInstance(pdfDocument, fontset);
            // NOTE(review): the iterator is requested without a page argument
            // while every link below is added to page 0 -- confirm that the
            // input is a single-page document or restrict the iteration.
            WordsIterator wordsIterator = textExtractor.getROTEWordsIterator();

            /*
             * Iterate over the words and use the geometry of their
             * quadrilaterals to create a link that precisely fits over the
             * word. Though most only have one, a word can have multiple
             * quadrilaterals. The QuadPoints key is an array of 8 x n numbers
             * specifying the coordinates of n quadrilaterals in default user
             * space that comprise the region in which the link should be
             * activated. The coordinates for each quadrilateral are given in
             * the order x1 y1 x2 y2 x3 y3 x4 y4 specifying the four vertices of
             * the quadrilateral in counterclockwise order.
             */
            while (wordsIterator.hasNext()) {
                Word word = wordsIterator.next();
                String wordText = word.toString();

                // Skip words that consist of exactly one space character.
                if (" ".equals(wordText)) {
                    continue;
                }
                System.out.println("Found: " + wordText);

                List<ASQuad> quads = word.getBoundingQuads();
                if (quads == null) {
                    continue;
                }

                // The word text is embedded in a single-quoted JavaScript
                // string literal, so it has to be escaped first; otherwise a
                // word such as "don't" would yield a script that cannot be
                // parsed.
                String script = "app.alert('" + escapeForJavaScript(wordText) + "')";

                for (ASQuad wordQuad : quads) {
                    /*
                     * QuadPoints are ignored if any coordinate in the array
                     * lies outside the region specified by Rect so we need to
                     * find the lower left and upper right coordinates of the
                     * Quad and then expand it by one point in each direction
                     * just to be safe and then use those values as our Rect.
                     */
                    double[] x = { wordQuad.p1().x(), wordQuad.p2().x(), wordQuad.p3().x(), wordQuad.p4().x() };
                    double[] y = { wordQuad.p1().y(), wordQuad.p2().y(), wordQuad.p3().y(), wordQuad.p4().y() };
                    // After sorting, index 0 holds the minimum and index 3
                    // the maximum of the four vertex coordinates.
                    Arrays.sort(x);
                    Arrays.sort(y);
                    double llx = x[0] - 1;
                    double lly = y[0] - 1;
                    double urx = x[3] + 1;
                    double ury = y[3] + 1;

                    /*
                     * Now we create the link annotation setting both the
                     * QuadPoints and the Rect keys.
                     */
                    PDFAnnotationLink pdfAnnotationLink = PDFAnnotationLink.newInstance(pdfDocument);
                    pdfAnnotationLink.setQuadPoints(wordQuad.getValues());
                    pdfAnnotationLink.setRect(llx, lly, urx, ury);

                    /*
                     * We want the link to be invisible so we set the border
                     * width to zero.
                     */
                    PDFBorder pdfBorder = PDFBorder.newInstance(pdfDocument);
                    pdfBorder.setWidth(0);
                    pdfAnnotationLink.setBorder(pdfBorder);

                    /*
                     * Now we set a simple JavaScript as the action for the
                     * link so we can see the hit area working properly in
                     * Acrobat and Reader and finally add it to the page.
                     */
                    PDFActionJavaScript pdfActionJavaScript = PDFActionJavaScript.newInstance(pdfDocument,
                            PDFText.createString(pdfDocument, script));
                    pdfAnnotationLink.setAction(pdfActionJavaScript);
                    pdfPage.addAnnotation(pdfAnnotationLink);
                }
            }

            // Save the file.
            SampleFileServices.createDir(outputDir);
            // NOTE(review): outputFile is not closed explicitly here; confirm
            // whether PDFDocument.save() closes the ByteWriter it is given.
            ByteWriter outputFile = SampleFileServices.getRAFByteWriter(outputDir + "LinksOuput.pdf");
            pdfDocument.save(outputFile, PDFSaveFullOptions.newInstance());
        } finally {
            // Release the document first and the underlying network stream
            // afterwards; the nested finally guarantees that the stream is
            // closed even if closing the document throws.
            try {
                if (pdfDocument != null) {
                    pdfDocument.close();
                }
            } finally {
                if (fis != null) {
                    fis.close();
                }
            }
        }
    }

    /**
     * Escapes text so that it can be placed between single quotes in a
     * JavaScript string literal.
     *
     * @param text the raw text to embed; must not be {@code null}
     * @return the text with backslashes, single quotes, line feeds and
     *         carriage returns replaced by their JavaScript escape sequences
     */
    private static String escapeForJavaScript(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 8);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            switch (c) {
            case '\\':
                escaped.append("\\\\");
                break;
            case '\'':
                escaped.append("\\'");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            default:
                escaped.append(c);
                break;
            }
        }
        return escaped.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment