Created
March 12, 2012 07:16
-
-
Save andreyvit/2020422 to your computer and use it in GitHub Desktop.
FPDI extension to preserve external hyperlinks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running them through
// this code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// Then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Recursively dereferences a parsed PDF value.
    //
    // Object references are looked up through $parser->pdf_resolve_object(), objects are
    // unwrapped to their value, and every element of an array or dictionary is resolved in turn.
    // Recursion stops once $maxdepth levels have been consumed, returning the value reached at
    // that point unchanged (it may still be an unresolved reference).
    //
    // The default maxdepth prevents an infinite recursion on malformed PDFs (not theoretical,
    // actually found in the wild).
    //
    // NOTE: because every reference is followed, a page reference stored inside a destination
    // array is replaced by the referenced value whenever the depth budget allows, which loses the
    // object number needed to identify the target page. importPage() therefore does not use this
    // method for link extraction; it is kept for callers that rely on it.
    function resolve(&$parser, $smt, $maxdepth=10) {
        if ($maxdepth == 0)
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);
        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);
        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;
        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;
        } else {
            return $smt;
        }
    }

    // Returns the 1-based page number whose 'obj'/'gen' entries in $parser->pages match the
    // given object reference ($pageRef[1] = object number, $pageRef[2] = generation number),
    // or -1 when no page matches.
    function findPageNoForRef(&$parser, $pageRef) {
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            $page_obj = $page['obj']; $page_gen = $page['gen'];
            if ($page_obj == $ref_obj && $page_gen == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Follows a chain of object references / object wrappers until a direct value is reached.
    // Unlike resolve(), it never descends into arrays or dictionaries, so references stored
    // inside them (e.g. the page reference of a destination) stay intact.
    //
    // Returns the direct value, or false when $value is not a parsed PDF value or the chain is
    // longer than $maxhops (guards against reference loops in malformed files).
    function _annotsDeref(&$parser, $value, $maxhops = 10) {
        for ($hops = 0; $hops < $maxhops; $hops++) {
            if (!is_array($value) || !isset($value[0])) {
                return false;
            }
            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT) {
                if (!isset($value[1])) {
                    return false;
                }
                $value = $value[1];
            } else {
                return $value;
            }
        }
        return false;
    }

    // Returns the dereferenced value stored under $key in the dictionary $dict, or false when
    // the key is absent or cannot be dereferenced.
    function _annotsEntry(&$parser, $dict, $key) {
        if (!isset($dict[1][$key])) {
            return false;
        }
        return $this->_annotsDeref($parser, $dict[1][$key]);
    }

    // Tells whether the annotation dictionary describes a link:
    //   << /Type /Annot /Subtype /Link /Rect [...] ... >>
    // /Type is optional for annotation dictionaries, so it is only checked when present.
    function _annotsIsLink(&$parser, $annot) {
        $subtype = $this->_annotsEntry($parser, $annot, '/Subtype');
        if ($subtype === false || !isset($subtype[1]) || $subtype[1] !== '/Link') {
            return false;
        }
        if (!isset($annot[1]['/Type'])) {
            return true;
        }
        $type = $this->_annotsEntry($parser, $annot, '/Type');
        return $type !== false && isset($type[1]) && $type[1] === '/Annot';
    }

    // Converts the annotation's /Rect [x1 y1 x2 y2] into array(x, y, w, h) with
    // w = x2 - x1 and h = -(y2 - y1), the layout stored in each link record.
    // Returns false when /Rect is missing or is not an array of four numbers.
    function _annotsLinkRect(&$parser, $annot) {
        $rect = $this->_annotsEntry($parser, $annot, '/Rect');
        if ($rect === false || $rect[0] != PDF_TYPE_ARRAY || count($rect[1]) != 4) {
            return false;
        }

        $coords = array();
        foreach ($rect[1] as $item) {
            $item = $this->_annotsDeref($parser, $item);
            if ($item === false || !isset($item[1]) || !is_numeric($item[1])) {
                return false;
            }
            $coords[] = $item[1];
        }

        return array($coords[0], $coords[1], $coords[2] - $coords[0], -($coords[3] - $coords[1]));
    }

    // Maps an explicit destination array ([42 0 R /Fit ...]) to a 1-based source page number.
    // Returns false when $dest is not such an array or the referenced page is unknown.
    // Named destinations (names/strings instead of arrays) are not supported.
    function _annotsDestPage(&$parser, $dest) {
        if ($dest === false || $dest[0] != PDF_TYPE_ARRAY || !isset($dest[1][0])) {
            return false;
        }
        // deliberately NOT dereferenced: the object/generation numbers identify the page
        $pageRef = $dest[1][0];
        if (!is_array($pageRef) || $pageRef[0] != PDF_TYPE_OBJREF) {
            return false;
        }
        $target_pageno = $this->findPageNoForRef($parser, $pageRef);
        return ($target_pageno >= 0) ? $target_pageno : false;
    }

    // Determines what a link annotation points to.
    // Returns a string (external URI), an integer (1-based source page number), or false when
    // the annotation has no supported target.
    function _annotsLinkTarget(&$parser, $annot) {
        if (isset($annot[1]['/A'])) {
            $A = $this->_annotsEntry($parser, $annot, '/A');
            if ($A === false || $A[0] != PDF_TYPE_DICTIONARY) {
                return false;
            }
            $S = $this->_annotsEntry($parser, $A, '/S');
            if ($S === false || !isset($S[1])) {
                return false;
            }

            // << /Type /Annot ... /A << /S /URI /URI ... >> >>
            if ($S[1] === '/URI') {
                $URI = $this->_annotsEntry($parser, $A, '/URI');
                if ($URI !== false && isset($URI[1]) && is_string($URI[1])) {
                    // drop literal "\000" escape sequences left in the raw string
                    $uri = str_replace("\\000", '', trim($URI[1]));
                    if ($uri !== '') {
                        return $uri;
                    }
                }
                return false;
            }

            // << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
            if ($S[1] === '/GoTo') {
                return $this->_annotsDestPage($parser, $this->_annotsEntry($parser, $A, '/D'));
            }

            return false;
        }

        // << /Type /Annot ... /Dest [42 0 R ...] >>
        return $this->_annotsDestPage($parser, $this->_annotsEntry($parser, $annot, '/Dest'));
    }

    // Imports a page as a template (see parent::importPage) and additionally collects the page's
    // link annotations into $this->tpls[$tplidx]['links'].
    //
    // Each collected link is array(x, y, w, h, target) where x/y/w/h come from the annotation's
    // /Rect (see _annotsLinkRect) and target is either a URI string or an integer page number of
    // the source document. 'links' is only set when the page has an /Annots entry.
    //
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            $links = array();

            $annots = $this->_annotsDeref($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);
            if ($annots !== false && $annots[0] == PDF_TYPE_ARRAY) {
                foreach ($annots[1] as $item) {
                    $annot = $this->_annotsDeref($parser, $item);
                    if ($annot === false || $annot[0] != PDF_TYPE_DICTIONARY) {
                        continue;
                    }
                    if (!$this->_annotsIsLink($parser, $annot)) {
                        continue;
                    }

                    // a link without a usable rectangle cannot be placed; skipping it also
                    // avoids reusing the rectangle of a previous annotation
                    $rect = $this->_annotsLinkRect($parser, $annot);
                    if ($rect === false) {
                        continue;
                    }

                    $target = $this->_annotsLinkTarget($parser, $annot);
                    if ($target === false) {
                        continue;
                    }

                    $links[] = array($rect[0], $rect[1], $rect[2], $rect[3], $target);
                }
            }

            $tpl['links'] = $links;
        }

        // echo "Links on page $pageno:\n";
        // print_r($links);

        return $tplidx;
    }

    // Places the template on the current page (see parent::useTemplate) and then re-creates the
    // links collected by importPage() by appending them to $this->PageLinks for the current page.
    //
    // NOTE(review): the link rectangles are appended as stored, i.e. they are not adjusted for
    // $_x/$_y/$_w/$_h; internal targets use the source document's page numbers. Confirm this
    // matches how the templates are placed by callers.
    //
    // Returns whatever the parent implementation returns.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal target: allocate a link identifier pointing at the top of the page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }
                $this->PageLinks[$this->page][] = $link;
            }
        }

        return $result;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Appends the pages of another PDF file through FPDI, stamping a watermark message on each one.
class FPDI_AppendWithWatermark extends FPDI_with_annots {

    // Imports every page of $file, adds it to this document at the imported template's own
    // size, and prints $message as a watermark on it.
    //
    // $file    - path handed to setSourceFile()
    // $message - watermark text; passed through utf8_decode() before printing
    function AppendPDFWithWatermarkMessage($file, $message) {
        $total = $this->setSourceFile($file);

        $pageno = 1;
        while ($pageno <= $total) {
            $template = $this->ImportPage($pageno);
            $size = $this->getTemplatesize($template);

            // new portrait page with the dimensions reported for the template
            $this->AddPage('P', array($size['w'], $size['h']));
            $this->useTemplate($template);

            $this->_printWatermark($message);

            $pageno++;
        }
    }

    // Prints the watermark: a message printed vertically along the left margin.
    // NOTE(review): Rotate() is not defined in this file; presumably it comes from an FPDF
    // rotation add-on in the class hierarchy - confirm.
    function _printWatermark($message) {
        $this->SetAutoPageBreak(FALSE);
        $this->SetXY(6, -28);
        $this->Rotate(90);
        $this->SetTextColor(102, 102, 102);
        $this->SetFont('Arial', '', 8);
        $this->Cell(0, 5, utf8_decode($message), '', 1, 'L');
        // outputs Q to balance "q" added by the previous call to Rotate
        $this->Rotate(0);
    }
}
// Combines FPDI_AppendWithWatermark and qpdf to watermark existing PDF files.
//
// Steps:
//   1. qpdf decrypts/uncompresses $source_file into $temp_file (forced to PDF 1.4),
//   2. FPDI_AppendWithWatermark re-creates the pages with $message stamped on each one and
//      overwrites $temp_file with the result,
//   3. qpdf re-encrypts $temp_file into $output_file (skipped in debug mode, where the
//      unencrypted, uncompressed $temp_file is simply copied to $output_file).
//
// $source_file - path of the PDF to personalize
// $output_file - path the final PDF is written to
// $temp_file   - scratch path used between the steps (not removed afterwards)
// $message     - watermark text
// $debug_mode  - when true: leave the output uncompressed and unencrypted, and die() with the
//                failing command and its output instead of returning FALSE when the first
//                qpdf run fails
//
// Returns TRUE on success, FALSE on failure.
//
// Side effect: changes the process-wide LC_CTYPE locale and does not restore it.
//
// NOTE: success of a qpdf run is judged by the target file existing and being non-empty, so a
// non-empty file left at that path by an earlier run is indistinguishable from success.
function personalize_pdf($source_file, $output_file, $temp_file, $message, $debug_mode = FALSE) {
    setlocale(LC_CTYPE, "en_US.UTF-8"); // otherwise escapeshellarg() strips non-ASCII characters

    // see the comments in FPDI_with_annots as to why we have to run this;
    // 2>&1 makes qpdf's diagnostics (written to stderr) part of $output
    $cmd = sprintf('qpdf --decrypt --stream-data=uncompress --force-version=1.4 %s %s 2>&1', escapeshellarg($source_file), escapeshellarg($temp_file));
    $output = shell_exec($cmd);

    // the file was written by another process; make sure no cached stat result is consulted
    clearstatcache();
    if (!file_exists($temp_file) || filesize($temp_file) == 0) {
        if ($debug_mode) die("Error occurred while running:\n$cmd\n\nOutput:\n$output");
        return FALSE;
    }

    $pdf = new FPDI_AppendWithWatermark();

    // make debugging easier by leaving the output file uncompressed
    if ($debug_mode) $pdf->SetCompression(FALSE);

    $pdf->AppendPDFWithWatermarkMessage($temp_file, $message);
    $pdf->Output($temp_file, 'F');

    if ($debug_mode) {
        // make debugging easier by omitting the final processing step
        if (!copy($temp_file, $output_file)) {
            return FALSE;
        }
        return TRUE;
    }

    $cmd = sprintf('qpdf --encrypt "" "" 40 --extract=n -- %s %s 2>&1', escapeshellarg($temp_file), escapeshellarg($output_file));
    shell_exec($cmd);

    clearstatcache();
    if (!file_exists($output_file) || filesize($output_file) == 0) {
        // only reachable when $debug_mode is off, so there is nothing to print here
        return FALSE;
    }

    return TRUE;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If you guys are looking for a newer solution using FPDI 2.3 and TCPDF 6.4+, take a look at this answer:
https://stackoverflow.com/a/67071744/5397846
Supporting both internal and external annotation links (hyperlinks)