andreyvit/fpdi_with_annots.php

## fpdi_with_annots.php
<?php

// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Recursively dereferences a parsed PDF value.
    //
    // Object references are looked up through $parser->pdf_resolve_object(), object wrappers
    // are replaced by their payload, and every element of an array or dictionary is resolved
    // in turn. Any other value is returned unchanged.
    //
    //   $parser   - parser that owns the value (its ->c member is handed to pdf_resolve_object())
    //   $smt      - parsed value of the form array(PDF_TYPE_*, payload, ...)
    //   $maxdepth - remaining recursion budget; once exhausted the value is returned as is, so
    //               deeply nested references may stay unresolved. The default prevents an
    //               infinite recursion on malformed PDFs (not theoretical, actually found in
    //               the wild).
    //
    // Returns the resolved value.
    function resolve(&$parser, $smt, $maxdepth=10) {
        // non-array input (e.g. a failed lookup) carries no type tag to dispatch on
        if ($maxdepth <= 0 || !is_array($smt) || !isset($smt[0]))
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, isset($smt[1]) ? $smt[1] : null, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    // Finds the 1-based page number of the page an object reference points to.
    //
    //   $parser  - parser whose ->pages entries carry 'obj' and 'gen' keys
    //   $pageRef - object reference: array(PDF_TYPE_OBJREF, object number, generation number)
    //
    // Returns the page number, or -1 when no page matches (or the reference is malformed).
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1], $pageRef[2]))
            return -1;

        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            if (isset($page['obj'], $page['gen']) && $page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Tells whether $value is a parsed array/dictionary of the given PDF_TYPE_* with a usable payload.
    function _isContainer($value, $type) {
        return is_array($value) && isset($value[0], $value[1]) && $value[0] == $type && is_array($value[1]);
    }

    // Follows object references / object wrappers until a concrete value is reached, WITHOUT
    // descending into arrays or dictionaries. Unlike resolve(), references stored inside the
    // returned container stay intact, which is what page-destination lookup needs.
    function _derefShallow(&$parser, $value, $maxdepth=10) {
        for ($i = 0; $i < $maxdepth; $i++) {
            if (!is_array($value) || !isset($value[0]))
                break;

            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT && isset($value[1])) {
                $value = $value[1];
            } else {
                break;
            }
        }
        return $value;
    }

    // Returns the payload of dictionary entry $key (dereferenced if stored indirectly),
    // or null when the entry is absent or has no payload.
    function _entryValue(&$parser, $dict, $key) {
        if (!isset($dict[1][$key]))
            return null;

        $entry = $this->_derefShallow($parser, $dict[1][$key]);
        return (is_array($entry) && isset($entry[1])) ? $entry[1] : null;
    }

    // Converts the annotation's /Rect into array(x, y, w, h), or null when /Rect is missing
    // or is not an array of four numbers.
    function _linkRect(&$parser, $annot) {
        if (!isset($annot[1]['/Rect']))
            return null;

        $rect = $this->resolve($parser, $annot[1]['/Rect']);
        if (!$this->_isContainer($rect, PDF_TYPE_ARRAY) || count($rect[1]) != 4)
            return null;

        $coords = array();
        foreach ($rect[1] as $item) {
            if (!is_array($item) || !isset($item[1]) || !is_numeric($item[1]))
                return null;
            $coords[] = $item[1];
        }

        list($x, $y, $x2, $y2) = $coords;
        // the height is stored negated, exactly as the links were recorded before
        // NOTE(review): presumably this matches how FPDF turns PageLinks entries back into a /Rect - confirm
        return array($x, $y, $x2 - $x, -($y2 - $y));
    }

    // Maps an explicit destination ([pageRef /Fit ...], possibly stored indirectly) to a
    // 1-based page number; returns -1 for anything else (named destinations, unknown pages).
    function _pageNoForDest(&$parser, $dest) {
        $dest = $this->_derefShallow($parser, $dest);
        if (!$this->_isContainer($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
            return -1;

        // the first element must still be a reference: its object/generation numbers
        // are what identifies the target page
        $target = $dest[1][0];
        if (!is_array($target) || !isset($target[0]) || $target[0] != PDF_TYPE_OBJREF)
            return -1;

        return $this->findPageNoForRef($parser, $target);
    }

    // Builds a link record array(x, y, w, h, target) from one annotation dictionary, where
    // target is a string (external URL) or an integer (page number). Returns null when the
    // annotation is not a usable hyperlink.
    function _linkFromAnnot(&$parser, $annot) {
        if (!$this->_isContainer($annot, PDF_TYPE_DICTIONARY))
            return null;

        // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
        if ($this->_entryValue($parser, $annot, '/Type') !== '/Annot'
                || $this->_entryValue($parser, $annot, '/Subtype') !== '/Link')
            return null;

        // without a valid rectangle there is nothing clickable to reproduce
        $rect = $this->_linkRect($parser, $annot);
        if ($rect === null)
            return null;

        $target = null;

        if (isset($annot[1]['/A'])) {
            $A = $this->_derefShallow($parser, $annot[1]['/A']);

            if ($this->_isContainer($A, PDF_TYPE_DICTIONARY)) {
                $S = $this->_entryValue($parser, $A, '/S');

                //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
                if ($S === '/URI') {
                    $URI = $this->_entryValue($parser, $A, '/URI');
                    if (is_string($URI)) {
                        $uri = str_replace("\\000", '', trim($URI));
                        if ($uri !== '') {
                            $target = $uri;
                        }
                    }

                //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
                } else if ($S === '/GoTo' && isset($A[1]['/D'])) {
                    $pageno = $this->_pageNoForDest($parser, $A[1]['/D']);
                    if ($pageno >= 0) {
                        $target = $pageno;
                    }
                }
            }

        //  << /Type /Annot ... /Dest [42 0 R ...] >>
        } else if (isset($annot[1]['/Dest'])) {
            $pageno = $this->_pageNoForDest($parser, $annot[1]['/Dest']);
            if ($pageno >= 0) {
                $target = $pageno;
            }
        }

        if ($target === null)
            return null;

        return array($rect[0], $rect[1], $rect[2], $rect[3], $target);
    }

    // Imports a page like FPDI::importPage() and additionally stores the page's hyperlink
    // annotations in the template under the 'links' key (only when the page has /Annots).
    //
    //   $pageno  - 1-based page number in the current source file
    //   $boxName - page box name, passed through to the parent implementation
    //
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            // Dereference level by level instead of calling resolve() on the whole tree:
            // resolve() would also replace the page reference inside a destination by the
            // page's content, losing the object number needed to find the target page.
            $annots = $this->_derefShallow($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);

            $links = array();
            if ($this->_isContainer($annots, PDF_TYPE_ARRAY)) {
                foreach ($annots[1] as $rawAnnot) {
                    $link = $this->_linkFromAnnot($parser, $this->_derefShallow($parser, $rawAnnot));
                    if ($link !== null) {
                        $links[] = $link;
                    }
                }
            }
            $tpl['links'] = $links;
        }

        return $tplidx;
    }

    // Places the template like FPDI::useTemplate() and then re-creates the links recorded by
    // importPage() on the current page.
    //
    // NOTE: the link rectangles are used exactly as read from the source page; they are not
    // adjusted for $_x, $_y, $_w or $_h.
    //
    // Returns whatever the parent implementation returns.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal link: swap the page number for a link identifier targeting that page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }
                $this->PageLinks[$this->page][] = $link;
            }
        }

        return $result;
    }

}

## pdf_personalization_example.php
<?php

// uses FPDI to append another PDF file, watermarking each page with a message
class FPDI_AppendWithWatermark extends FPDI_with_annots {

    // Appends every page of the PDF at $file to this document and prints $message on each
    // appended page as a watermark.
    //
    //   $file    - path of the source PDF, handed to setSourceFile()
    //   $message - watermark text; converted with utf8_decode() before being printed
    //              (NOTE: utf8_decode() is deprecated as of PHP 8.2)
    //
    // NOTE(review): Rotate() is not defined in this file; presumably it comes from an FPDF
    // rotation add-on further up the class hierarchy - confirm it is available.
    function AppendPDFWithWatermarkMessage($file, $message) {
        $pagecount = $this->setSourceFile($file);
        for ($i = 1; $i <= $pagecount; $i++) {
            $tplidx = $this->importPage($i);
            $s = $this->getTemplateSize($tplidx);

            // Give the new page the orientation of the source page. With a fixed 'P' a
            // landscape page can end up portrait, cropping the template. The size is passed
            // as (short side, long side) so the orientation flag alone decides which side
            // becomes the width.
            $short = min($s['w'], $s['h']);
            $long = max($s['w'], $s['h']);
            $this->AddPage($s['w'] > $s['h'] ? 'L' : 'P', array($short, $long));
            $this->useTemplate($tplidx);

            // watermark (a message printed vertically along the left margin)
            $this->SetAutoPageBreak(FALSE);
            $this->SetXY(6, -28);
            $this->Rotate(90);
            $this->SetTextColor(102, 102, 102);
            $this->SetFont('Arial', '', 8);
            $this->Cell(0, 5, utf8_decode($message),'',1,'L');
            $this->Rotate(0); // outputs Q to balance "q" added by the previous call to Rotate
        }
    }

}

// combines FPDI_AppendWithWatermark and qpdf to watermark existing PDF files
//
//   $source_file - path of the PDF to personalize
//   $output_file - path the finished PDF is written to
//   $temp_file   - scratch path; it is overwritten twice (by qpdf, then by the FPDI output)
//   $message     - watermark text, passed to AppendPDFWithWatermarkMessage()
//   $debug_mode  - when TRUE the generated PDF is left uncompressed, the final qpdf
//                  encryption step is replaced by a plain copy, and a failing first qpdf
//                  run terminates the script with die()
//
// Returns TRUE on success, FALSE when a qpdf run (or the debug-mode copy) fails.
function personalize_pdf($source_file, $output_file, $temp_file, $message, $debug_mode = FALSE) {
    setlocale(LC_CTYPE, "en_US.UTF-8"); // otherwise escapeshellarg() strips non-ASCII characters

    // see the comments in FPDI_with_annots as to why we have to run this
    $cmd = sprintf('qpdf --decrypt --stream-data=uncompress --force-version=1.4 %s %s', escapeshellarg($source_file), escapeshellarg($temp_file));

    // Check the exit status as well as the produced file: a file left over from an earlier
    // run must not make a failed run look successful. qpdf exits with 0 on success and 3 when
    // it only had warnings. stderr is captured too so the debug message shows qpdf's error.
    $lines = array();
    $status = -1;
    exec($cmd . ' 2>&1', $lines, $status);
    clearstatcache();
    if (($status !== 0 && $status !== 3) || !file_exists($temp_file) || filesize($temp_file) == 0) {
        if ($debug_mode) die("Error occurred while running:\n$cmd\n\nOutput:\n" . implode("\n", $lines));
        return FALSE;
    }

    $pdf = new FPDI_AppendWithWatermark();

    // make debugging easier by leaving the output file uncompressed
    if ($debug_mode) $pdf->SetCompression(FALSE);

    $pdf->AppendPDFWithWatermarkMessage($temp_file, $message);
    $pdf->Output($temp_file, 'F');

    if ($debug_mode) {
        // make debugging easier by omitting the final processing step
        if (!copy($temp_file, $output_file)) {
            return FALSE;
        }
    } else {
        $cmd = sprintf('qpdf --encrypt "" "" 40 --extract=n -- %s %s', escapeshellarg($temp_file), escapeshellarg($output_file));
        $lines = array();
        $status = -1;
        exec($cmd . ' 2>&1', $lines, $status);
        clearstatcache();
        if (($status !== 0 && $status !== 3) || !file_exists($output_file) || filesize($output_file) == 0) {
            return FALSE;
        }
    }
    return TRUE;
}
	<?php

	// FPDI extension that preserves hyperlinks when copying PDF pages.
	//
	// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
	//
	// Published at: https://gist.github.com/2020422
	//
	// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
	// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
	// code, invoking it like this:
	//
	// qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
	//
	// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
	// protection:
	//
	// qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
	//
	class FPDI_with_annots extends FPDI {

	    // Recursively dereferences a parsed PDF value.
	    //
	    // Object references are looked up through $parser->pdf_resolve_object(), object wrappers
	    // are replaced by their payload, and every element of an array or dictionary is resolved
	    // in turn. Any other value is returned unchanged.
	    //
	    //   $parser   - parser that owns the value (its ->c member is handed to pdf_resolve_object())
	    //   $smt      - parsed value of the form array(PDF_TYPE_*, payload, ...)
	    //   $maxdepth - remaining recursion budget; once exhausted the value is returned as is, so
	    //               deeply nested references may stay unresolved. The default prevents an
	    //               infinite recursion on malformed PDFs (not theoretical, actually found in
	    //               the wild).
	    //
	    // Returns the resolved value.
	    function resolve(&$parser, $smt, $maxdepth=10) {
	        // non-array input (e.g. a failed lookup) carries no type tag to dispatch on
	        if ($maxdepth <= 0 || !is_array($smt) || !isset($smt[0]))
	            return $smt;

	        if ($smt[0] == PDF_TYPE_OBJREF) {
	            $result = $parser->pdf_resolve_object($parser->c, $smt);
	            return $this->resolve($parser, $result, $maxdepth-1);

	        } else if ($smt[0] == PDF_TYPE_OBJECT) {
	            return $this->resolve($parser, isset($smt[1]) ? $smt[1] : null, $maxdepth-1);

	        } else if ($smt[0] == PDF_TYPE_ARRAY) {
	            $result = array();
	            foreach ($smt[1] as $item) {
	                $result[] = $this->resolve($parser, $item, $maxdepth-1);
	            }
	            $smt[1] = $result;
	            return $smt;

	        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
	            $result = array();
	            foreach ($smt[1] as $key => $item) {
	                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
	            }
	            $smt[1] = $result;
	            return $smt;

	        } else {
	            return $smt;
	        }
	    }

	    // Finds the 1-based page number of the page an object reference points to.
	    //
	    //   $parser  - parser whose ->pages entries carry 'obj' and 'gen' keys
	    //   $pageRef - object reference: array(PDF_TYPE_OBJREF, object number, generation number)
	    //
	    // Returns the page number, or -1 when no page matches (or the reference is malformed).
	    function findPageNoForRef(&$parser, $pageRef) {
	        if (!is_array($pageRef) || !isset($pageRef[1], $pageRef[2]))
	            return -1;

	        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

	        foreach ($parser->pages as $index => $page) {
	            if (isset($page['obj'], $page['gen']) && $page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
	                return $index + 1;
	            }
	        }

	        return -1;
	    }

	    // Tells whether $value is a parsed array/dictionary of the given PDF_TYPE_* with a usable payload.
	    function _isContainer($value, $type) {
	        return is_array($value) && isset($value[0], $value[1]) && $value[0] == $type && is_array($value[1]);
	    }

	    // Follows object references / object wrappers until a concrete value is reached, WITHOUT
	    // descending into arrays or dictionaries. Unlike resolve(), references stored inside the
	    // returned container stay intact, which is what page-destination lookup needs.
	    function _derefShallow(&$parser, $value, $maxdepth=10) {
	        for ($i = 0; $i < $maxdepth; $i++) {
	            if (!is_array($value) || !isset($value[0]))
	                break;

	            if ($value[0] == PDF_TYPE_OBJREF) {
	                $value = $parser->pdf_resolve_object($parser->c, $value);
	            } else if ($value[0] == PDF_TYPE_OBJECT && isset($value[1])) {
	                $value = $value[1];
	            } else {
	                break;
	            }
	        }
	        return $value;
	    }

	    // Returns the payload of dictionary entry $key (dereferenced if stored indirectly),
	    // or null when the entry is absent or has no payload.
	    function _entryValue(&$parser, $dict, $key) {
	        if (!isset($dict[1][$key]))
	            return null;

	        $entry = $this->_derefShallow($parser, $dict[1][$key]);
	        return (is_array($entry) && isset($entry[1])) ? $entry[1] : null;
	    }

	    // Converts the annotation's /Rect into array(x, y, w, h), or null when /Rect is missing
	    // or is not an array of four numbers.
	    function _linkRect(&$parser, $annot) {
	        if (!isset($annot[1]['/Rect']))
	            return null;

	        $rect = $this->resolve($parser, $annot[1]['/Rect']);
	        if (!$this->_isContainer($rect, PDF_TYPE_ARRAY) || count($rect[1]) != 4)
	            return null;

	        $coords = array();
	        foreach ($rect[1] as $item) {
	            if (!is_array($item) || !isset($item[1]) || !is_numeric($item[1]))
	                return null;
	            $coords[] = $item[1];
	        }

	        list($x, $y, $x2, $y2) = $coords;
	        // the height is stored negated, exactly as the links were recorded before
	        // NOTE(review): presumably this matches how FPDF turns PageLinks entries back into a /Rect - confirm
	        return array($x, $y, $x2 - $x, -($y2 - $y));
	    }

	    // Maps an explicit destination ([pageRef /Fit ...], possibly stored indirectly) to a
	    // 1-based page number; returns -1 for anything else (named destinations, unknown pages).
	    function _pageNoForDest(&$parser, $dest) {
	        $dest = $this->_derefShallow($parser, $dest);
	        if (!$this->_isContainer($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
	            return -1;

	        // the first element must still be a reference: its object/generation numbers
	        // are what identifies the target page
	        $target = $dest[1][0];
	        if (!is_array($target) || !isset($target[0]) || $target[0] != PDF_TYPE_OBJREF)
	            return -1;

	        return $this->findPageNoForRef($parser, $target);
	    }

	    // Builds a link record array(x, y, w, h, target) from one annotation dictionary, where
	    // target is a string (external URL) or an integer (page number). Returns null when the
	    // annotation is not a usable hyperlink.
	    function _linkFromAnnot(&$parser, $annot) {
	        if (!$this->_isContainer($annot, PDF_TYPE_DICTIONARY))
	            return null;

	        // all links look like: << /Type /Annot /Subtype /Link /Rect [...] ... >>
	        if ($this->_entryValue($parser, $annot, '/Type') !== '/Annot'
	                || $this->_entryValue($parser, $annot, '/Subtype') !== '/Link')
	            return null;

	        // without a valid rectangle there is nothing clickable to reproduce
	        $rect = $this->_linkRect($parser, $annot);
	        if ($rect === null)
	            return null;

	        $target = null;

	        if (isset($annot[1]['/A'])) {
	            $A = $this->_derefShallow($parser, $annot[1]['/A']);

	            if ($this->_isContainer($A, PDF_TYPE_DICTIONARY)) {
	                $S = $this->_entryValue($parser, $A, '/S');

	                // << /Type /Annot ... /A << /S /URI /URI ... >> >>
	                if ($S === '/URI') {
	                    $URI = $this->_entryValue($parser, $A, '/URI');
	                    if (is_string($URI)) {
	                        $uri = str_replace("\\000", '', trim($URI));
	                        if ($uri !== '') {
	                            $target = $uri;
	                        }
	                    }

	                // << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
	                } else if ($S === '/GoTo' && isset($A[1]['/D'])) {
	                    $pageno = $this->_pageNoForDest($parser, $A[1]['/D']);
	                    if ($pageno >= 0) {
	                        $target = $pageno;
	                    }
	                }
	            }

	        // << /Type /Annot ... /Dest [42 0 R ...] >>
	        } else if (isset($annot[1]['/Dest'])) {
	            $pageno = $this->_pageNoForDest($parser, $annot[1]['/Dest']);
	            if ($pageno >= 0) {
	                $target = $pageno;
	            }
	        }

	        if ($target === null)
	            return null;

	        return array($rect[0], $rect[1], $rect[2], $rect[3], $target);
	    }

	    // Imports a page like FPDI::importPage() and additionally stores the page's hyperlink
	    // annotations in the template under the 'links' key (only when the page has /Annots).
	    //
	    //   $pageno  - 1-based page number in the current source file
	    //   $boxName - page box name, passed through to the parent implementation
	    //
	    // Returns the template index produced by the parent implementation.
	    function importPage($pageno, $boxName = '/CropBox') {
	        $tplidx = parent::importPage($pageno, $boxName);

	        $tpl =& $this->tpls[$tplidx];
	        $parser =& $tpl['parser'];

	        // look for hyperlink annotations and store them in the template
	        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
	            // Dereference level by level instead of calling resolve() on the whole tree:
	            // resolve() would also replace the page reference inside a destination by the
	            // page's content, losing the object number needed to find the target page.
	            $annots = $this->_derefShallow($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);

	            $links = array();
	            if ($this->_isContainer($annots, PDF_TYPE_ARRAY)) {
	                foreach ($annots[1] as $rawAnnot) {
	                    $link = $this->_linkFromAnnot($parser, $this->_derefShallow($parser, $rawAnnot));
	                    if ($link !== null) {
	                        $links[] = $link;
	                    }
	                }
	            }
	            $tpl['links'] = $links;
	        }

	        return $tplidx;
	    }

	    // Places the template like FPDI::useTemplate() and then re-creates the links recorded by
	    // importPage() on the current page.
	    //
	    // NOTE: the link rectangles are used exactly as read from the source page; they are not
	    // adjusted for $_x, $_y, $_w or $_h.
	    //
	    // Returns whatever the parent implementation returns.
	    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
	        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

	        // apply links from the template
	        $tpl =& $this->tpls[$tplidx];
	        if (isset($tpl['links'])) {
	            foreach ($tpl['links'] as $link) {
	                // $link[4] is either a string (external URL) or an integer (page number)
	                if (is_int($link[4])) {
	                    // internal link: swap the page number for a link identifier targeting that page
	                    $l = $this->AddLink();
	                    $this->SetLink($l, 0, $link[4]);
	                    $link[4] = $l;
	                }
	                $this->PageLinks[$this->page][] = $link;
	            }
	        }

	        return $result;
	    }

	}
	<?php

	// uses FPDI to append another PDF file, watermarking each page with a message
	class FPDI_AppendWithWatermark extends FPDI_with_annots {

	    // Appends every page of the PDF at $file to this document and prints $message on each
	    // appended page as a watermark.
	    //
	    //   $file    - path of the source PDF, handed to setSourceFile()
	    //   $message - watermark text; converted with utf8_decode() before being printed
	    //              (NOTE: utf8_decode() is deprecated as of PHP 8.2)
	    //
	    // NOTE(review): Rotate() is not defined in this file; presumably it comes from an FPDF
	    // rotation add-on further up the class hierarchy - confirm it is available.
	    function AppendPDFWithWatermarkMessage($file, $message) {
	        $pagecount = $this->setSourceFile($file);
	        for ($i = 1; $i <= $pagecount; $i++) {
	            $tplidx = $this->importPage($i);
	            $s = $this->getTemplateSize($tplidx);

	            // Give the new page the orientation of the source page. With a fixed 'P' a
	            // landscape page can end up portrait, cropping the template. The size is passed
	            // as (short side, long side) so the orientation flag alone decides which side
	            // becomes the width.
	            $short = min($s['w'], $s['h']);
	            $long = max($s['w'], $s['h']);
	            $this->AddPage($s['w'] > $s['h'] ? 'L' : 'P', array($short, $long));
	            $this->useTemplate($tplidx);

	            // watermark (a message printed vertically along the left margin)
	            $this->SetAutoPageBreak(FALSE);
	            $this->SetXY(6, -28);
	            $this->Rotate(90);
	            $this->SetTextColor(102, 102, 102);
	            $this->SetFont('Arial', '', 8);
	            $this->Cell(0, 5, utf8_decode($message),'',1,'L');
	            $this->Rotate(0); // outputs Q to balance "q" added by the previous call to Rotate
	        }
	    }

	}

	// combines FPDI_AppendWithWatermark and qpdf to watermark existing PDF files
	//
	//   $source_file - path of the PDF to personalize
	//   $output_file - path the finished PDF is written to
	//   $temp_file   - scratch path; it is overwritten twice (by qpdf, then by the FPDI output)
	//   $message     - watermark text, passed to AppendPDFWithWatermarkMessage()
	//   $debug_mode  - when TRUE the generated PDF is left uncompressed, the final qpdf
	//                  encryption step is replaced by a plain copy, and a failing first qpdf
	//                  run terminates the script with die()
	//
	// Returns TRUE on success, FALSE when a qpdf run (or the debug-mode copy) fails.
	function personalize_pdf($source_file, $output_file, $temp_file, $message, $debug_mode = FALSE) {
	    setlocale(LC_CTYPE, "en_US.UTF-8"); // otherwise escapeshellarg() strips non-ASCII characters

	    // see the comments in FPDI_with_annots as to why we have to run this
	    $cmd = sprintf('qpdf --decrypt --stream-data=uncompress --force-version=1.4 %s %s', escapeshellarg($source_file), escapeshellarg($temp_file));

	    // Check the exit status as well as the produced file: a file left over from an earlier
	    // run must not make a failed run look successful. qpdf exits with 0 on success and 3 when
	    // it only had warnings. stderr is captured too so the debug message shows qpdf's error.
	    $lines = array();
	    $status = -1;
	    exec($cmd . ' 2>&1', $lines, $status);
	    clearstatcache();
	    if (($status !== 0 && $status !== 3) || !file_exists($temp_file) || filesize($temp_file) == 0) {
	        if ($debug_mode) die("Error occurred while running:\n$cmd\n\nOutput:\n" . implode("\n", $lines));
	        return FALSE;
	    }

	    $pdf = new FPDI_AppendWithWatermark();

	    // make debugging easier by leaving the output file uncompressed
	    if ($debug_mode) $pdf->SetCompression(FALSE);

	    $pdf->AppendPDFWithWatermarkMessage($temp_file, $message);
	    $pdf->Output($temp_file, 'F');

	    if ($debug_mode) {
	        // make debugging easier by omitting the final processing step
	        if (!copy($temp_file, $output_file)) {
	            return FALSE;
	        }
	    } else {
	        $cmd = sprintf('qpdf --encrypt "" "" 40 --extract=n -- %s %s', escapeshellarg($temp_file), escapeshellarg($output_file));
	        $lines = array();
	        $status = -1;
	        exec($cmd . ' 2>&1', $lines, $status);
	        clearstatcache();
	        if (($status !== 0 && $status !== 3) || !file_exists($output_file) || filesize($output_file) == 0) {
	            return FALSE;
	        }
	    }
	    return TRUE;
	}