Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
FPDI extension to preserve external hyperlinks
<?php
// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
// qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
// qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Returns a copy of $smt in which indirect references (PDF_TYPE_OBJREF) and object
    // wrappers (PDF_TYPE_OBJECT) are replaced by the values they lead to, descending into
    // arrays and dictionaries.
    //
    // $parser   - parser of the source document; must provide pdf_resolve_object() and $c
    // $smt      - parsed PDF value: element 0 is a PDF_TYPE_* constant, element 1 the payload
    // $maxdepth - remaining recursion budget; once it reaches 0 the value is returned as is.
    //             The default prevents an infinite recursion on malformed PDFs (not
    //             theoretical, actually found in the wild).
    //
    // importPage() no longer feeds annotations through this method: resolving everything
    // eagerly also replaces the page references inside link destinations, and those must
    // stay references so findPageNoForRef() can match them. It is kept for external callers.
    function resolve(&$parser, $smt, $maxdepth=10) {
        // a failed lookup may hand back a non-array; pass it through instead of indexing it
        if ($maxdepth == 0 || !is_array($smt))
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    // Follows indirect references / object wrappers starting at $value and returns the first
    // value that is neither. Unlike resolve(), the contents of arrays and dictionaries are
    // left untouched. $maxhops bounds the chain length so a reference cycle cannot loop
    // forever; when it runs out the value reached so far is returned, so callers must check
    // the type of the result.
    function _annotDeref(&$parser, $value, $maxhops=10) {
        while ($maxhops-- > 0 && is_array($value) && isset($value[0])) {
            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT) {
                $value = isset($value[1]) ? $value[1] : null;
            } else {
                break;
            }
        }
        return $value;
    }

    // True when $value is a parsed PDF value tagged with the given PDF_TYPE_* constant.
    function _annotIsType($value, $type) {
        return is_array($value) && isset($value[0]) && $value[0] == $type;
    }

    // Returns the dereferenced value stored under $key in the parsed dictionary $dict,
    // or null when $dict is not a dictionary or has no such key.
    function _annotEntry(&$parser, $dict, $key) {
        if (!$this->_annotIsType($dict, PDF_TYPE_DICTIONARY) || !isset($dict[1][$key]))
            return null;
        return $this->_annotDeref($parser, $dict[1][$key]);
    }

    // Returns the string payload (e.g. '/Link') stored under $key in $dict, or null when
    // the entry is missing or has no string payload.
    function _annotString(&$parser, $dict, $key) {
        $value = $this->_annotEntry($parser, $dict, $key);
        if (is_array($value) && isset($value[1]) && is_string($value[1]))
            return $value[1];
        return null;
    }

    // Returns the 1-based number of the page whose object/generation numbers equal those of
    // the reference $pageRef (array(PDF_TYPE_OBJREF, obj, gen)), or -1 when no page matches.
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1]) || !isset($pageRef[2]))
            return -1;
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            if (!isset($page['obj']) || !isset($page['gen']))
                continue;
            if ($page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Reads /Rect of a link annotation and returns array(x, y, w, h) in the units of the
    // source PDF, where w = x2 - x and h = -(y2 - y) (the sign flip is what useTemplate()
    // expects). Returns null when /Rect is missing or is not an array of four numbers, so
    // that no link is ever emitted with undefined or stale coordinates.
    function _annotLinkRect(&$parser, $annot) {
        $rect = $this->_annotEntry($parser, $annot, '/Rect');
        if (!$this->_annotIsType($rect, PDF_TYPE_ARRAY) || !is_array($rect[1]) || count($rect[1]) != 4)
            return null;

        $coords = array();
        foreach ($rect[1] as $item) {
            $number = $this->_annotDeref($parser, $item);
            if (!is_array($number) || !isset($number[1]) || !is_numeric($number[1]))
                return null;
            $coords[] = $number[1] + 0; // numeric token -> int/float
        }

        list($x, $y, $x2, $y2) = $coords;
        return array($x, $y, $x2 - $x, -($y2 - $y));
    }

    // Maps an explicit destination array ([42 0 R /Fit ...]) to a 1-based page number.
    // Returns null when $dest is not an array whose first element is a reference to one of
    // the pages known to the parser (named destinations are not supported).
    function _annotDestPage(&$parser, $dest) {
        if (!$this->_annotIsType($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
            return null;
        // the first element must remain an unresolved reference: its object/generation
        // numbers are the only thing that identifies the target page
        if (!$this->_annotIsType($dest[1][0], PDF_TYPE_OBJREF))
            return null;
        $pageno = $this->findPageNoForRef($parser, $dest[1][0]);
        return ($pageno > 0) ? $pageno : null;
    }

    // Determines where a link annotation points. Returns a string for an external URI, an
    // integer page number for an internal link, or null when the target is not supported.
    function _annotLinkTarget(&$parser, $annot) {
        if (!isset($annot[1]['/A'])) {
            //  << /Type /Annot ... /Dest [42 0 R ...] >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $annot, '/Dest'));
        }

        // when /A is present, /Dest is not consulted
        $action = $this->_annotEntry($parser, $annot, '/A');
        $kind = $this->_annotString($parser, $action, '/S');

        if ($kind == '/URI') {
            //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
            $uri = $this->_annotString($parser, $action, '/URI');
            if ($uri === null)
                return null;
            $uri = str_replace("\\000", '', trim($uri));
            return ($uri !== '') ? $uri : null;
        }

        if ($kind == '/GoTo') {
            //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $action, '/D'));
        }

        return null;
    }

    // Imports a page like FPDI::importPage() and additionally collects the page's link
    // annotations into $this->tpls[$tplidx]['links'], a list of array(x, y, w, h, target)
    // where target is a URI string or an integer page number of the source document.
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        $links = array();
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            $annots = $this->_annotDeref($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);
            $items = ($this->_annotIsType($annots, PDF_TYPE_ARRAY) && is_array($annots[1])) ? $annots[1] : array();

            foreach ($items as $item) {
                $annot = $this->_annotDeref($parser, $item);
                if (!$this->_annotIsType($annot, PDF_TYPE_DICTIONARY))
                    continue;

                // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
                // /Type is optional in annotation dictionaries, so only reject a wrong one
                $type = $this->_annotString($parser, $annot, '/Type');
                if ($type !== null && $type != '/Annot')
                    continue;
                if ($this->_annotString($parser, $annot, '/Subtype') != '/Link')
                    continue;

                $rect = $this->_annotLinkRect($parser, $annot);
                if ($rect === null)
                    continue;

                $target = $this->_annotLinkTarget($parser, $annot);
                if ($target === null)
                    continue;

                $links[] = array($rect[0], $rect[1], $rect[2], $rect[3], $target);
            }
        }
        $tpl['links'] = $links;

        // echo "Links on page $pageno:\n";
        // print_r($links);

        return $tplidx;
    }

    // Places the template like the parent implementation and then re-creates the links
    // collected by importPage() on the current output page. Returns the parent's result.
    // NOTE(review): link rectangles are copied as stored; the $_x/$_y/$_w/$_h placement
    // arguments are not applied to them.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal target: allocate a link id pointing at the top of that page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }
                // $link is a per-iteration copy, so the template's stored list is unchanged
                $this->PageLinks[$this->page][] = $link;
            }
        }

        return $result;
    }

}
<?php
// uses FPDI to append another PDF file, watermarking each page with a message
class FPDI_AppendWithWatermark extends FPDI_with_annots {

    // Appends every page of the PDF at $file to this document, one output page per source
    // page sized to the imported template, and prints $message on each of them as a
    // watermark running vertically along the left margin.
    function AppendPDFWithWatermarkMessage($file, $message) {
        $total = $this->setSourceFile($file);

        $pageNumber = 1;
        while ($pageNumber <= $total) {
            $template = $this->ImportPage($pageNumber);
            $size = $this->getTemplatesize($template);

            // new portrait page with the dimensions of the imported one
            $dimensions = array($size['w'], $size['h']);
            $this->AddPage('P', $dimensions);
            $this->useTemplate($template);

            // watermark: small grey text, rotated by 90 degrees
            $this->SetAutoPageBreak(FALSE);
            $this->SetXY(6, -28);
            $this->Rotate(90);
            $this->SetTextColor(102, 102, 102);
            $this->SetFont('Arial', '', 8);
            $text = utf8_decode($message);
            $this->Cell(0, 5, $text, '', 1, 'L');
            // outputs Q to balance the "q" added by the previous call to Rotate
            $this->Rotate(0);

            $pageNumber++;
        }
    }

}
// combines FPDI_AppendWithWatermark and qpdf to watermark existing PDF files
// Combines FPDI_AppendWithWatermark and qpdf to watermark an existing PDF file.
//
// $source_file - PDF to personalize
// $output_file - where the final PDF is written
// $temp_file   - scratch path; receives the preprocessed PDF and is then overwritten with
//                the watermarked, still unprotected PDF
// $message     - watermark text printed on every page
// $debug_mode  - when TRUE: output is left uncompressed, the final qpdf step is skipped
//                (the scratch file is copied to $output_file instead) and a failing
//                preprocessing step terminates the script with the command and its output
//
// Returns TRUE on success and FALSE when a step did not produce a non-empty file.
// NOTE(review): success is judged by the presence of the result files only, so a file left
// over at $temp_file / $output_file from an earlier run can hide a failed qpdf call.
function personalize_pdf($source_file, $output_file, $temp_file, $message, $debug_mode = FALSE) {
    setlocale(LC_CTYPE, "en_US.UTF-8");  // otherwise escapeshellarg() strips non-ASCII characters

    // see the comments in FPDI_with_annots as to why we have to run this;
    // stderr is merged into stdout so the debug message below contains qpdf's diagnostics
    $cmd = sprintf('qpdf --decrypt --stream-data=uncompress --force-version=1.4 %s %s 2>&1', escapeshellarg($source_file), escapeshellarg($temp_file));
    $output = shell_exec($cmd);
    clearstatcache();  // the file was written by another process; drop cached stat data
    if (!file_exists($temp_file) || filesize($temp_file) == 0) {
        if ($debug_mode) die("Error occurred while running:\n$cmd\n\nOutput:\n$output");
        return FALSE;
    }

    $pdf = new FPDI_AppendWithWatermark();
    // make debugging easier by leaving the output file uncompressed
    if ($debug_mode) $pdf->SetCompression(FALSE);
    $pdf->AppendPDFWithWatermarkMessage($temp_file, $message);
    $pdf->Output($temp_file, 'F');

    if ($debug_mode) {
        // make debugging easier by omitting the final processing step
        return copy($temp_file, $output_file);
    }

    $cmd = sprintf('qpdf --encrypt "" "" 40 --extract=n -- %s %s 2>&1', escapeshellarg($temp_file), escapeshellarg($output_file));
    shell_exec($cmd);
    clearstatcache();
    return file_exists($output_file) && filesize($output_file) > 0;
}
@sadeep

This comment has been minimized.

Copy link

@sadeep sadeep commented May 14, 2012

It's a good one. It's really working with the fpdf parser, but when trying with the tcpdf parser it's not working. Can you give code which will work with the tcpdf parser?
Thanks..

@andreyvit

This comment has been minimized.

Copy link
Owner Author

@andreyvit andreyvit commented May 14, 2012

@sadeep Nope, I'm not using tcpdf, sorry.

@andreyvit

This comment has been minimized.

Copy link
Owner Author

@andreyvit andreyvit commented May 17, 2012

Note: published a new version that handles internal hyperlinks (like a table of contents) in addition to external ones. Also added an extended usage example that implements personalization of PDFs, watermarking each page with a specified message.

@alesl

This comment has been minimized.

Copy link

@alesl alesl commented Oct 9, 2012

Actually, it is quite a simple fix to make it work with tcpdf:

<?php

// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Returns a copy of $smt in which indirect references (PDF_TYPE_OBJREF) and object
    // wrappers (PDF_TYPE_OBJECT) are replaced by the values they lead to, descending into
    // arrays and dictionaries.
    //
    // $parser   - parser of the source document; must provide pdf_resolve_object() and $c
    // $smt      - parsed PDF value: element 0 is a PDF_TYPE_* constant, element 1 the payload
    // $maxdepth - remaining recursion budget; once it reaches 0 the value is returned as is.
    //             The default prevents an infinite recursion on malformed PDFs (not
    //             theoretical, actually found in the wild).
    //
    // importPage() no longer feeds annotations through this method: resolving everything
    // eagerly also replaces the page references inside link destinations, and those must
    // stay references so findPageNoForRef() can match them. It is kept for external callers.
    function resolve(&$parser, $smt, $maxdepth=10) {
        // a failed lookup may hand back a non-array; pass it through instead of indexing it
        if ($maxdepth == 0 || !is_array($smt))
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    // Follows indirect references / object wrappers starting at $value and returns the first
    // value that is neither. Unlike resolve(), the contents of arrays and dictionaries are
    // left untouched. $maxhops bounds the chain length so a reference cycle cannot loop
    // forever; when it runs out the value reached so far is returned, so callers must check
    // the type of the result.
    function _annotDeref(&$parser, $value, $maxhops=10) {
        while ($maxhops-- > 0 && is_array($value) && isset($value[0])) {
            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT) {
                $value = isset($value[1]) ? $value[1] : null;
            } else {
                break;
            }
        }
        return $value;
    }

    // True when $value is a parsed PDF value tagged with the given PDF_TYPE_* constant.
    function _annotIsType($value, $type) {
        return is_array($value) && isset($value[0]) && $value[0] == $type;
    }

    // Returns the dereferenced value stored under $key in the parsed dictionary $dict,
    // or null when $dict is not a dictionary or has no such key.
    function _annotEntry(&$parser, $dict, $key) {
        if (!$this->_annotIsType($dict, PDF_TYPE_DICTIONARY) || !isset($dict[1][$key]))
            return null;
        return $this->_annotDeref($parser, $dict[1][$key]);
    }

    // Returns the string payload (e.g. '/Link') stored under $key in $dict, or null when
    // the entry is missing or has no string payload.
    function _annotString(&$parser, $dict, $key) {
        $value = $this->_annotEntry($parser, $dict, $key);
        if (is_array($value) && isset($value[1]) && is_string($value[1]))
            return $value[1];
        return null;
    }

    // Returns the 1-based number of the page whose object/generation numbers equal those of
    // the reference $pageRef (array(PDF_TYPE_OBJREF, obj, gen)), or -1 when no page matches.
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1]) || !isset($pageRef[2]))
            return -1;
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            if (!isset($page['obj']) || !isset($page['gen']))
                continue;
            if ($page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Reads /Rect of a link annotation and returns array(x, y, w, h) in the units of the
    // source PDF, where w = x2 - x and h = -(y2 - y) (the sign flip is what useTemplate()
    // expects). Returns null when /Rect is missing or is not an array of four numbers, so
    // that no link is ever emitted with undefined or stale coordinates.
    function _annotLinkRect(&$parser, $annot) {
        $rect = $this->_annotEntry($parser, $annot, '/Rect');
        if (!$this->_annotIsType($rect, PDF_TYPE_ARRAY) || !is_array($rect[1]) || count($rect[1]) != 4)
            return null;

        $coords = array();
        foreach ($rect[1] as $item) {
            $number = $this->_annotDeref($parser, $item);
            if (!is_array($number) || !isset($number[1]) || !is_numeric($number[1]))
                return null;
            $coords[] = $number[1] + 0; // numeric token -> int/float
        }

        list($x, $y, $x2, $y2) = $coords;
        return array($x, $y, $x2 - $x, -($y2 - $y));
    }

    // Maps an explicit destination array ([42 0 R /Fit ...]) to a 1-based page number.
    // Returns null when $dest is not an array whose first element is a reference to one of
    // the pages known to the parser (named destinations are not supported).
    function _annotDestPage(&$parser, $dest) {
        if (!$this->_annotIsType($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
            return null;
        // the first element must remain an unresolved reference: its object/generation
        // numbers are the only thing that identifies the target page
        if (!$this->_annotIsType($dest[1][0], PDF_TYPE_OBJREF))
            return null;
        $pageno = $this->findPageNoForRef($parser, $dest[1][0]);
        return ($pageno > 0) ? $pageno : null;
    }

    // Determines where a link annotation points. Returns a string for an external URI, an
    // integer page number for an internal link, or null when the target is not supported.
    function _annotLinkTarget(&$parser, $annot) {
        if (!isset($annot[1]['/A'])) {
            //  << /Type /Annot ... /Dest [42 0 R ...] >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $annot, '/Dest'));
        }

        // when /A is present, /Dest is not consulted
        $action = $this->_annotEntry($parser, $annot, '/A');
        $kind = $this->_annotString($parser, $action, '/S');

        if ($kind == '/URI') {
            //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
            $uri = $this->_annotString($parser, $action, '/URI');
            if ($uri === null)
                return null;
            $uri = str_replace("\\000", '', trim($uri));
            return ($uri !== '') ? $uri : null;
        }

        if ($kind == '/GoTo') {
            //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $action, '/D'));
        }

        return null;
    }

    // Imports a page like FPDI::importPage() and additionally collects the page's link
    // annotations into $this->tpls[$tplidx]['links'], a list of array(x, y, w, h, target)
    // where target is a URI string or an integer page number of the source document.
    // The list is always assigned (empty when the page has no usable links).
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        $links = array();
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            $annots = $this->_annotDeref($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);
            $items = ($this->_annotIsType($annots, PDF_TYPE_ARRAY) && is_array($annots[1])) ? $annots[1] : array();

            foreach ($items as $item) {
                $annot = $this->_annotDeref($parser, $item);
                if (!$this->_annotIsType($annot, PDF_TYPE_DICTIONARY))
                    continue;

                // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
                // /Type is optional in annotation dictionaries, so only reject a wrong one
                $type = $this->_annotString($parser, $annot, '/Type');
                if ($type !== null && $type != '/Annot')
                    continue;
                if ($this->_annotString($parser, $annot, '/Subtype') != '/Link')
                    continue;

                $rect = $this->_annotLinkRect($parser, $annot);
                if ($rect === null)
                    continue;

                $target = $this->_annotLinkTarget($parser, $annot);
                if ($target === null)
                    continue;

                $links[] = array($rect[0], $rect[1], $rect[2], $rect[3], $target);
            }
        }
        // echo "Links on page $pageno:\n";
        // print_r($links);
        $tpl['links'] = $links;

        return $tplidx;
    }

    // Places the template like the parent implementation and then re-creates the links
    // collected by importPage() on the current output page through Link().
    // Returns the parent's result.
    // NOTE(review): link rectangles are copied as stored; the $_x/$_y/$_w/$_h placement
    // arguments are not applied to them.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal target: allocate a link id pointing at the top of that page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }

                // Stored values are divided by the scale factor $this->k before being
                // handed to Link(). $link[3] holds -(y2 - y), so fhPt - y + $link[3]
                // equals fhPt - y2 and -$link[3] is the positive height.
                // NOTE(review): fhPt is presumably the page height in points; confirm
                // against the TCPDF version in use.
                $this->Link(
                    $link[0]/$this->k,
                    ($this->fhPt-$link[1]+$link[3])/$this->k,
                    $link[2]/$this->k,
                    -$link[3]/$this->k,
                    $link[4]
                );
            }
        }

        return $result;
    }

}
@alesl

This comment has been minimized.

Copy link

@alesl alesl commented Oct 9, 2012

Actually, it's quite a simple fix to make it work with tcpdf:

<?php

// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Returns a copy of $smt in which indirect references (PDF_TYPE_OBJREF) and object
    // wrappers (PDF_TYPE_OBJECT) are replaced by the values they lead to, descending into
    // arrays and dictionaries.
    //
    // $parser   - parser of the source document; must provide pdf_resolve_object() and $c
    // $smt      - parsed PDF value: element 0 is a PDF_TYPE_* constant, element 1 the payload
    // $maxdepth - remaining recursion budget; once it reaches 0 the value is returned as is.
    //             The default prevents an infinite recursion on malformed PDFs (not
    //             theoretical, actually found in the wild).
    //
    // importPage() no longer feeds annotations through this method: resolving everything
    // eagerly also replaces the page references inside link destinations, and those must
    // stay references so findPageNoForRef() can match them. It is kept for external callers.
    function resolve(&$parser, $smt, $maxdepth=10) {
        // a failed lookup may hand back a non-array; pass it through instead of indexing it
        if ($maxdepth == 0 || !is_array($smt))
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    // Follows indirect references / object wrappers starting at $value and returns the first
    // value that is neither. Unlike resolve(), the contents of arrays and dictionaries are
    // left untouched. $maxhops bounds the chain length so a reference cycle cannot loop
    // forever; when it runs out the value reached so far is returned, so callers must check
    // the type of the result.
    function _annotDeref(&$parser, $value, $maxhops=10) {
        while ($maxhops-- > 0 && is_array($value) && isset($value[0])) {
            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT) {
                $value = isset($value[1]) ? $value[1] : null;
            } else {
                break;
            }
        }
        return $value;
    }

    // True when $value is a parsed PDF value tagged with the given PDF_TYPE_* constant.
    function _annotIsType($value, $type) {
        return is_array($value) && isset($value[0]) && $value[0] == $type;
    }

    // Returns the dereferenced value stored under $key in the parsed dictionary $dict,
    // or null when $dict is not a dictionary or has no such key.
    function _annotEntry(&$parser, $dict, $key) {
        if (!$this->_annotIsType($dict, PDF_TYPE_DICTIONARY) || !isset($dict[1][$key]))
            return null;
        return $this->_annotDeref($parser, $dict[1][$key]);
    }

    // Returns the string payload (e.g. '/Link') stored under $key in $dict, or null when
    // the entry is missing or has no string payload.
    function _annotString(&$parser, $dict, $key) {
        $value = $this->_annotEntry($parser, $dict, $key);
        if (is_array($value) && isset($value[1]) && is_string($value[1]))
            return $value[1];
        return null;
    }

    // Returns the 1-based number of the page whose object/generation numbers equal those of
    // the reference $pageRef (array(PDF_TYPE_OBJREF, obj, gen)), or -1 when no page matches.
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1]) || !isset($pageRef[2]))
            return -1;
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            if (!isset($page['obj']) || !isset($page['gen']))
                continue;
            if ($page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Reads /Rect of a link annotation and returns array(x, y, w, h) in the units of the
    // source PDF, where w = x2 - x and h = -(y2 - y) (the sign flip is what useTemplate()
    // expects). Returns null when /Rect is missing or is not an array of four numbers, so
    // that no link is ever emitted with undefined or stale coordinates.
    function _annotLinkRect(&$parser, $annot) {
        $rect = $this->_annotEntry($parser, $annot, '/Rect');
        if (!$this->_annotIsType($rect, PDF_TYPE_ARRAY) || !is_array($rect[1]) || count($rect[1]) != 4)
            return null;

        $coords = array();
        foreach ($rect[1] as $item) {
            $number = $this->_annotDeref($parser, $item);
            if (!is_array($number) || !isset($number[1]) || !is_numeric($number[1]))
                return null;
            $coords[] = $number[1] + 0; // numeric token -> int/float
        }

        list($x, $y, $x2, $y2) = $coords;
        return array($x, $y, $x2 - $x, -($y2 - $y));
    }

    // Maps an explicit destination array ([42 0 R /Fit ...]) to a 1-based page number.
    // Returns null when $dest is not an array whose first element is a reference to one of
    // the pages known to the parser (named destinations are not supported).
    function _annotDestPage(&$parser, $dest) {
        if (!$this->_annotIsType($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
            return null;
        // the first element must remain an unresolved reference: its object/generation
        // numbers are the only thing that identifies the target page
        if (!$this->_annotIsType($dest[1][0], PDF_TYPE_OBJREF))
            return null;
        $pageno = $this->findPageNoForRef($parser, $dest[1][0]);
        return ($pageno > 0) ? $pageno : null;
    }

    // Determines where a link annotation points. Returns a string for an external URI, an
    // integer page number for an internal link, or null when the target is not supported.
    function _annotLinkTarget(&$parser, $annot) {
        if (!isset($annot[1]['/A'])) {
            //  << /Type /Annot ... /Dest [42 0 R ...] >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $annot, '/Dest'));
        }

        // when /A is present, /Dest is not consulted
        $action = $this->_annotEntry($parser, $annot, '/A');
        $kind = $this->_annotString($parser, $action, '/S');

        if ($kind == '/URI') {
            //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
            $uri = $this->_annotString($parser, $action, '/URI');
            if ($uri === null)
                return null;
            $uri = str_replace("\\000", '', trim($uri));
            return ($uri !== '') ? $uri : null;
        }

        if ($kind == '/GoTo') {
            //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $action, '/D'));
        }

        return null;
    }

    // Imports a page like FPDI::importPage() and additionally collects the page's link
    // annotations into $this->tpls[$tplidx]['links'], a list of array(x, y, w, h, target)
    // where target is a URI string or an integer page number of the source document.
    // The list is always assigned (empty when the page has no usable links).
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        $links = array();
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            $annots = $this->_annotDeref($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);
            $items = ($this->_annotIsType($annots, PDF_TYPE_ARRAY) && is_array($annots[1])) ? $annots[1] : array();

            foreach ($items as $item) {
                $annot = $this->_annotDeref($parser, $item);
                if (!$this->_annotIsType($annot, PDF_TYPE_DICTIONARY))
                    continue;

                // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
                // /Type is optional in annotation dictionaries, so only reject a wrong one
                $type = $this->_annotString($parser, $annot, '/Type');
                if ($type !== null && $type != '/Annot')
                    continue;
                if ($this->_annotString($parser, $annot, '/Subtype') != '/Link')
                    continue;

                $rect = $this->_annotLinkRect($parser, $annot);
                if ($rect === null)
                    continue;

                $target = $this->_annotLinkTarget($parser, $annot);
                if ($target === null)
                    continue;

                $links[] = array($rect[0], $rect[1], $rect[2], $rect[3], $target);
            }
        }
        // echo "Links on page $pageno:\n";
        // print_r($links);
        $tpl['links'] = $links;

        return $tplidx;
    }

    // Places the template like the parent implementation and then re-creates the links
    // collected by importPage() on the current output page through Link().
    // Returns the parent's result.
    // NOTE(review): link rectangles are copied as stored; the $_x/$_y/$_w/$_h placement
    // arguments are not applied to them.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal target: allocate a link id pointing at the top of that page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }

                // Stored values are divided by the scale factor $this->k before being
                // handed to Link(). $link[3] holds -(y2 - y), so fhPt - y + $link[3]
                // equals fhPt - y2 and -$link[3] is the positive height.
                // NOTE(review): fhPt is presumably the page height in points; confirm
                // against the TCPDF version in use.
                $this->Link(
                    $link[0]/$this->k,
                    ($this->fhPt-$link[1]+$link[3])/$this->k,
                    $link[2]/$this->k,
                    -$link[3]/$this->k,
                    $link[4]
                );
            }
        }

        return $result;
    }

}

Regards,
Ales

@alesl

This comment has been minimized.

Copy link

@alesl alesl commented Oct 9, 2012

Actually, it's quite a simple fix to make it work with tcpdf:

<?php

// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Returns a copy of $smt in which indirect references (PDF_TYPE_OBJREF) and object
    // wrappers (PDF_TYPE_OBJECT) are replaced by the values they lead to, descending into
    // arrays and dictionaries.
    //
    // $parser   - parser of the source document; must provide pdf_resolve_object() and $c
    // $smt      - parsed PDF value: element 0 is a PDF_TYPE_* constant, element 1 the payload
    // $maxdepth - remaining recursion budget; once it reaches 0 the value is returned as is.
    //             The default prevents an infinite recursion on malformed PDFs (not
    //             theoretical, actually found in the wild).
    //
    // importPage() no longer feeds annotations through this method: resolving everything
    // eagerly also replaces the page references inside link destinations, and those must
    // stay references so findPageNoForRef() can match them. It is kept for external callers.
    function resolve(&$parser, $smt, $maxdepth=10) {
        // a failed lookup may hand back a non-array; pass it through instead of indexing it
        if ($maxdepth == 0 || !is_array($smt))
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    // Follows indirect references / object wrappers starting at $value and returns the first
    // value that is neither. Unlike resolve(), the contents of arrays and dictionaries are
    // left untouched. $maxhops bounds the chain length so a reference cycle cannot loop
    // forever; when it runs out the value reached so far is returned, so callers must check
    // the type of the result.
    function _annotDeref(&$parser, $value, $maxhops=10) {
        while ($maxhops-- > 0 && is_array($value) && isset($value[0])) {
            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT) {
                $value = isset($value[1]) ? $value[1] : null;
            } else {
                break;
            }
        }
        return $value;
    }

    // True when $value is a parsed PDF value tagged with the given PDF_TYPE_* constant.
    function _annotIsType($value, $type) {
        return is_array($value) && isset($value[0]) && $value[0] == $type;
    }

    // Returns the dereferenced value stored under $key in the parsed dictionary $dict,
    // or null when $dict is not a dictionary or has no such key.
    function _annotEntry(&$parser, $dict, $key) {
        if (!$this->_annotIsType($dict, PDF_TYPE_DICTIONARY) || !isset($dict[1][$key]))
            return null;
        return $this->_annotDeref($parser, $dict[1][$key]);
    }

    // Returns the string payload (e.g. '/Link') stored under $key in $dict, or null when
    // the entry is missing or has no string payload.
    function _annotString(&$parser, $dict, $key) {
        $value = $this->_annotEntry($parser, $dict, $key);
        if (is_array($value) && isset($value[1]) && is_string($value[1]))
            return $value[1];
        return null;
    }

    // Returns the 1-based number of the page whose object/generation numbers equal those of
    // the reference $pageRef (array(PDF_TYPE_OBJREF, obj, gen)), or -1 when no page matches.
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1]) || !isset($pageRef[2]))
            return -1;
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            if (!isset($page['obj']) || !isset($page['gen']))
                continue;
            if ($page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Reads /Rect of a link annotation and returns array(x, y, w, h) in the units of the
    // source PDF, where w = x2 - x and h = -(y2 - y) (the sign flip is what useTemplate()
    // expects). Returns null when /Rect is missing or is not an array of four numbers, so
    // that no link is ever emitted with undefined or stale coordinates.
    function _annotLinkRect(&$parser, $annot) {
        $rect = $this->_annotEntry($parser, $annot, '/Rect');
        if (!$this->_annotIsType($rect, PDF_TYPE_ARRAY) || !is_array($rect[1]) || count($rect[1]) != 4)
            return null;

        $coords = array();
        foreach ($rect[1] as $item) {
            $number = $this->_annotDeref($parser, $item);
            if (!is_array($number) || !isset($number[1]) || !is_numeric($number[1]))
                return null;
            $coords[] = $number[1] + 0; // numeric token -> int/float
        }

        list($x, $y, $x2, $y2) = $coords;
        return array($x, $y, $x2 - $x, -($y2 - $y));
    }

    // Maps an explicit destination array ([42 0 R /Fit ...]) to a 1-based page number.
    // Returns null when $dest is not an array whose first element is a reference to one of
    // the pages known to the parser (named destinations are not supported).
    function _annotDestPage(&$parser, $dest) {
        if (!$this->_annotIsType($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
            return null;
        // the first element must remain an unresolved reference: its object/generation
        // numbers are the only thing that identifies the target page
        if (!$this->_annotIsType($dest[1][0], PDF_TYPE_OBJREF))
            return null;
        $pageno = $this->findPageNoForRef($parser, $dest[1][0]);
        return ($pageno > 0) ? $pageno : null;
    }

    // Determines where a link annotation points. Returns a string for an external URI, an
    // integer page number for an internal link, or null when the target is not supported.
    function _annotLinkTarget(&$parser, $annot) {
        if (!isset($annot[1]['/A'])) {
            //  << /Type /Annot ... /Dest [42 0 R ...] >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $annot, '/Dest'));
        }

        // when /A is present, /Dest is not consulted
        $action = $this->_annotEntry($parser, $annot, '/A');
        $kind = $this->_annotString($parser, $action, '/S');

        if ($kind == '/URI') {
            //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
            $uri = $this->_annotString($parser, $action, '/URI');
            if ($uri === null)
                return null;
            $uri = str_replace("\\000", '', trim($uri));
            return ($uri !== '') ? $uri : null;
        }

        if ($kind == '/GoTo') {
            //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $action, '/D'));
        }

        return null;
    }

    // Imports a page like FPDI::importPage() and additionally collects the page's link
    // annotations into $this->tpls[$tplidx]['links'], a list of array(x, y, w, h, target)
    // where target is a URI string or an integer page number of the source document.
    // The list is always assigned (empty when the page has no usable links).
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        $links = array();
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            $annots = $this->_annotDeref($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);
            $items = ($this->_annotIsType($annots, PDF_TYPE_ARRAY) && is_array($annots[1])) ? $annots[1] : array();

            foreach ($items as $item) {
                $annot = $this->_annotDeref($parser, $item);
                if (!$this->_annotIsType($annot, PDF_TYPE_DICTIONARY))
                    continue;

                // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
                // /Type is optional in annotation dictionaries, so only reject a wrong one
                $type = $this->_annotString($parser, $annot, '/Type');
                if ($type !== null && $type != '/Annot')
                    continue;
                if ($this->_annotString($parser, $annot, '/Subtype') != '/Link')
                    continue;

                $rect = $this->_annotLinkRect($parser, $annot);
                if ($rect === null)
                    continue;

                $target = $this->_annotLinkTarget($parser, $annot);
                if ($target === null)
                    continue;

                $links[] = array($rect[0], $rect[1], $rect[2], $rect[3], $target);
            }
        }
        // echo "Links on page $pageno:\n";
        // print_r($links);
        $tpl['links'] = $links;

        return $tplidx;
    }

    // Places the template like the parent implementation and then re-creates the links
    // collected by importPage() on the current output page through Link().
    // Returns the parent's result.
    // NOTE(review): link rectangles are copied as stored; the $_x/$_y/$_w/$_h placement
    // arguments are not applied to them.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal target: allocate a link id pointing at the top of that page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }

                // Stored values are divided by the scale factor $this->k before being
                // handed to Link(). $link[3] holds -(y2 - y), so fhPt - y + $link[3]
                // equals fhPt - y2 and -$link[3] is the positive height.
                // NOTE(review): fhPt is presumably the page height in points; confirm
                // against the TCPDF version in use.
                $this->Link(
                    $link[0]/$this->k,
                    ($this->fhPt-$link[1]+$link[3])/$this->k,
                    $link[2]/$this->k,
                    -$link[3]/$this->k,
                    $link[4]
                );
            }
        }

        return $result;
    }

}

Regards,
Ales

@alesl

This comment has been minimized.

Copy link

@alesl alesl commented Oct 9, 2012

Actually, it's quite a simple fix to make it work with tcpdf:

<?php

// FPDI extension that preserves hyperlinks when copying PDF pages.
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//
class FPDI_with_annots extends FPDI {

    // Returns a copy of $smt in which indirect references (PDF_TYPE_OBJREF) and object
    // wrappers (PDF_TYPE_OBJECT) are replaced by the values they lead to, descending into
    // arrays and dictionaries.
    //
    // $parser   - parser of the source document; must provide pdf_resolve_object() and $c
    // $smt      - parsed PDF value: element 0 is a PDF_TYPE_* constant, element 1 the payload
    // $maxdepth - remaining recursion budget; once it reaches 0 the value is returned as is.
    //             The default prevents an infinite recursion on malformed PDFs (not
    //             theoretical, actually found in the wild).
    //
    // importPage() no longer feeds annotations through this method: resolving everything
    // eagerly also replaces the page references inside link destinations, and those must
    // stay references so findPageNoForRef() can match them. It is kept for external callers.
    function resolve(&$parser, $smt, $maxdepth=10) {
        // a failed lookup may hand back a non-array; pass it through instead of indexing it
        if ($maxdepth == 0 || !is_array($smt))
            return $smt;

        if ($smt[0] == PDF_TYPE_OBJREF) {
            $result = $parser->pdf_resolve_object($parser->c, $smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);

        } else if ($smt[0] == PDF_TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == PDF_TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    // Follows indirect references / object wrappers starting at $value and returns the first
    // value that is neither. Unlike resolve(), the contents of arrays and dictionaries are
    // left untouched. $maxhops bounds the chain length so a reference cycle cannot loop
    // forever; when it runs out the value reached so far is returned, so callers must check
    // the type of the result.
    function _annotDeref(&$parser, $value, $maxhops=10) {
        while ($maxhops-- > 0 && is_array($value) && isset($value[0])) {
            if ($value[0] == PDF_TYPE_OBJREF) {
                $value = $parser->pdf_resolve_object($parser->c, $value);
            } else if ($value[0] == PDF_TYPE_OBJECT) {
                $value = isset($value[1]) ? $value[1] : null;
            } else {
                break;
            }
        }
        return $value;
    }

    // True when $value is a parsed PDF value tagged with the given PDF_TYPE_* constant.
    function _annotIsType($value, $type) {
        return is_array($value) && isset($value[0]) && $value[0] == $type;
    }

    // Returns the dereferenced value stored under $key in the parsed dictionary $dict,
    // or null when $dict is not a dictionary or has no such key.
    function _annotEntry(&$parser, $dict, $key) {
        if (!$this->_annotIsType($dict, PDF_TYPE_DICTIONARY) || !isset($dict[1][$key]))
            return null;
        return $this->_annotDeref($parser, $dict[1][$key]);
    }

    // Returns the string payload (e.g. '/Link') stored under $key in $dict, or null when
    // the entry is missing or has no string payload.
    function _annotString(&$parser, $dict, $key) {
        $value = $this->_annotEntry($parser, $dict, $key);
        if (is_array($value) && isset($value[1]) && is_string($value[1]))
            return $value[1];
        return null;
    }

    // Returns the 1-based number of the page whose object/generation numbers equal those of
    // the reference $pageRef (array(PDF_TYPE_OBJREF, obj, gen)), or -1 when no page matches.
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1]) || !isset($pageRef[2]))
            return -1;
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        foreach ($parser->pages as $index => $page) {
            if (!isset($page['obj']) || !isset($page['gen']))
                continue;
            if ($page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    // Reads /Rect of a link annotation and returns array(x, y, w, h) in the units of the
    // source PDF, where w = x2 - x and h = -(y2 - y) (the sign flip is what useTemplate()
    // expects). Returns null when /Rect is missing or is not an array of four numbers, so
    // that no link is ever emitted with undefined or stale coordinates.
    function _annotLinkRect(&$parser, $annot) {
        $rect = $this->_annotEntry($parser, $annot, '/Rect');
        if (!$this->_annotIsType($rect, PDF_TYPE_ARRAY) || !is_array($rect[1]) || count($rect[1]) != 4)
            return null;

        $coords = array();
        foreach ($rect[1] as $item) {
            $number = $this->_annotDeref($parser, $item);
            if (!is_array($number) || !isset($number[1]) || !is_numeric($number[1]))
                return null;
            $coords[] = $number[1] + 0; // numeric token -> int/float
        }

        list($x, $y, $x2, $y2) = $coords;
        return array($x, $y, $x2 - $x, -($y2 - $y));
    }

    // Maps an explicit destination array ([42 0 R /Fit ...]) to a 1-based page number.
    // Returns null when $dest is not an array whose first element is a reference to one of
    // the pages known to the parser (named destinations are not supported).
    function _annotDestPage(&$parser, $dest) {
        if (!$this->_annotIsType($dest, PDF_TYPE_ARRAY) || !isset($dest[1][0]))
            return null;
        // the first element must remain an unresolved reference: its object/generation
        // numbers are the only thing that identifies the target page
        if (!$this->_annotIsType($dest[1][0], PDF_TYPE_OBJREF))
            return null;
        $pageno = $this->findPageNoForRef($parser, $dest[1][0]);
        return ($pageno > 0) ? $pageno : null;
    }

    // Determines where a link annotation points. Returns a string for an external URI, an
    // integer page number for an internal link, or null when the target is not supported.
    function _annotLinkTarget(&$parser, $annot) {
        if (!isset($annot[1]['/A'])) {
            //  << /Type /Annot ... /Dest [42 0 R ...] >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $annot, '/Dest'));
        }

        // when /A is present, /Dest is not consulted
        $action = $this->_annotEntry($parser, $annot, '/A');
        $kind = $this->_annotString($parser, $action, '/S');

        if ($kind == '/URI') {
            //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
            $uri = $this->_annotString($parser, $action, '/URI');
            if ($uri === null)
                return null;
            $uri = str_replace("\\000", '', trim($uri));
            return ($uri !== '') ? $uri : null;
        }

        if ($kind == '/GoTo') {
            //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
            return $this->_annotDestPage($parser, $this->_annotEntry($parser, $action, '/D'));
        }

        return null;
    }

    // Imports a page like FPDI::importPage() and additionally collects the page's link
    // annotations into $this->tpls[$tplidx]['links'], a list of array(x, y, w, h, target)
    // where target is a URI string or an integer page number of the source document.
    // The list is always assigned (empty when the page has no usable links).
    // Returns the template index produced by the parent implementation.
    function importPage($pageno, $boxName = '/CropBox') {
        $tplidx = parent::importPage($pageno, $boxName);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $tpl['parser'];

        // look for hyperlink annotations and store them in the template
        $links = array();
        if (isset($parser->pages[$pageno - 1][1][1]['/Annots'])) {
            $annots = $this->_annotDeref($parser, $parser->pages[$pageno - 1][1][1]['/Annots']);
            $items = ($this->_annotIsType($annots, PDF_TYPE_ARRAY) && is_array($annots[1])) ? $annots[1] : array();

            foreach ($items as $item) {
                $annot = $this->_annotDeref($parser, $item);
                if (!$this->_annotIsType($annot, PDF_TYPE_DICTIONARY))
                    continue;

                // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
                // /Type is optional in annotation dictionaries, so only reject a wrong one
                $type = $this->_annotString($parser, $annot, '/Type');
                if ($type !== null && $type != '/Annot')
                    continue;
                if ($this->_annotString($parser, $annot, '/Subtype') != '/Link')
                    continue;

                $rect = $this->_annotLinkRect($parser, $annot);
                if ($rect === null)
                    continue;

                $target = $this->_annotLinkTarget($parser, $annot);
                if ($target === null)
                    continue;

                $links[] = array($rect[0], $rect[1], $rect[2], $rect[3], $target);
            }
        }
        // echo "Links on page $pageno:\n";
        // print_r($links);
        $tpl['links'] = $links;

        return $tplidx;
    }

    // Places the template like the parent implementation and then re-creates the links
    // collected by importPage() on the current output page through Link().
    // Returns the parent's result.
    // NOTE(review): link rectangles are copied as stored; the $_x/$_y/$_w/$_h placement
    // arguments are not applied to them.
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    // internal target: allocate a link id pointing at the top of that page
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }

                // Stored values are divided by the scale factor $this->k before being
                // handed to Link(). $link[3] holds -(y2 - y), so fhPt - y + $link[3]
                // equals fhPt - y2 and -$link[3] is the positive height.
                // NOTE(review): fhPt is presumably the page height in points; confirm
                // against the TCPDF version in use.
                $this->Link(
                    $link[0]/$this->k,
                    ($this->fhPt-$link[1]+$link[3])/$this->k,
                    $link[2]/$this->k,
                    -$link[3]/$this->k,
                    $link[4]
                );
            }
        }

        return $result;
    }

}

Regards,
Ales

@Brammm

This comment has been minimized.

Copy link

@Brammm Brammm commented Oct 24, 2012

Works perfectly for me, thanks!

@sadeep

This comment has been minimized.

Copy link

@sadeep sadeep commented Jan 11, 2013

Hi Andrey, can you help me? I tried your code on a sample PDF file that has hyperlinks,
but it's not working. Can you please tell me why it's not working?
I'm attaching what I did here:
http://www.mediafire.com/?izzs6b3cfwp1c2t

@sadeep

This comment has been minimized.

Copy link

@sadeep sadeep commented Jan 11, 2013

The sample PDF that I used has both internal and external links.
I was trying to get the external links.
Thanks..

@hadl

This comment has been minimized.

Copy link

@hadl hadl commented Apr 2, 2013

Thank you!

alesl's example for tcpdf (version: 5.9.203) works, if you calculate the link positions in this way:

// Variant of the Link() call from useTemplate(): the y position is computed
// from $this->hPt without adding $link[3], and $link[3] is passed as the
// height without negation.
$this->Link(
    $link[0]/$this->k,
    ($this->hPt - $link[1])/$this->k,
    $link[2]/$this->k,
    $link[3]/$this->k,
    $link[4]
);
@simison

This comment has been minimized.

Copy link

@simison simison commented Mar 9, 2016

Note that this extension seems to require an older version of FPDI (1.4.4) from 2013 and FPDF_TPL (1.2.3).

You can download them here: https://www.setasign.com/products/fpdi/downloads/

If you're missing fpdi_bridge.php, I used version 1.6.1 from newer FPDI (1.8) and it worked fine.

See this StackOverflow answer by @Setasign: http://stackoverflow.com/a/28810275/1984644

I'm using PHP PDFMerger with this fix.

@u01jmg3

This comment has been minimized.

Copy link

@u01jmg3 u01jmg3 commented Jul 12, 2016

@andreyvit, @alesl: can this extension be updated to work with the latest FPDI (1.6.1) rather than requiring an older version in order to work (FPDI 1.4.4)?

@lbassuncao

This comment has been minimized.

Copy link

@lbassuncao lbassuncao commented Sep 6, 2016

I am sorry for my ignorance, but how do I use these PHP classes?

@krzyc

This comment has been minimized.

Copy link

@krzyc krzyc commented Mar 30, 2018

Quick fix for FPDI 1.6.1 (based on TCPDF code). Needs testing.

<?php

// FPDI extension that preserves hyperlinks when copying PDF pages.
// probably fixed for FPDI 1.6.1 with TCPDF
//
// (c) 2012, Andrey Tarantsov <andrey@tarantsov.com>, provided under the MIT license.
//
// Published at: https://gist.github.com/2020422
//
// Note: the free version of FPDI requires unprotected PDFs conforming to spec version 1.4.
// I use qpdf (http://qpdf.sourceforge.net/) to preprocess PDFs before running through this
// code, invoking it like this:
//
//     qpdf --decrypt --stream-data=uncompress --force-version=1.4 src.pdf temp.pdf
//
// then, after processing temp.pdf into out.pdf with FPDI, I run the following to re-establish
// protection:
//
//     qpdf --encrypt "" "" 40 --extract=n -- out.pdf final.pdf
//

require_once('fpdi/fpdi_pdf_parser.php');

class fpdi_pdf_parser_with_annots extends fpdi_pdf_parser
{
    /**
     * Exposes the parser's `_pages` property to code outside the parser.
     *
     * @return mixed The current value of `$this->_pages`.
     */
    public function getPages()
    {
        $pageList = $this->_pages;

        return $pageList;
    }
}

/**
 * FPDI subclass that records the link annotations of every imported page and
 * re-creates them whenever the resulting template is placed on a page.
 *
 * Supported targets: /URI actions (external links) and explicit destinations
 * whose first element is a page reference (/GoTo actions and /Dest entries).
 * Named destinations are not resolved and such links are dropped.
 */
class FPDI_with_annots extends FPDI {

    /**
     * Makes FPDI use the parser subclass that exposes the parsed page list.
     *
     * @param string $filename Passed to the parser constructor.
     * @return fpdi_pdf_parser_with_annots
     */
    protected function _getPdfParser($filename)
    {
        return new fpdi_pdf_parser_with_annots($filename);
    }

    /**
     * Recursively replaces object references inside a parsed PDF value with
     * the values they point to, descending into arrays and dictionaries.
     *
     * Object wrappers are replaced by their value, so object and generation
     * numbers are lost; do not use this on data where the identity of a
     * referenced object matters (e.g. page references in destinations).
     * importPage() no longer uses it; it is kept for existing callers.
     *
     * @param object $parser   Parser providing resolveObject().
     * @param array  $smt      Parsed PDF value (element 0 is the type tag).
     * @param int    $maxdepth Remaining recursion budget. The default prevents
     *                         an infinite recursion on malformed PDFs (not
     *                         theoretical, actually found in the wild).
     * @return array The value with references replaced down to $maxdepth levels.
     */
    function resolve(&$parser, $smt, $maxdepth=10) {
        if ($maxdepth == 0)
            return $smt;

        if ($smt[0] == pdf_parser::TYPE_OBJREF) {
            $result = $parser->resolveObject($smt);
            return $this->resolve($parser, $result, $maxdepth-1);

        } else if ($smt[0] == pdf_parser::TYPE_OBJECT) {
            return $this->resolve($parser, $smt[1], $maxdepth-1);

        } else if ($smt[0] == pdf_parser::TYPE_ARRAY) {
            $result = array();
            foreach ($smt[1] as $item) {
                $result[] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else if ($smt[0] == pdf_parser::TYPE_DICTIONARY) {
            $result = array();
            foreach ($smt[1] as $key => $item) {
                $result[$key] = $this->resolve($parser, $item, $maxdepth-1);
            }
            $smt[1] = $result;
            return $smt;

        } else {
            return $smt;
        }
    }

    /**
     * Unwraps a single value: follows object references and strips object
     * wrappers until a plain value is reached, WITHOUT descending into the
     * elements of arrays or dictionaries.
     *
     * @param object $parser  Parser providing resolveObject().
     * @param mixed  $value   Parsed PDF value.
     * @param int    $maxhops Upper bound on unwrap steps (guards against
     *                        reference cycles in malformed files).
     * @return mixed The unwrapped value; may be non-array if a reference
     *               could not be resolved.
     */
    protected function dereference($parser, $value, $maxhops = 10) {
        while ($maxhops-- > 0 && is_array($value) && isset($value[0])) {
            if ($value[0] == pdf_parser::TYPE_OBJREF) {
                $value = $parser->resolveObject($value);
            } else if ($value[0] == pdf_parser::TYPE_OBJECT) {
                $value = isset($value[1]) ? $value[1] : null;
            } else {
                break;
            }
        }
        return $value;
    }

    /**
     * Finds the 1-based page number of the page object a reference points to.
     *
     * @param object $parser  Parser providing getPages().
     * @param array  $pageRef Object reference; elements 1 and 2 are compared
     *                        against each page's 'obj' and 'gen' entries.
     * @return int Page number, or -1 when no page matches.
     */
    function findPageNoForRef(&$parser, $pageRef) {
        if (!is_array($pageRef) || !isset($pageRef[1], $pageRef[2])) {
            return -1;
        }
        $ref_obj = $pageRef[1]; $ref_gen = $pageRef[2];

        // The page list is only reachable through getPages() on this parser.
        foreach ($parser->getPages() as $index => $page) {
            if (!isset($page['obj'], $page['gen'])) {
                continue;
            }
            if ($page['obj'] == $ref_obj && $page['gen'] == $ref_gen) {
                return $index + 1;
            }
        }

        return -1;
    }

    /**
     * Maps an explicit destination to a page number.
     *
     * @param object $parser Parser used for lookups.
     * @param mixed  $dest   Destination value as found in /D or /Dest.
     * @return int|null 1-based page number, or null when the destination is
     *                  not an array starting with a reference to a known page.
     */
    protected function destinationPageNo($parser, $dest) {
        $dest = $this->dereference($parser, $dest);
        if (!is_array($dest) || $dest[0] != pdf_parser::TYPE_ARRAY || !isset($dest[1][0])) {
            return null;
        }

        // Deliberately not dereferenced: the object/generation numbers of the
        // reference are what identifies the target page.
        $pageRef = $dest[1][0];
        if (!is_array($pageRef) || $pageRef[0] != pdf_parser::TYPE_OBJREF) {
            return null;
        }

        $pageno = $this->findPageNoForRef($parser, $pageRef);
        return $pageno >= 0 ? $pageno : null;
    }

    /**
     * Converts one annotation into a stored link entry.
     *
     * @param object $parser Parser used for lookups.
     * @param mixed  $annot  Annotation value, already unwrapped.
     * @return array|null array(x, y, w, h, target) where h is stored negated
     *                    (useTemplate() undoes this) and target is a URL
     *                    string or an integer page number; null when the
     *                    annotation is not a usable link.
     */
    protected function extractLink($parser, $annot) {
        if (!is_array($annot) || $annot[0] != pdf_parser::TYPE_DICTIONARY) {
            return null;
        }
        $dict = $annot[1];

        // all links look like:  << /Type /Annot /Subtype /Link /Rect [...] ... >>
        // /Type is optional for annotations, so only reject a wrong value.
        if (isset($dict['/Type'])) {
            $type = $this->dereference($parser, $dict['/Type']);
            if (!is_array($type) || !isset($type[1]) || $type[1] != '/Annot') {
                return null;
            }
        }
        if (!isset($dict['/Subtype'])) {
            return null;
        }
        $subtype = $this->dereference($parser, $dict['/Subtype']);
        if (!is_array($subtype) || !isset($subtype[1]) || $subtype[1] != '/Link') {
            return null;
        }

        // Without a well-formed rectangle there is nothing to place.
        if (!isset($dict['/Rect'])) {
            return null;
        }
        $rect = $this->dereference($parser, $dict['/Rect']);
        if (!is_array($rect) || $rect[0] != pdf_parser::TYPE_ARRAY || count($rect[1]) != 4) {
            return null;
        }
        $coords = array();
        foreach ($rect[1] as $coord) {
            $coord = $this->dereference($parser, $coord);
            if (!is_array($coord) || !isset($coord[1])) {
                return null;
            }
            $coords[] = $coord[1];
        }
        list($x, $y, $x2, $y2) = $coords;
        $w = $x2 - $x;
        $h = -($y2 - $y);

        $target = null;
        if (isset($dict['/A'])) {
            $A = $this->dereference($parser, $dict['/A']);

            if (is_array($A) && $A[0] == pdf_parser::TYPE_DICTIONARY && isset($A[1]['/S'])) {
                $S = $this->dereference($parser, $A[1]['/S']);
                $kind = (is_array($S) && isset($S[1])) ? $S[1] : null;

                //  << /Type /Annot ... /A << /S /URI /URI ... >> >>
                if ($kind == '/URI' && isset($A[1]['/URI'])) {
                    $URI = $this->dereference($parser, $A[1]['/URI']);

                    if (is_array($URI) && isset($URI[1]) && is_string($URI[1])) {
                        $uri = str_replace("\\000", '', trim($URI[1]));
                        if (!empty($uri)) {
                            $target = $uri;
                        }
                    }

                //  << /Type /Annot ... /A << /S /GoTo /D [%d 0 R /Fit] >> >>
                } else if ($kind == '/GoTo' && isset($A[1]['/D'])) {
                    $target = $this->destinationPageNo($parser, $A[1]['/D']);
                }
            }

        } else if (isset($dict['/Dest'])) {
            //  << /Type /Annot ... /Dest [42 0 R ...] >>
            $target = $this->destinationPageNo($parser, $dict['/Dest']);
        }

        if ($target === null) {
            return null;
        }
        return array($x, $y, $w, $h, $target);
    }

    /**
     * Imports a page through the parent implementation and stores the page's
     * hyperlinks in the template under the 'links' key.
     *
     * Parameters are forwarded to parent::importPage() unchanged.
     *
     * @param int $pageno 1-based page number in the current source file.
     * @return mixed The template index returned by parent::importPage().
     */
    function importPage($pageno, $boxName = '/CropBox', $groupXObject = true) {
        $tplidx = parent::importPage($pageno, $boxName, $groupXObject);

        $tpl =& $this->tpls[$tplidx];
        $parser =& $this->parsers[$this->currentFilename];

        $pages = $parser->getPages();

        // Always defined, so pages without annotations store an empty list.
        $links = array();

        // look for hyperlink annotations and store them in the template
        if (isset($pages[$pageno - 1][1][1]['/Annots'])) {
            $annots = $this->dereference($parser, $pages[$pageno - 1][1][1]['/Annots']);

            if (is_array($annots) && $annots[0] == pdf_parser::TYPE_ARRAY) {
                foreach ($annots[1] as $item) {
                    $link = $this->extractLink($parser, $this->dereference($parser, $item));
                    if ($link !== null) {
                        $links[] = $link;
                    }
                }
            }
        }

        $tpl['links'] = $links;

        return $tplidx;
    }

    /**
     * Places a template through the parent implementation, then re-creates
     * the links that importPage() stored with it on the current page.
     *
     * Link positions are computed only from the stored rectangle, the scale
     * factor $this->k and $this->fhPt; the placement arguments ($_x, $_y,
     * $_w, $_h) are not applied to the links.
     * NOTE(review): fhPt is expected to be provided by the underlying PDF
     * class (the file header mentions TCPDF) - confirm for your setup.
     *
     * @return mixed Whatever parent::useTemplate() returned.
     */
    function useTemplate($tplidx, $_x = null, $_y = null, $_w = 0, $_h = 0, $adjustPageSize = false) {
        $result = parent::useTemplate($tplidx, $_x, $_y, $_w, $_h, $adjustPageSize);

        // apply links from the template
        $tpl =& $this->tpls[$tplidx];
        if (isset($tpl['links'])) {
            foreach ($tpl['links'] as $link) {
                // $link[4] is either a string (external URL) or an integer (page number)
                if (is_int($link[4])) {
                    $l = $this->AddLink();
                    $this->SetLink($l, 0, $link[4]);
                    $link[4] = $l;
                }

                // $link[3] holds the negated height, hence the sign handling.
                $this->Link(
                    $link[0]/$this->k,
                    ($this->fhPt-$link[1]+$link[3])/$this->k,
                    $link[2]/$this->k,
                    -$link[3]/$this->k,
                    $link[4]
                );
            }
        }

        return $result;
    }

}
@whatthefork

This comment has been minimized.

Copy link

@whatthefork whatthefork commented Nov 15, 2018

Just a note for anyone who might be puzzled why this code works for some document links and not others. The code by @andreyvit works for page annotations that are external links (URL links). It works for internal links ONLY IF they are annotated in the document with an array of numeric coordinates. It DOES NOT work if the internal links are annotated by name (a string, for example "appendix-a"), because there is no code in the class file that cross-references a name to a page number. Also note that the code will not preserve any bookmark "links", because those are stored in the document differently from links stored as "Annots".

I mention this because I spent several hours trying to figure out why sometimes links carried through and other times they didn't. After digging into document structure and experimenting I realized what was happening. And I don't know enough about PDF parsing to modify the code to work for named links or bookmarks.

Thank you for publishing the snippets, @andreyvit!

@jakubnavratil

This comment has been minimized.

Copy link

@jakubnavratil jakubnavratil commented Sep 1, 2020

For Fpdi 2+ I created this
https://gist.github.com/trubit/65c7674c4d644db107f25f021acd5dfd

Currently working only with external links. Needs testing, but works for me.

@gavin310

This comment has been minimized.

Copy link

@gavin310 gavin310 commented Nov 16, 2020

@jakubnavratil Your URL seems to have changed since you posted this. Looks like this is the correct URL for people looking for a modern solution: https://gist.github.com/jakubnavratil/65c7674c4d644db107f25f021acd5dfd

@igorsgm

This comment has been minimized.

Copy link

@igorsgm igorsgm commented Apr 13, 2021

If you guys are looking for a newer solution using FPDI 2.3 and TCPDF 6.4+, take a look at this answer:
https://stackoverflow.com/a/67071744/5397846

Supporting both internal and external annotation links (hyperlinks)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment