Last active
December 19, 2022 11:05
-
-
Save richard-wolsch/a041e438b7e4307401489b7a764c4d24 to your computer and use it in GitHub Desktop.
Preprocess a PDF form to fill it with FPDF: This gives unique names to all fields, flips checkbox options (yes needs to be before no) and makes all fields that do not already define field flags (/Ff) read-only.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Command-line helper: asks for the path of a PDF, loads it into $content and lets the tasks below rewrite
// its form fields. The result is written back to the same path at the end of the script.
echo "\e[32mConvert the form in a PDF to be used by FPDF. This will rename all form fields.\e[39m\n";

// readline() returns false on EOF; trim whitespace that pasting or drag-and-drop may append to the path.
$filename = readline("PDF Path: ");
$filename = is_string($filename) ? trim($filename) : '';
if ($filename === '' || !is_file($filename) || !is_readable($filename)) {
    fwrite(STDERR, sprintf("Cannot read PDF file: \"%s\"\n", $filename));
    exit(1);
}

// Stop on a failed read: the script writes $content back to $filename, so continuing with a failed read
// would create or truncate a file at that path.
$content = file_get_contents($filename);
if ($content === false) {
    fwrite(STDERR, sprintf("Failed to read PDF file: \"%s\"\n", $filename));
    exit(1);
}
// ==== TASK 1 ====
// Find all field name entries ("/T (...)") and give them unique names.
// Solves: PDF forms are sometimes organized in groups where a form field has children ("/Kids"). Children of
// different groups could have the same field names. FPDF can't handle that.
// Solution: Give every form field, regardless of whether it is a parent or a child, a unique name
// ("Field001", "Field002", ...) in document order.
// The pattern's ".+" is greedy and does not cross line breaks, so a match runs up to the last ")" on the line.
// The renaming is done in a single pass: already renamed text is never rescanned, and no pattern is built
// from the matched name, so names containing regex delimiters cannot break it.
$countTextFields = 1;
$foundTextFields = 0;
$renamedContent = preg_replace_callback(
    '/\/T\s\(.+\)/',
    static function (array $match) use (&$countTextFields) {
        // The counter is captured by reference so every match receives the next number.
        return sprintf('/T (Field%03d)', $countTextFields++);
    },
    $content,
    -1,
    $foundTextFields
);
// preg_replace_callback() returns null on a PCRE error; abort instead of carrying a null document forward.
if ($renamedContent === null) {
    fwrite(STDERR, sprintf("Renaming form fields failed (PCRE error code %d).\n", preg_last_error()));
    exit(1);
}
$content = $renamedContent;
printf("\nFound \e[31m%d\e[39m form fields.\n", $foundTextFields);
// ==== TASK 2 ====
// Find all checkbox fields and give them unique names.
// Solves: Checkbox fields may be unnamed if they are a child of a named form field. FPDF does not find those
// unnamed checkbox fields.
// Solution: Find the "/AS" tag identifying checkboxes and put a unique field name ("/T (CheckboxNNN)") on the
// line directly before it.
$checkboxMatches = [];
preg_match_all('/\/AS\s\//', $content, $checkboxMatches);
printf("Found \e[31m%d\e[39m checkbox fields.\n", count($checkboxMatches[0]));
$countCheckboxes = 1;
// Single pass over the document. The lookbehind skips "/AS" entries that already carry a three-digit
// "/T (CheckboxNNN)" line; text inserted during this pass is not rescanned, so numbering stays correct
// even when the counter grows beyond three digits.
$taggedContent = preg_replace_callback(
    '~(?<!/T\s\(Checkbox\d{3}\)\n)(/AS\s/)~',
    static function (array $match) use (&$countCheckboxes) {
        // Keep the matched "/AS /" text (including its exact whitespace) and prepend the name line.
        return sprintf("/T (Checkbox%03d)\n%s", $countCheckboxes++, $match[1]);
    },
    $content
);
// preg_replace_callback() returns null on a PCRE error; abort instead of carrying a null document forward.
if ($taggedContent === null) {
    fwrite(STDERR, sprintf("Naming checkbox fields failed (PCRE error code %d).\n", preg_last_error()));
    exit(1);
}
$content = $taggedContent;
// ==== TASK 3 ====
// Solves: My PDF files listed the checkboxes "no" option before the checkboxes "yes" option. FPDF interprets it
// vice versa.
// Solution: Flip both options. The options are always listed below "/AP" and "/D".
// The lookbehind requires the lines "/AP ", "<<", "/D ", "<<" directly before the match; the two following
// lines that start with "/" are then swapped. $countFlipOptions receives the number of swaps.
$countFlipOptions = 0;
$flippedContent = preg_replace('/(?<=\/AP\s\n<<\n\/D\s\n<<\n)(\/.+)\n(\/.+)/', "$2\n$1", $content, -1, $countFlipOptions);
// preg_replace() returns null on a PCRE error; abort instead of carrying a null document forward.
if ($flippedContent === null) {
    fwrite(STDERR, sprintf("Flipping checkbox options failed (PCRE error code %d).\n", preg_last_error()));
    exit(1);
}
$content = $flippedContent;
// ==== TASK 4 ====
// Make all input fields which do not already have an option definition (/Ff) readonly.
// The pattern captures a run of characters without "<" or ">" that ends in a "/T (...)" line. The negative
// lookahead rejects the match when "/Ff" appears before the next "<" or ">". For every remaining match the
// line "/Ff 1" is appended after the name; $countReadonlyFields receives the number of insertions.
$countReadonlyFields = 0;
$readonlyContent = preg_replace('/([^<>]+\/T\s\([^\)]+\)\n)(?![^<>]+\/Ff[^<>]+[<>])/ms', "$1/Ff 1\n", $content, -1, $countReadonlyFields);
// preg_replace() returns null on a PCRE error (for example an exceeded backtrack limit on a large document);
// abort instead of carrying a null document forward.
if ($readonlyContent === null) {
    fwrite(STDERR, sprintf("Marking fields readonly failed (PCRE error code %d).\n", preg_last_error()));
    exit(1);
}
$content = $readonlyContent;
// Write the processed document back to the path it was read from and print a summary.
// A failed read or a failed regex step leaves $content as false/null; never overwrite the PDF with that.
if (!is_string($content)) {
    fwrite(STDERR, "Processing failed, the PDF was left untouched.\n");
    exit(1);
}
// file_put_contents() returns false on failure; do not report success in that case.
if (file_put_contents($filename, $content) === false) {
    fwrite(STDERR, sprintf("Failed to write PDF file: \"%s\"\n", $filename));
    exit(1);
}
// The two counters start at 1, hence the "- 1" to report the number of names handed out.
printf(
    "\nReplaced \e[31m%d\e[39m form fields and \e[31m%d\e[39m checkbox fields.\nFlipped \e[31m%d\e[39m options.\nMade \e[31m%d\e[39m fields readonly.\n",
    $countTextFields - 1,
    $countCheckboxes - 1,
    $countFlipOptions,
    $countReadonlyFields
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment