Skip to content

Instantly share code, notes, and snippets.

@foonathan
Created March 3, 2022 10:58
Show Gist options
  • Save foonathan/9375720ec6da7c23ad874014bd5e4be8 to your computer and use it in GitHub Desktop.
Save foonathan/9375720ec6da7c23ad874014bd5e4be8 to your computer and use it in GitHub Desktop.
Small fish script that concatenates PDF files while preserving their bookmarks
#!/bin/fish
#
# Copyright (C) 2022 Jonathan Müller
# SPDX-License-Identifier: BSL-1.0
#
# Usage: pdfcat input1 input2 ... inputN output
# Print a one-line usage summary to stdout, using the base name of the
# currently running script as the program name.
function print_usage
    set -l script_name (basename (status filename))
    echo "Usage:" $script_name "input1 input2 ... inputN output"
end
# Handle -h/--help before validating the argument count.
# The expansion is quoted on purpose: with no arguments an unquoted
# $argv[1] expands to nothing, which leaves `test` with a missing operand
# and makes it print an error before the usage message is shown.
if test "$argv[1]" = "-h" || test "$argv[1]" = "--help"
    print_usage
    exit 0
else if test (count $argv) -lt 2
    # At least one input and the output file are required.
    print_usage >/dev/stderr
    exit 1
end

# Every argument but the last is an input; the last one is the output.
set input $argv[1..-2]
set output $argv[-1]

# Refuse to overwrite an existing output file.
if test -f $output
    echo "output exists: $output" >/dev/stderr
    exit 1
end

# Temporary file collecting the metadata dump of every input
# (created in the current directory).
set bookmark_file (mktemp "pdfunite-meta-XXXXX.delete-me")
or exit 1

# Temporary file for the merged PDF before the bookmarks are applied
# (created next to the final output).
set tmp_output (mktemp "$output.XXXXX.delete-me")
or begin
    # Do not leave the first temporary file behind when the second one
    # cannot be created.
    rm -f $bookmark_file
    exit 1
end
# Delete the two temporary files ($bookmark_file and $tmp_output).
# The function is registered as a process-exit handler for the PID stored
# in $fish_pid, i.e. the fish process running this script.
function cleanup --on-process-exit $fish_pid
    rm -f $tmp_output $bookmark_file
end
# Get all the metadata from each file and collect into a single file.
# The dumps are appended in input order, so each file's NumberOfPages line
# is followed by that file's bookmark records.
for file in $input
    if not test -f $file
        echo "file not found: $file" >/dev/stderr
        exit 1
    end
    # Stop on failure: a missing dump would shift the page offsets of
    # every following file.
    pdftk $file dump_data output >> $bookmark_file
    or exit 1
end

# Merge all pdf files into a single one.
pdftk $input cat output $tmp_output
or exit 1

# Update the BookmarkPageNumber by adding the page count of all previous
# files, then add the corrected bookmarks to the output.
#
# All patterns are anchored to the start of the line so that only the
# metadata keys themselves match; a bookmark title that merely contains
# e.g. "NumberOfPages" must not be mistaken for a new file boundary.
#   offset      - total page count of the files preceding the current one
#   next_offset - offset that applies once the next file's dump begins
awk -F ": " '
    BEGIN { offset = 0; next_offset = 0 }
    /^NumberOfPages:/ { offset = next_offset; next_offset += $2 }
    /^BookmarkPageNumber:/ { print "BookmarkPageNumber: " ($2 + offset) }
    /^(BookmarkBegin|BookmarkTitle:|BookmarkLevel:)/ { print $0 }
' $bookmark_file \
    | pdftk $tmp_output update_info - output $output
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment