Skip to content

Instantly share code, notes, and snippets.

@moritz
Created April 15, 2022 12:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moritz/ea14aa095b3bd2e72a5667421808b911 to your computer and use it in GitHub Desktop.
Save moritz/ea14aa095b3bd2e72a5667421808b911 to your computer and use it in GitHub Desktop.
pdf-autosplit, for splitting a PDF based on annotations (chapters).
#!/usr/bin/env raku
# Written in Raku (formerly known as Perl 6): https://raku.org/
# Return the page numbers on which the top-level (level 1) bookmarks of a PDF
# start, in the order in which pdftk reports them.
#
# $filename - path of the PDF passed to `pdftk <file> dump_data_utf8`.
#
# Returns an Array of Int page numbers; it is empty when the document has no
# level-1 bookmark with a usable page number.
# Throws X::Proc::Unsuccessful when pdftk exits with a non-zero status.
sub page-numbers-from-chapters($filename) {
    my $proc = run :out, 'pdftk', $filename, 'dump_data_utf8';

    # Read everything first, then close the pipe. IO::Pipe.close returns the
    # Proc; leaving it in sink context makes Raku throw if pdftk failed,
    # rather than letting a failed run masquerade as "no bookmarks".
    my @dump-lines = $proc.out.lines;
    $proc.out.close;

    my %bookmark;   # "Key: value" fields of the bookmark record being read
    my @borders;    # first page of every level-1 bookmark seen so far

    # Record the start page of the bookmark collected in %bookmark, provided
    # it is a top-level bookmark with a positive integer page number.
    sub handle-bookmark() {
        return unless (%bookmark<BookmarkLevel> // 0) == 1;
        my $page = %bookmark<BookmarkPageNumber> // '';
        # A missing or non-positive page number cannot be used as the start
        # of a page range, so such bookmarks are skipped.
        @borders.push($page.Int) if $page ~~ /^ <[0..9]>+ $/ && $page > 0;
    }

    for @dump-lines -> $line {
        if $line eq 'BookmarkBegin' {
            # A new record starts: flush the previous one and start afresh.
            handle-bookmark();
            %bookmark = ();
        }
        else {
            # Limit the split to 2 parts so values containing ': '
            # (e.g. bookmark titles) stay intact.
            my @elems = $line.split(': ', 2);
            # Only Bookmark* fields are ever consulted; ignore other metadata.
            if @elems == 2 && @elems[0].starts-with('Bookmark') {
                %bookmark{@elems[0]} = @elems[1];
            }
        }
    }
    # The final record has no following 'BookmarkBegin' to flush it.
    handle-bookmark();
    return @borders;
}
# Entry point: split $filename into one PDF per top-level bookmark.
#
# Output files are written to the current directory as chapter00.pdf,
# chapter01.pdf, ... Each file runs from the start page of one level-1
# bookmark up to the page before the next one; the last file runs to the end
# of the document. Pages before the first bookmark are not extracted.
#
# $filename - path of the PDF to split.
sub MAIN($filename) {
    # Write pages $from through $to of the input to chapterNN.pdf, where NN
    # is $number zero-padded to two digits.
    sub extract-chapter($from, $to, $number) {
        run 'pdftk', $filename, 'cat', $from ~ '-' ~ $to,
            'output', sprintf('chapter%02d.pdf', $number);
    }

    my @borders = page-numbers-from-chapters($filename);
    unless @borders {
        # Without this guard @borders[*-1] below would fail with an
        # unhelpful "index out of range" error.
        note "No top-level bookmarks found in '$filename'; nothing to split.";
        exit 1;
    }

    my $chapter = 0;
    # rotor(2 => -1) yields overlapping pairs: (b0, b1), (b1, b2), ...
    for @borders.rotor(2 => -1) -> ($lower, $upper) {
        # Never let the end fall before the start: when two chapters begin
        # on the same page, "$upper - 1" would give a descending range.
        # Coerce $lower numerically so max() compares numbers, not strings.
        my $last-page = max(+$lower, $upper - 1);
        extract-chapter($lower, $last-page, $chapter);
        $chapter++;
    }
    # pdftk uses "r1" for the last page, "r2" for the
    # second-to-last etc.
    extract-chapter(@borders[*-1], 'r1', $chapter);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment