Skip to content

Instantly share code, notes, and snippets.

@iemcd
Created April 7, 2019 21:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save iemcd/683c379e0bc2cb5eb007a8fefff3fe64 to your computer and use it in GitHub Desktop.
Save iemcd/683c379e0bc2cb5eb007a8fefff3fe64 to your computer and use it in GitHub Desktop.
PDF Annotation Tools
#!/bin/perl
use strict;
use warnings;
use 5.22.3;
use List::Util qw( min max );
# Emit pdfmark /Highlight annotations for words found in a character table.
#
# Usage:  <this script> TABLE < WORDS
#   TABLE  tab-separated file with one character per row.
#          Fields are: char, x1, y1, x2, y2, pp
#   WORDS  (STDIN) one search word per line.
#
# For each word, the FIRST whole-word occurrence in the table text is located
# (case-insensitively, the word being taken literally, not as a regex) and a
# single annotation covering all of its characters is printed to STDOUT.
defined $ARGV[0] or die "Usage: $0 TABLE < WORDS\n";
open(my $table, "<", $ARGV[0]) or die "Can't open $ARGV[0]: $!";

# $text holds the lower-cased characters, @table the matching raw rows.
# A match offset in $text is used directly as an index into @table, so the
# two must stay in lock-step: exactly one character per stored row.
my $text = '';
my @table;
while (my $row = <$table>)
{
    chomp $row;
    my ($char) = split /\t/, $row, 2;
    # Rows that are empty or whose first field is not a single character
    # would shift every later offset; leave them out of both structures.
    next unless defined $char && length($char) == 1;
    $text .= lc $char;
    push @table, $row;
}
close $table;

while (my $word = <STDIN>)
{
    chomp $word;
    # An empty pattern matches zero characters and yields no coordinates.
    next if $word eq '';

    # $text is lower-case, so the needle must be too.  \Q..\E keeps regex
    # metacharacters in the word literal.  Look-arounds are used instead of
    # \b so that words starting or ending with punctuation still match as
    # whole words; for ordinary words they are equivalent to \b.
    my $needle     = lc $word;
    my $whole_word = qr/(?<!\w)\Q$needle\E(?!\w)/;
    next unless $text =~ $whole_word;

    # @- / @+ give the start and one-past-the-end offsets of the match.
    my ($first, $last) = ($-[0], $+[0] - 1);

    my (@x1, @y1, @x2, @y2, $pp);
    for my $row (@table[$first .. $last])
    {
        my (undef, $cx1, $cy1, $cx2, $cy2, $page) = split /\t/, $row;
        push @x1, $cx1;
        push @y1, $cy1;
        push @x2, $cx2;
        push @y2, $cy2;
        $pp = $page;    # page of the last character wins
    }

    # x1=xll, x2=xur, y1=yll, & y2=yur in the pdfmark reference
    # annotations not rotated, so this fully describes rectangle
    my $x1 = min @x1;
    my $x2 = max @x2;
    my $y1 = min @y1;
    my $y2 = max @y2;

    # Inside a PostScript (...) string, backslashes and parentheses must be
    # escaped or the pdfmark becomes syntactically invalid.
    my $contents = $word =~ s/([\\()])/\\$1/gr;

    say "[ /Rect [$x1 $y1 $x2 $y2]";
    say " /Subtype /Highlight";
    say " /QuadPoints [$x1 $y2 $x2 $y2 $x1 $y1 $x2 $y1]";
    # QuadPoints does not work as specified, instead see: https://stackoverflow.com/questions/9855814/pdf-spec-vs-acrobat-creation-quadpoints
    say " /SrcPg $pp";
    say " /Contents ($contents)"; # mostly for troubleshooting
    say " /Color [0 1 1]";
    say " /ANN pdfmark\n"; # extra newline is intentional
}
#!/bin/perl
use strict;
use warnings;
use 5.22.3;
# Convert a per-character XML layout dump into a tab-separated table.
#
# Input (files named on the command line, or STDIN): lines of the form
#   <page id="N" ...>
#   <text ... bbox="x1,y1,x2,y2" ...>C</text>
# (presumably the XML output of pdfminer's pdf2txt -- TODO confirm).
# Output (STDOUT), one row per character:  char, x1, y1, x2, y2, page
#
# Characters that XML must escape (&amp; &lt; &gt; &quot; &apos; and ASCII
# numeric references such as &#x27;) are decoded to the single character they
# stand for.  Non-ASCII characters are still skipped.
my %char_for_entity = (
    amp  => '&',
    lt   => '<',
    gt   => '>',
    quot => '"',
    apos => "'",
);

# Turn the captured text content (one raw character or one entity reference)
# into the single character to emit; returns nothing if it cannot be used.
sub _decode_xml_char
{
    my ($raw) = @_;
    return $raw if length($raw) == 1;

    if ($raw =~ /^&#x([0-9a-fA-F]+);$/ or $raw =~ /^&#(\d+);$/)
    {
        my $digits     = $1;
        my $code_point = $raw =~ /^&#x/ ? hex($digits) : $digits;
        # Only printable ASCII: anything else would not be a single byte
        # (or would be a control character) in the emitted row.
        return unless $code_point >= 0x20 && $code_point <= 0x7E;
        return chr $code_point;
    }
    if ($raw =~ /^&(\w+);$/)
    {
        return $char_for_entity{$1};    # undef for unknown entity names
    }
    return;
}

my $page = 1;
while (my $line = <>)
{
    if ($line =~ /^<page id=\"(\d+)\"/)
    {
        $page = $1;
        next;
    }

    # Coordinates may be negative when content lies outside the page box.
    # The last group accepts either an entity reference or one raw character.
    next unless $line =~ /^<text .+ bbox=\"(-?[\d.]+),(-?[\d.]+),(-?[\d.]+),(-?[\d.]+)\" .+>(&#?\w+;|.)<\/text>$/;
    my ($x1, $y1, $x2, $y2) = ($1, $2, $3, $4);

    my $char = _decode_xml_char($5);
    next unless defined $char;

    # A literal tab would be read as a field separator downstream; emit a
    # space so the row keeps six columns and still acts as a word break.
    $char = ' ' if $char eq "\t";

    say "$char\t$x1\t$y1\t$x2\t$y2\t$page";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment