Skip to content

Instantly share code, notes, and snippets.

@pramsey
Last active August 29, 2015 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pramsey/9497206ee0e282fdd651 to your computer and use it in GitHub Desktop.
Save pramsey/9497206ee0e282fdd651 to your computer and use it in GitHub Desktop.
Brittle perl for parsing SCOTUS coordinates into WKT
use Switch;
# Download the SCOTUS decision from here
# http://www.supremecourt.gov/opinions/14pdf/no5orig_d18e.pdf
# Convert it from PDF to a raw text form using this service
# http://convertonlinefree.com/PDFToTXTEN.aspx
# Then run this script on it
my $first = 1;
my @curve = ();
my @cmd = ();
my @x = ();
my @y = ();
my $count = 1;
while(<>)
{
# Remove Carriage-returns
s/\r//g;
# Skip white-space-only lines
next if( ! /\S/ );
# Strip duplicative white space
s/^\s*//g;
s/\t/ /g;
s/ +/ /g;
# Tokenize the command strings */
s/BEGINNING AT/BEGINNING_AT/g;
s/BY ARC CENTERED AT/BY_ARC_CENTERED_AT/g;
s/BY STRAIGHT LINE TO/BY_STRAIGHT_LINE_TO/g;
my @commentary = ();
if ( @cmd && @x && @y )
{
@cmd = ();
@x = ();
@y = ();
}
# Key trick: Once we've changed the command tokens into single words
# we can split the text up on spaces to get nice tokens ready for
# further processing.
my @line = split(/ /);
# Convert each line of the raw text into three lists: one of command
# tokens, one of x values and one of y values.
# Fortunately UTM x/y values are so different, we can use simple magnitude to
# distinguish them (y's are > 1 million)
foreach my $l (@line)
{
switch ( $l )
{
case "BEGINNING_AT" { push(@cmd, $l); }
case "BY_ARC_CENTERED_AT" { push(@cmd, $l); }
case "BY_STRAIGHT_LINE_TO" { push(@cmd, $l); }
case "TO" { push(@cmd, $l); }
else {
if ( $l =~ /^\d+\.\d+/ )
{
if ( $l > 1000000 )
{
push(@y, $l);
}
else
{
push(@x, $l);
}
}
else
{
push(@commentary, $l);
}
}
}
}
# printf "RAW %s\n", $_;
# whenever we have all three buffers full and of the same size
# that means we're ready to empty them out again and process
# the next raw line of stuff
if ( @cmd && @x && @y )
{
# printf "COMMANDS: %s\n", join(" ", @cmd);
# printf " X: %s\n", join(" ", @x);
# printf " Y: %s\n", join(" ", @y);
my $csz = @cmd;
my $xsz = @x;
my $ysz = @y;
# if ( ! (($csz == $xsz) && ($csz == $ysz)) )
# {
# printf " ERROR: commands=%d x=%d y=%d\n", $csz, $xsz, $ysz;
# printf " RAW: %s\n", $_;
# }
#
# Handle the inevitable special case, for one line string they ended an
# arc, not on a coordinate but on "the intersection with the
# California-Oregon state lateral boundary"
# In that case we throw away the terminal arc, sorry. Real
# programmers can go and research what that coordinate actually is.
my $nrecords = @x;
$nrecords-- if ( @cmd > @x ) ;
# Convert three arrays of tokens/coordinates into a single array with
# one entry for each command/x/y combination.
for ( my $i = 0; $i < $nrecords; $i++ )
{
if ( @cmd[$i] eq "BEGINNING_AT" && ! $first )
{
&output(\@curve);
@curve = ();
printf "\n\n";
}
else
{
$first = 0;
}
push(@curve, { cmd => $cmd[$i], x => $x[$i], y => $y[$i] } );
}
}
}
&output(\@curve);
exit;
######################################################################
# Taking in an array of (cmd/y/x) records, output a COMPOUNDCURVE
# for that array.
sub output
{
$curve = shift();
# foreach $cmd (@$curve)
# {
# printf "%s %s %s\n", $cmd->{cmd}, $cmd->{x}, $cmd->{y};
# }
my $i = 1;
@sub = ();
my $j = 0;
# Really ugly magic to deal with the coordinate systems. The first object
# in the scotus decision is UTM 11, the next three UTM 10, the final ones
# back to UTM 11
my $srid=26911;
$srid = 26910 if ( $count >= 2 && $count <= 4 );
$count++;
printf "INSERT INTO causa (geom) VALUES ('SRID=%d;COMPOUNDCURVE(\n", $srid;
# The WKT for a compound curve has duplicated vertices in places
# where the CIRCULARSTRINGS join up to the LINESTRINGS, so we have
# to turn our mixed list of commands into homogenous collections of
# just lines and just arcs.
while ( $i < @$curve )
{
my $c0 = $curve->[$i-1];
my $c1 = $curve->[$i];
my $c2 = $curve->[$i+1];
if ( $c1->{cmd} eq "BY_ARC_CENTERED_AT" )
{
if ( $c0->{cmd} ne "TO" )
{
print ",\n" if $j++ > 1;
&output_sub(\@sub);
@sub = ();
push(@sub, $c0);
push(@sub, $c1);
push(@sub, $c2);
}
else
{
push(@sub, $c1);
push(@sub, $c2);
}
$i += 2;
next;
}
if ( $c1->{cmd} eq "BY_STRAIGHT_LINE_TO" )
{
if ( $c0->{cmd} ne "BY_STRAIGHT_LINE_TO" )
{
print ",\n" if $j++ > 1;
&output_sub(\@sub);
@sub = ();
push(@sub, $c0);
push(@sub, $c1);
}
else
{
push(@sub, $c1);
}
$i += 1;
next;
}
}
print ",\n" if $j++ > 1;
&output_sub(\@sub);
print ")');\n";
return;
}
# Taking in an homogenious (all arcs or all line) array of cmd/x/y
# output the appropriate WKT object
sub output_sub
{
my $sub = shift();
my @a = @$sub;
my $type = "";
my $i = 0;
return if ! @a;
if ( $a[1]->{cmd} eq "BY_ARC_CENTERED_AT" )
{
$type = "CIRCULARSTRING";
}
printf "%s(", $type;
for ( my $i = 0; $i < @a; $i++ )
{
print "," if $i;
# Convert center point to mid-arc point
if ( $type eq "CIRCULARSTRING" && $i % 2 )
{
my $x0 = @a[$i-1]->{x};
my $y0 = @a[$i-1]->{y};
my $x1 = @a[$i]->{x};
my $y1 = @a[$i]->{y};
my $x2 = @a[$i+1]->{x};
my $y2 = @a[$i+1]->{y};
$x0 -= $x1;
$y0 -= $y1;
$x2 -= $x1;
$y2 -= $y1;
my $x1n = ($x0 + $x2)/2;
my $y1n = ($y0 + $y2)/2;
my $r1 = sqrt($x1n**2 + $y1n**2);
$x1n /= $r1;
$y1n /= $r1;
my $r = sqrt($x0**2 + $y0**2);
$x1n *= $r;
$y1n *= $r;
$x1n += $x1;
$y1n += $y1;
@a[$i]->{x} = $x1n;
@a[$i]->{y} = $y1n;
}
printf "%s %s", @a[$i]->{x}, @a[$i]->{y};
}
printf ")";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment