Last active
June 16, 2021 03:05
-
-
Save briandfoy/3cb38067e4fe8a983abe66e26fb376d2 to your computer and use it in GitHub Desktop.
(Perl) JSONify the US National Park sites that have passport stickers, by year and region
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use v5.28; | |
use open qw(:std :utf8); | |
use utf8; | |
use Mojo::JSON qw(encode_json); | |
use Mojo::UserAgent; | |
use Mojo::Util qw(dumper trim); | |
use Text::CSV_XS; | |
# Scrape the Wikipedia table of National Park passport stamp sites and save
# it twice: as JSON ( year => { region => site } ) and as a CSV with one row
# per year and one column per region.

my $url = 'https://en.wikipedia.org/wiki/National_Park_Passport_Stamps';
my $ua  = Mojo::UserAgent->new;
my $tx  = $ua->get( $url );

# An HTTP error still yields a response object with a body; refuse to scrape
# an error page as if it were the article.
my $res = $tx->result;
die sprintf( "%s: HTTP %s %s\n", $url, $res->code, $res->message // '' )
    unless $res->is_success;

# process_tr returns a ( year => hashref ) pair per data row and nothing for
# other rows, so flattening the collection builds the hash directly.
my %result = $res->dom
    ->find( 'tbody > tr' )
    ->map( \&process_tr )
    ->each;

# The handle is opened :raw so the output of encode_json is written as-is,
# without another encoding layer on top of it.
my $json_file = 'nps_passport_stamps.json';
open my $json_fh, '>:raw', $json_file or die "$json_file: $!";
say { $json_fh } encode_json( \%result );
close $json_fh or die "$json_file: $!";   # buffered write errors surface here

# binary lets the writer accept any character in a field; auto_diag reports
# a row that could not be written instead of dropping it silently.
my $csv  = Text::CSV_XS->new( { binary => 1, auto_diag => 1 } );
my $file = "nps_passport_stamps.csv";
open my $fh, ">:encoding(utf8)", $file or die "$file: $!";

my @regions = regions();   # column order, shared by the header and every row
$csv->say( $fh, [ 'Year', @regions ] );

foreach my $year ( sort { $a <=> $b } keys %result ) {
    # A region with no entry for this year is undef and is written as an
    # empty field.
    $csv->say( $fh, [ $year, map { $result{$year}{$_} } @regions ] );
}

close $fh or die "$file: $!";
# Turn one table row into a ( $year => \%site_for ) pair.
#
# Meant to be used as a Mojo::Collection->map callback: the <tr> element is
# read from $_. %site_for maps a region name to the site text for that year,
# with the second column stored under the key 'National'.
#
# Returns an empty list for rows that do not have the expected shape (header
# rows, or rows without a year cell, a national cell and a list in the last
# cell), so such rows add nothing when the results are flattened into a hash.
sub process_tr {
    my $tr = $_;    # name the topic; the nested callbacks below rebind $_

    # at() returns undef when nothing matches. The caller's selector picks up
    # rows from every table on the page, so check before calling methods.
    my $year_td     = $tr->at( 'td:first-child' );
    my $national_td = $tr->at( 'td:nth-child(2)' );
    my $ul          = $tr->at( 'td:last-child ul' );
    return unless $year_td and $national_td and $ul;

    my $year     = trim( $year_td->all_text );
    my $national = trim( $national_td->all_text );

    # Flatten a multi-line cell: a line break right after a colon becomes a
    # space, any other run of line breaks becomes a comma separator.
    $national =~ s/:\K\v+/ /g;
    $national =~ s/\v+/, /g;

    # Each list item reads "Site name (Region)"; items that do not match the
    # pattern are ignored.
    my %site_for = map {
        my( $site, $region ) = /(\S.*?) \s \( (.*?) \)/x;
        defined $region ? ( $region => $site ) : ();
    } $ul->find( 'li' )->map( 'all_text' )->each;

    $site_for{National} = $national;

    # Fold alternate spellings into the names that regions() returns.
    my %canonical_for = (
        'West'                         => 'Western',
        'Pacific Northwest and Alaska' => 'Pacific Northwest & Alaska',
    );
    foreach my $alias ( keys %canonical_for ) {
        next unless exists $site_for{$alias};
        $site_for{ $canonical_for{$alias} } = delete $site_for{$alias};
    }

    return ( $year => \%site_for );
}
# The region names, in the order they appear as columns in the CSV output.
# 'National' comes first, followed by the nine geographic regions.
sub regions {
    return (
        'National',
        'North Atlantic', 'Mid-Atlantic', 'National Capital',
        'Southeast', 'Midwest', 'Southwest',
        'Rocky Mountain', 'Western', 'Pacific Northwest & Alaska',
    );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment