Skip to content

Instantly share code, notes, and snippets.

@briandfoy
Last active June 16, 2021 03:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save briandfoy/3cb38067e4fe8a983abe66e26fb376d2 to your computer and use it in GitHub Desktop.
Save briandfoy/3cb38067e4fe8a983abe66e26fb376d2 to your computer and use it in GitHub Desktop.
(Perl) JSONify the US National Park sites that have passport stickers, by year and region
#!/usr/bin/perl
use v5.28;
use open qw(:std :utf8);
use utf8;
use Mojo::JSON qw(encode_json);
use Mojo::UserAgent;
use Mojo::Util qw(dumper trim);
use Text::CSV_XS;
# Main script: scrape the Wikipedia table of National Park passport stamps,
# then write the data twice -- as JSON keyed by year, and as a CSV with one
# row per year and one column per region.

my $ua  = Mojo::UserAgent->new;
my $url = 'https://en.wikipedia.org/wiki/National_Park_Passport_Stamps';
my $tx  = $ua->get( $url );

# result() already dies on connection errors; also refuse HTTP-level
# failures so an error page is never parsed into an empty data set.
my $res = $tx->result;
die sprintf( "%s: %s %s\n", $url, $res->code, $res->message // '' )
    unless $res->is_success;

# process_tr returns either nothing or a ( year => \%sites ) pair, so the
# flattened collection can be assigned directly to a hash.
my %result = $res->dom
    ->find( 'tbody > tr' )
    ->map( \&process_tr )
    ->each;

# encode_json produces UTF-8 encoded bytes, hence the :raw handle.
my $json_file = 'nps_passport_stamps.json';
open my $json_fh, '>:raw', $json_file or die "$json_file: $!";
say { $json_fh } encode_json( \%result );
close $json_fh or die "$json_file: $!";   # buffered write errors surface here

# binary => 1 is required for fields holding non-ASCII characters or
# embedded newlines; auto_diag => 2 turns any CSV error into an exception
# instead of a silently missing row.
my $csv = Text::CSV_XS->new( { binary => 1, auto_diag => 2 } );

my $file = "nps_passport_stamps.csv";
open my $fh, ">:encoding(UTF-8)", $file or die "$file: $!";

my @regions = regions();    # column order, computed once
$csv->say( $fh, [ 'Year', @regions ] );

foreach my $year ( sort { $a <=> $b } keys %result ) {
    # A region missing for this year yields undef, written as an empty field.
    $csv->say( $fh, [ $year, $result{$year}->@{ @regions } ] );
}

close $fh or die "$file: $!";
# Convert one table row into a ( $year => \%site_for ) pair.
#
# Used as a Mojo::Collection map callback, so the row (a Mojo::DOM node) is
# read from $_.
#
# Returns: a two-element list -- the trimmed text of the first cell and a
# hash reference mapping region name to the text found for that region,
# plus a 'National' key built from the second cell. Returns an empty list
# for rows that do not have the expected cells (header rows made of <th>,
# or rows without a second cell or a <ul> in the last cell), so that such
# rows are skipped rather than aborting the whole run.
sub process_tr {
    my $row = $_;    # name it: the nested map callbacks below rebind $_

    my $year_td     = $row->at( 'td:first-child' )   or return;
    my $national_td = $row->at( 'td:nth-child(2)' )  or return;
    my $ul          = $row->at( 'td:last-child ul' ) or return;

    my $year     = trim( $year_td->all_text );
    my $national = trim( $national_td->all_text );

    # A line break right after a colon becomes a single space; every other
    # run of line breaks becomes a comma separator.
    $national =~ s/:\K\v+/ /g;
    $national =~ s/\v+/, /g;

    # Each <li> is matched as "text (label)"; reversing the two captures
    # gives label => text pairs. Items that do not match contribute nothing.
    my %site_for = $ul->find( 'li' )
        ->map( 'all_text' )
        ->map( sub { reverse /(\S.*?) \s \( (.*?) \)/x } )
        ->each;

    $site_for{National} = $national;

    # Normalize the alternate spellings to the names regions() uses.
    my %canonical_for = (
        'West'                         => 'Western',
        'Pacific Northwest and Alaska' => 'Pacific Northwest & Alaska',
    );
    for my $alias ( keys %canonical_for ) {
        next unless exists $site_for{$alias};
        $site_for{ $canonical_for{$alias} } = delete $site_for{$alias};
    }

    return ( $year => \%site_for );
}
# Region names in the column order used for the CSV output; 'National'
# comes first, followed by the nine regional names.
sub regions {
    return (
        'National',         'North Atlantic', 'Mid-Atlantic',
        'National Capital', 'Southeast',      'Midwest',
        'Southwest',        'Rocky Mountain', 'Western',
        'Pacific Northwest & Alaska',
    );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment