Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
OCTranspo Alert Scraper
#!/usr/bin/perl
use strict;
use warnings;
use WWW::Mechanize;
use Data::Dump qw/ dump /;
use JSON::Any;
# OC Transpo alert scraper: fetches the 'today' and 'week' update pages,
# extracts cancellations, delays and detours per route, and prints the result
# as pretty JSON shaped as { period => { type => { route => notice } } }.

# Define the routes that I actually care about. Leave the list empty to
# report alerts for every route.
my @routes = qw| |;

# Set up the Mechanize object. autocheck is enabled so a failed fetch dies
# instead of an error page being parsed as if it were the alerts page.
my $mech = WWW::Mechanize->new(
    autocheck  => 1,
    cookie_jar => {},
    agent      => 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MDDRJS; rv:11.0) like Gecko',
);

my %alerts;

# Fetch both the 'today' and 'week' updates
foreach my $period ( qw| today week | ) {
    $mech->get('http://www.octranspo.com/updates-' . $period);
    $alerts{$period} = parse_alerts($mech->content(), \@routes);
}

my $json = JSON::Any->new(pretty => 1);
print $json->encode(\%alerts);

# parse_alerts($html, \@wanted_routes)
#
# Walks the page line by line and returns a hash reference of
# { type => { route => notice } }. The scan is line-oriented and relies on
# three kinds of line, in this order:
#   1. an <h2> line containing an "... Icon" image, which starts a section
#      and supplies the alert type from the image's alt text;
#   2. an <h3 class="accordion_header..."> line, which names the route(s);
#   3. the next line ending in </p>, which carries the notice text.
# Nothing is recorded before the first section heading has been seen.
sub parse_alerts {
    my ($html, $wanted) = @_;

    my %found;
    my $type  = '';
    my $route = '';

    # Accept both LF and CRLF line endings; a stray \r would otherwise stop
    # the end-of-line anchored </p> match below from ever succeeding.
    foreach my $line (split /\r?\n/, $html) {
        if ($line =~ m/^<h2>.*Icon/) {
            # New section heading: take the type from the icon's alt text.
            $type = $line;
            $type =~ s/^.*alt="([^TI]*) (Trips )?Icon".*$/${1}/;
            # A route heading left over from the previous section must not
            # capture a paragraph that belongs to this one.
            $route = '';
        }
        elsif (!$type) {
            # Prior to the first heading there is nothing else to look for.
            next;
        }
        elsif ($line =~ m/h3 class="accordion_header/) {
            $route = $line;
            $route =~ s/.*class="detail">([^<]*)<.*/${1}/;
            # If it is not a route I care about, forget it
            $route = '' if !is_wanted_route($route, $wanted);
        }
        elsif ($route && $line =~ m/<\/p>$/) {
            my $notice = clean_notice($line);
            # The same route can be listed more than once under one type;
            # append rather than silently overwrite the earlier notice.
            if (exists $found{$type}{$route}) {
                $found{$type}{$route} .= "\n" . $notice;
            }
            else {
                $found{$type}{$route} = $notice;
            }
            # Reset the route so that no further paragraphs are attributed
            # to it until the next route heading.
            $route = '';
        }
    }

    return \%found;
}

# is_wanted_route($route_heading, \@wanted_routes)
#
# Returns true when no filter is configured, or when the heading text lists
# one of the wanted route numbers (e.g. "Route 95" or "Route 94, 95").
sub is_wanted_route {
    my ($route, $wanted) = @_;

    return 1 if !@{$wanted};

    foreach my $number (@{$wanted}) {
        # \Q...\E keeps the configured value literal inside the pattern.
        return 1 if $route =~ m/Route (?:[0-9]+, )*\Q$number\E\b/;
    }

    return 0;
}

# clean_notice($line)
#
# Returns the text of a notice line with every HTML tag removed and the
# surrounding whitespace trimmed. Stripping all tags (instead of matching one
# exact anchor layout) keeps raw markup out of the JSON when a notice has
# several links, extra attributes on a link, or other inline tags.
sub clean_notice {
    my ($line) = @_;

    $line =~ s/<[^>]*>//g;
    $line =~ s/^\s+|\s+$//g;

    return $line;
}
@JohnMertz

This comment has been minimized.

Copy link
Owner Author

commented Feb 15, 2019

Scrapes for route cancellations, delays and detours. Prints the results as pretty JSON. To restrict results to select routes, simply add the desired route numbers between the pipes in the `my @routes = qw| |;` declaration near the top of the script.

Note: The Data::Dump dependency is not actually necessary; I just forgot to remove it.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.