Skip to content

Instantly share code, notes, and snippets.

@doriantaylor
Created March 13, 2022 14:45
Show Gist options
  • Save doriantaylor/80cf11f8fd6b9ef1a3dcec8dd232341b to your computer and use it in GitHub Desktop.
Save doriantaylor/80cf11f8fd6b9ef1a3dcec8dd232341b to your computer and use it in GitHub Desktop.
Scrape University of Toronto pool schedule
#!/usr/bin/env ruby
require 'date'
require 'time'
require 'mechanize'
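# require this before icalendar or loading complains (presumably an
# ActiveSupport load-order issue -- TODO confirm which gem needs it)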
require 'active_support/isolated_execution_state'
# wah wah wah crybaby
require 'icalendar'
require 'icalendar/tzinfo'
require 'icalendar-recurrence'
# Scrapes the schedule tables from a web page and turns them into an
# iCalendar calendar of weekly recurring events.
class Scrapinator
  # am/pm marker anywhere in a time string
  MERIDIEM = /[ap]m/i.freeze
  # separators accepted between the start and end of a time range:
  # ASCII hyphen, en dash, em dash
  RANGE_SEPARATOR = /[-\u2013\u2014]/.freeze
  private_constant :MERIDIEM, :RANGE_SEPARATOR

  # @param tz [String] TZ database identifier (e.g. "America/Toronto")
  #   used for the calendar's timezone definition and for every event
  def initialize(tz)
    @tzid = tz
    @mech = Mechanize.new
  end

  # Fetch +url+, scrape every table on the page, and build the calendar.
  #
  # @param basis [Date] first day of the seven-day window; each scraped
  #   weekday is anchored to the matching date within that window
  # @param url [String] page holding the schedule tables
  # @return [Icalendar::Calendar]
  def run(basis, url)
    # fetch the webpage
    page = @mech.get(url)
    # find the tables and reduce each one to rows of cell text
    tables = page.search('//table').map { |t| scrape_table(t) }
    # dump the raw scrape to stderr for debugging
    warn tables.inspect
    # turn them into a struct, then punt out the calendar
    make_ical(basis, convert_arrays(tables))
  end

  private

  # Reduce a table element to an array of rows, each an array of cell
  # strings. Runs of non-breaking spaces become a single space, the text
  # is stripped, and empty cells become nil.
  def scrape_table(table)
    table.xpath('tbody/tr').map do |tr|
      tr.elements.map do |td|
        text = td.text.gsub(/\u{a0}+/, ' ').strip
        text.empty? ? nil : text
      end
    end
  end

  # Convert scraped tables into a per-day schedule:
  #
  #   { "Weekday" => [ [start, end, description, location], ... ] }
  #
  # Only tables whose first row has four cells are considered. Rows that
  # have no usable time range, or that appear before any day heading,
  # are skipped rather than raising.
  def convert_arrays(tables)
    out = {}
    tables.each do |t|
      # skip unless the table has four cells
      next unless t.first && t.first.length == 4

      today = nil
      t.each do |row|
        # a row of all nils separates days; subsequent days are in the
        # first column *next* to the first entry in columns 2-4
        next if row.all?(&:nil?)

        # the first row has the day in the first column and labels for
        # the other columns, which we can discard
        has_data = true
        unless row.first.nil?
          has_data = false unless today
          today = row.first
          out[today] ||= []
        end
        # a data row before any day heading has nowhere to go
        next unless has_data && today

        time, desc, location = row.drop(1)
        next unless time

        start, finish = split_time_range(time)
        # without both ends the event cannot be built later on
        next unless start && finish

        out[today] << [start, finish, desc, location]
      end
    end
    out
  end

  # Split a range such as "7:00 - 8:30am" into ["7:00am", "8:30am"].
  # When only the finish carries an am/pm marker, one is supplied for
  # the start (see #start_meridiem). Either element may be nil when the
  # text does not contain a separator.
  def split_time_range(range)
    start, finish = range.split(RANGE_SEPARATOR).map(&:strip)
    return [start, finish] if start.nil? || finish.nil?
    return [start, finish] if MERIDIEM.match?(start)

    marker = finish[MERIDIEM]
    return [start, finish] unless marker

    # build a new string instead of appending in place
    [start + start_meridiem(start, finish, marker), finish]
  end

  # Pick the am/pm marker for a start time that lacks one. Normally it is
  # the finish's marker, but if the start hour falls later on the 12-hour
  # clock than the finish hour (e.g. "11:30 - 12:30pm") the range must
  # cross noon or midnight, so the opposite marker is returned, keeping
  # the letter case of the original.
  def start_meridiem(start, finish, marker)
    from = start[/\d+/]
    to = finish[/\d+/]
    return marker unless from && to

    from.to_i % 12 > to.to_i % 12 ? marker.tr('aApP', 'pPaA') : marker
  end

  # Build the calendar: for each of the seven days starting at +basis+,
  # look up that weekday's records and emit one event per record,
  # repeating weekly twelve times.
  def make_ical(basis, struct)
    ical = Icalendar::Calendar.new
    tz = TZInfo::Timezone.get(@tzid)
    ical.add_timezone tz.ical_timezone(DateTime.now)

    (0..6).each do |i|
      date = basis + i
      records = struct[date.strftime('%A')]
      next unless records

      records.each do |start, finish, desc, location|
        ical.event do |e|
          e.dtstart = zoned_time(start, date)
          e.dtend = zoned_time(finish, date)
          e.summary = desc
          e.location = location
          e.rrule = 'FREQ=WEEKLY;COUNT=12'
        end
      end
    end
    ical
  end

  # Parse a clock time on the given date and tag it with the configured
  # timezone id, so the event refers to the VTIMEZONE added to the
  # calendar instead of being emitted without a TZID.
  def zoned_time(text, date)
    Icalendar::Values::DateTime.new(Time.parse(text, date), 'tzid' => @tzid)
  end
end
# Script entry point: runs only when this file is executed directly.
if __FILE__ == $PROGRAM_NAME
  # first day of the seven-day window the scraped weekdays are mapped onto
  week_start = Date.new(2022, 1, 31)
  schedule_url = 'https://kpe.utoronto.ca/sport-recreationrecreational-workouts-activitiesdrop-sports-activities/lane-swimming'
  scraper = Scrapinator.new('America/Toronto')
  calendar = scraper.run(week_start, schedule_url)
  calendar.publish
  # write the serialized calendar to stdout
  print calendar.to_ical
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment