Skip to content

Instantly share code, notes, and snippets.

@LouisStAmour
Created December 23, 2009 09:54
Show Gist options
  • Save LouisStAmour/262436 to your computer and use it in GitHub Desktop.
Save LouisStAmour/262436 to your computer and use it in GitHub Desktop.
require 'rubygems' # A script to pull and parse just routedetails
require 'jdbc/sqlite3' # Written in jRuby with jdbc-sqlite3 and celerity installed
require 'celerity' # Trust me, you'd prefer http://toronto.ca/open to this
# I'm using this to test the open dataset given as of Oct 27
# Assuming the following structure for database ...
# # DROP TABLE ttcca_routedetails; DROP TABLE ttcca_scheduleitem; DROP TABLE ttcca_routestop; DROP TABLE ttcca_stopdetails;
#
# sql = <<SQL
# create table ttcca_routedetails (
# routeid TEXT, -- apparently not unique?
# branchcode TEXT, -- "_" if nil
# version INT,
# description TEXT,
# routename TEXT
# );
# create table ttcca_scheduleitem (
# routeid TEXT, -- apparently not unique?
# branchcode TEXT,
# version INT,
# stopid INT, -- apparently not unique?
# day INT, -- should index
# time INT -- time converted to int
# );
# create table ttcca_routestop (
# routeid TEXT, -- apparently not unique?
# branchcode TEXT,
# version INT,
# stopid INT, -- apparently not unique?
# routestopid INT,
# routestopdirection INT
# );
# create table ttcca_stopdetails (
# stopid INT, -- apparently not unique?
# station INT,
# name TEXT,
# accessible INT,
# subwayconnection INT,
# nearestnode INT -- guessed; not TTC provided, this links to the centreline data from toronto.ca/open
# -- nearestnode is -1 if data is unavailable.
# );
# SQL
database_path = "jdbc:sqlite:db.sqlite"
Java::org.sqlite.JDBC #init the driver
conn = java.sql.DriverManager.getConnection(database_path)
puts "Loading routedetails: routeid, branchcode, version, description, routename"
prep = conn.prepareStatement("insert into ttcca_routedetails values (?, ?, 200912, ?, ?);")
browser = Celerity::Browser.new({ :javascript_enabled => true, :css => false })
browser.goto("http://www3.ttc.ca/Routes/Buses.jsp")
bus_links = browser.elements_by_xpath "//div[@id='ttc-main-content']/div[1]/div/ul/li/a"
# returns an array of <Celerity::Element>s, all links with contents like:
# 353 Steeles East <span class="access">Accessible</span> <span class="access">Bicycle Rack</span>
bus_links.each do |b|
b.click
h1 = browser.element_by_xpath "//h1[@id='ttc-route-name']"
puts h1.inner_text
parsed = h1.inner_text.split(" ") # => ["5", "Avenue", "Rd", "Northbound"]
routeid = parsed[0]+parsed[-1][0].chr # eg. 5N
routename = parsed[1,parsed.length-2].join(" ").upcase # => "AVENUE RD"
routes = browser.elements_by_xpath "//div[@id='ttc-dir-1']/div/ul/li"
routes.each do |r|
branchcode = r.object.children.first.getAttribute("class")[-1].chr # => "_"
description = r.object.children.first.getNextSibling.asText # => "5 AVENUE RD To EGLINTON STN"
prep.setString(1, routeid)
prep.setString(2, branchcode)
prep.setString(3, description)
prep.setString(4, routename)
prep.addBatch
end
reverse_direction_link = browser.element_by_xpath "//ul[@id='ttc-route-tabs']/li[2]/a"
puts reverse_direction_link.inner_text
routeid = parsed[0]+reverse_direction_link.inner_text[0].chr
reverse_direction_link.click
routes = browser.elements_by_xpath "//div[@id='ttc-dir-1']/div/ul/li"
routes.each do |r|
branchcode = r.object.children.first.getAttribute("class")[-1].chr # => "_"
description = r.object.children.first.getNextSibling.asText # => "5 AVENUE RD To EGLINTON STN"
prep.setString(1, routeid)
prep.setString(2, branchcode)
prep.setString(3, description)
prep.setString(4, routename)
prep.addBatch
end
end
browser.goto("http://www3.ttc.ca/Routes/Streetcars.jsp")
bus_links = browser.elements_by_xpath "//div[@id='ttc-main-content']/div[1]/div/ul/li/a"
# returns an array of <Celerity::Element>s, all links with contents like:
# 353 Steeles East <span class="access">Accessible</span> <span class="access">Bicycle Rack</span>
bus_links.each do |b|
b.click
h1 = browser.element_by_xpath "//h1[@id='ttc-route-name']"
puts h1.inner_text
parsed = h1.inner_text.split(" ") # => ["5", "Avenue", "Rd", "Northbound"]
routeid = parsed[0]+parsed[-1][0].chr # eg. 5N
routename = parsed[1,parsed.length-2].join(" ").upcase # => "AVENUE RD"
routes = browser.elements_by_xpath "//div[@id='ttc-dir-1']/div/ul/li"
routes.each do |r|
branchcode = r.object.children.first.getAttribute("class")[-1].chr # => "_"
description = r.object.children.first.getNextSibling.asText # => "5 AVENUE RD To EGLINTON STN"
prep.setString(1, routeid)
prep.setString(2, branchcode)
prep.setString(3, description)
prep.setString(4, routename)
prep.addBatch
end
reverse_direction_link = browser.element_by_xpath "//ul[@id='ttc-route-tabs']/li[2]/a"
puts reverse_direction_link.inner_text
routeid = parsed[0]+reverse_direction_link.inner_text[0].chr
reverse_direction_link.click
routes = browser.elements_by_xpath "//div[@id='ttc-dir-1']/div/ul/li"
routes.each do |r|
branchcode = r.object.children.first.getAttribute("class")[-1].chr # => "_"
description = r.object.children.first.getNextSibling.asText # => "5 AVENUE RD To EGLINTON STN"
prep.setString(1, routeid)
prep.setString(2, branchcode)
prep.setString(3, description)
prep.setString(4, routename)
prep.addBatch
end
end
puts "Committing ..."
conn.setAutoCommit false
prep.executeBatch
conn.setAutoCommit true
conn.close
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment