Skip to content

Instantly share code, notes, and snippets.

@o-sam-o
Created September 10, 2011 08:13
Show Gist options
  • Save o-sam-o/1208099 to your computer and use it in GitHub Desktop.
Save o-sam-o/1208099 to your computer and use it in GitHub Desktop.
Knox Council Development Applications
require 'mechanize'
require 'date'
require "awesome_print"
# Root of the Knox council ePathway "generalenquiry" pages; every other URL
# used by this script is built relative to it.
base_url = 'https://eservices.knox.vic.gov.au/ePathway/Production/Web/generalenquiry/'
url = base_url + 'enquirylists.aspx'

# A single agent is reused for all requests in this script.
agent = Mechanize.new

# Load the enquiry list page and log its title.
enquiry_list_page = agent.get(url)
p(enquiry_list_page.title.strip)

# Select the first radio button of the first form on the page, then submit
# the form to reach the summary (results) page.
# NOTE(review): presumably the first option is the development application
# enquiry; confirm against the live page.
enquiry_form = enquiry_list_page.forms.first
enquiry_form.radiobuttons.first.click
summary_page = enquiry_form.click_button
p(summary_page.title.strip)
# Walk every page of the results listing and gather the text of each data row.
#
# After the loop:
#   das_data - array of rows, each row an array of stripped cell strings
#   headers  - stripped text of the <th> cells from the last page visited
das_data = []
headers = []
while summary_page
  table = summary_page.root.at_css('.AlternateContentPanel')
  # Fail with a clear message instead of a NoMethodError on nil when the
  # expected results element is not present on the page.
  raise "Results table (.AlternateContentPanel) not found on #{summary_page.uri}" unless table

  headers = table.css('th').map { |th| th.inner_text.strip }

  page_rows = table.css('.ContentPanel, .AlternateContentPanel').map do |tr|
    tr.css('td').map { |td| td.inner_text.strip }
  end
  # A matched element without any <td> cells carries no application data and
  # would only break the field lookups performed on each row later on.
  das_data.concat(page_rows.reject(&:empty?))

  # The "next page" control is an image wrapped in a link; its parent <a>
  # holds the href of the following page, relative to base_url.
  next_page_img = summary_page.root.at_xpath("//a/img[contains(@src, 'nextPage')]")
  summary_page =
    if next_page_img
      p "Found another page"
      agent.get "#{base_url}#{next_page_img.parent['href']}"
    end
  # When no next-page image exists the `if` evaluates to nil, ending the loop.
end
# TODO: is this the best person to email?
# NOTE(review): the subject text contains unescaped spaces; mailto URLs are
# normally percent-encoded. Left unchanged so existing consumers of
# comment_url see the same value; confirm before encoding.
comment_url = 'mailto:therese.massoud@knox.vic.gov.au?Subject=Development Application Enquiry: '

# Resolve each column position from the header row at most once (instead of
# re-scanning the headers for every field of every row) and fail with a
# readable message when an expected column is missing, rather than the
# TypeError raised by indexing a row with nil.
column_of = Hash.new do |cache, name|
  cache[name] = headers.index(name) ||
                raise("Column '#{name}' not found in table headers: #{headers.inspect}")
end

# Evaluated once so every record of this run carries the same scrape date.
date_scraped = Date.today.to_s

# Turn each scraped row (array of cell strings) into a record hash.
# Raises if 'Date Lodged' is not in dd/mm/yyyy form (Date.strptime).
das = das_data.map do |da_item|
  council_reference = da_item[column_of['Application Number']]
  {
    'council_reference' => council_reference,
    # There is a direct link but you need a session to access it :(
    'info_url' => url,
    'description' => da_item[column_of['Description']],
    'date_received' => Date.strptime(da_item[column_of['Date Lodged']], '%d/%m/%Y').to_s,
    'address' => da_item[column_of['Location']],
    'date_scraped' => date_scraped,
    'comment_url' => comment_url + council_reference
  }
end

# Pretty-print the collected records, then signal completion.
ap das
p 'Done'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment