#!/usr/bin/env ruby
require 'date'
require 'open-uri'
require 'rubygems'
require 'hpricot'
require 'icalendar'
# Scrapes an ATP tournament calendar (a web page or a saved HTML file) with
# Hpricot and exports the tournaments it finds as an iCalendar file.
class ATPCalendar
  # Sources matching this are fetched over HTTP(S); anything else is treated
  # as the path of a local file. Anchored with \A so that a local file whose
  # name merely starts with "http" is not mistaken for a URL.
  REMOTE_SOURCE = %r{\Ahttps?://}

  # Loads +source+, parses it and keeps the resulting tournament hashes.
  #
  # source - String URL (http:// or https://) or path of a local HTML file.
  #
  # Errors raised while opening the source or parsing a date propagate to
  # the caller.
  def initialize(source)
    @source = source
    @hp = load_document(@source)
    @tourneys = prettify_output(parse_resource(@hp))
  end

  # Collects the raw cell texts of every table row selected from +doc+.
  #
  # doc - parsed document; only needs to answer `/` with a selector.
  #
  # Returns an Array of rows, each row an Array of cleaned cell Strings.
  # Rows that end up with no cells are dropped. The result is also stored
  # in @rows.
  def parse_resource(doc)
    @rows = (doc / "div.maincolwide//table[4]//tr").map do |row|
      (row / "td").map { |cell| clean_cell(cell) }.compact
    end.reject(&:empty?)
  end

  # Writes all parsed tournaments to +filename+ in iCalendar format.
  #
  # filename - output path; a relative path is resolved against the current
  #            working directory, an absolute path is used as given.
  #
  # Returns nil.
  def build_ical(filename)
    cal = Icalendar::Calendar.new
    @tourneys.each do |t|
      # Build the event standalone and register it exactly once.
      # Calendar#event already adds the event it returns, so combining it
      # with add_event (as before) put every tournament in the file twice.
      event = Icalendar::Event.new
      event.start = t[:date]
      event.summary = t[:tourney]
      event.description = "Surface: #{t[:surface]}; Winners: #{t[:winners]}"
      event.location = t[:location]
      cal.add_event(event)
    end
    path = File.expand_path(filename, Dir.getwd)
    # Block form closes (and therefore flushes) the file even on error.
    File.open(path, "w") { |file| file.puts cal.to_ical }
  end

  private

  # Opens +source+ (URL or local path), parses it with Hpricot and closes
  # the underlying IO again. Returns the parsed document.
  def load_document(source)
    if source =~ REMOTE_SOURCE
      if URI.respond_to?(:open)
        # Ruby >= 2.5: explicit open-uri entry point (Kernel#open stopped
        # accepting URLs in Ruby 3.0).
        URI.open(source) { |io| Hpricot(io) }
      else
        open(source) { |io| Hpricot(io) }
      end
    else
      # File.open rather than Kernel#open: a path is never run as a command.
      File.open(source) { |io| Hpricot(io) }
    end
  end

  # Returns the text of +cell+ with tabs removed and surrounding whitespace
  # stripped, or nil when the cell is empty or contains the byte sequence
  # \302\240 (a UTF-8 non-breaking space), i.e. is a spacer cell.
  def clean_cell(cell)
    text = cell.inner_text
    return nil if text.empty? || text =~ /\302\240/
    text.delete("\t").strip
  end

  # Creates nicely formatted hashes out of each row of tournament info.
  #
  # rows - Array of rows as returned by parse_resource, expected in the order
  #        date, location, surface, price, tickets, winners. The location
  #        cell holds the place on its first line and the tournament name on
  #        the next non-blank line.
  #
  # Returns an Array of Hashes with the keys :date, :location, :tourney,
  # :surface, :price, :tickets and :winners. A missing tournament name
  # becomes "" instead of raising. Date.parse errors propagate.
  def prettify_output(rows)
    rows.map do |date, location, surface, price, tickets, winners|
      # Ignore blank lines and stray padding between place and name.
      place, tourney = location.to_s.split("\n").map(&:strip).reject(&:empty?)
      {
        :date => Date.parse(date),
        :location => place,
        # Put the "ATP ..." suffix on its own line when it is glued
        # directly onto the preceding word.
        :tourney => tourney.to_s.sub(/(\w)ATP/, "\\1\nATP"),
        :surface => surface,
        :price => price,
        :tickets => tickets,
        :winners => winners || "no info available"
      }
    end
  end
end
# Script entry point: convert an ATP calendar page into an .ics file.
#
# Usage: ruby <this script> [source] [output.ics]
#   source - URL or local HTML file (default: atp_cal2009.html)
#   output - path of the .ics file to write (default: atp_tourneys_2009.ics)
source = ARGV[0] || "atp_cal2009.html"
output = ARGV[1] || "atp_tourneys_2009.ics"
calendar = ATPCalendar.new(source)
calendar.build_ical(output)