#!/usr/bin/env ruby

require 'rubygems'
require 'date'
require 'open-uri'

require 'hpricot'
require 'icalendar'

# Scrapes a tournament schedule table out of an HTML page (a local file or an
# http(s) URL) and exports one iCalendar event per tournament.
#
# The page is expected to hold the schedule in the fourth table inside
# `div.maincolwide`, one tournament per row, with the non-empty cells in the
# order: date, location + tournament name (newline separated), surface,
# price, tickets, winners.
class ATPCalendar
  # Loads and parses the schedule.
  #
  # @param source [String] path to a local HTML file, or a URL starting
  #   with "http"
  # @raise [ArgumentError] when a row's first cell is not a parseable date
  def initialize(source)
    @source = source
    @hp = load_document(@source)
    @tourneys = prettify_output(parse_resource(@hp))
  end

  # Extracts the raw cell texts of every schedule row.
  #
  # Cells that are empty or contain a non-breaking space are dropped, so the
  # remaining cells of a row shift left; rows left with no cells are skipped.
  #
  # @param doc [Hpricot::Doc] the parsed page
  # @return [Array<Array<String>>] one array of cleaned cell texts per row
  def parse_resource(doc)
    rows = []
    (doc / "div.maincolwide//table[4]//tr").each do |row|
      cells = []
      (row / "td").each do |cell|
        text = cell.inner_text
        # Filter on the raw text, before cleaning: placeholder cells are
        # either truly empty or contain a non-breaking space.
        next if text == "" || text =~ /\302\240/

        # Tabs are layout residue from the page markup.
        cells << text.delete("\t").strip
      end
      rows << cells unless cells.empty?
    end
    rows
  end

  # Writes all parsed tournaments to an .ics file in the current working
  # directory.
  #
  # @param filename [String] name of the file to create (overwritten if it
  #   already exists)
  # @return [nil]
  def build_ical(filename)
    cal = Icalendar::Calendar.new
    @tourneys.each do |t|
      # Build the event standalone and add it exactly once: Calendar#event
      # registers the event it creates, so following it with add_event
      # listed every tournament twice.
      event = Icalendar::Event.new
      event.start       = t[:date]
      event.summary     = t[:tourney]
      event.description = "Surface: #{t[:surface]}; Winners: #{t[:winners]}"
      event.location    = t[:location]
      cal.add_event(event)
    end

    # Block form guarantees the file is flushed and closed.
    File.open(File.join(Dir.getwd, filename), "w+") do |cal_file|
      cal_file.puts cal.to_ical
    end
  end

  private

  # Opens +source+ and parses it with Hpricot, closing the handle afterwards.
  #
  # @param source [String] local path or http(s) URL
  # @return [Hpricot::Doc]
  def load_document(source)
    if source =~ /\Ahttp/
      if URI.respond_to?(:open)
        # Ruby >= 2.5; Kernel#open stopped handling URLs in Ruby 3.0.
        URI.open(source) { |io| Hpricot(io) }
      else
        # Older Rubies: open-uri extends Kernel#open to handle URLs.
        open(source) { |io| Hpricot(io) }
      end
    else
      # File.open rather than Kernel#open, which would run a command for a
      # source starting with "|".
      File.open(source) { |io| Hpricot(io) }
    end
  end

  # Turns each raw row into a hash of tournament attributes.
  #
  # @param rows [Array<Array<String>>] rows as returned by #parse_resource
  # @return [Array<Hash>] hashes with the keys :date (Date), :location,
  #   :tourney, :surface, :price, :tickets and :winners
  # @raise [ArgumentError] when a row's date cell cannot be parsed
  def prettify_output(rows)
    rows.map do |date, location, surface, price, tickets, winners|
      # The second cell holds "<location>\n<tournament name>".
      venue, tourney = location.to_s.split("\n")
      {
        :date     => Date.parse(date),
        :location => venue,
        # Put the "ATP ..." category on its own line when it is glued to the
        # tournament name; tolerate a cell without a tournament line.
        :tourney  => (tourney || "").sub(/(\w)ATP/, "\\1\nATP"),
        :surface  => surface,
        :price    => price,
        :tickets  => tickets,
        :winners  => winners || "no info available"
      }
    end
  end
end

# Script entry point: convert the saved 2009 schedule page into an .ics file.
calendar = ATPCalendar.new("atp_cal2009.html")
calendar.build_ical("atp_tourneys_2009.ics")