Skip to content

Instantly share code, notes, and snippets.

@windix
Created July 31, 2012 04:21
Show Gist options
  • Save windix/3213589 to your computer and use it in GitHub Desktop.
Save windix/3213589 to your computer and use it in GitHub Desktop.
Olympic Games scraper
require "mechanize"
require "sinatra"
require "stringio"
# Builds a plain-text (wrapped in <pre>) report of gold-medal counts per sport
# by scraping the per-sport medal pages on london2012.com, and caches the
# rendered report in memory for CACHE_TTL seconds.
class MedalScraper
  # Seconds a rendered report is reused before the next fetch re-scrapes
  # (one hour).
  CACHE_TTL = 3600

  # URL slug used in the medal-page address => display name used in the
  # report. Iteration order is the order of the sections in the report.
  SPORTS = {
    'archery' => 'Archery',
    'athletics' => 'Athletics',
    'badminton' => 'Badminton',
    'basketball' => 'Basketball',
    'beach-volleyball' => 'Beach Volleyball',
    'boxing' => 'Boxing',
    'canoe-slalom' => 'Canoe Slalom',
    'canoe-sprint' => 'Canoe Sprint',
    'cycling-bmx' => 'Cycling BMX',
    'cycling-mountain-bike' => 'Cycling Mountain Bike',
    'cycling-road' => 'Cycling Road',
    'cycling-track' => 'Cycling Track',
    'diving' => 'Diving',
    'equestrian' => 'Equestrian',
    'fencing' => 'Fencing',
    'football' => 'Football',
    'gymnastics-artistic' => 'Gymnastics Artistic',
    'gymnastics-rhythmic' => 'Gymnastics Rhythmic',
    'handball' => 'Handball',
    'hockey' => 'Hockey',
    'judo' => 'Judo',
    'modern-pentathlon' => 'Modern Pentathlon',
    'rowing' => 'Rowing',
    'sailing' => 'Sailing',
    'shooting' => 'Shooting',
    'swimming' => 'Swimming',
    'synchronized-swimming' => 'Synchronized Swimming',
    'table-tennis' => 'Table Tennis',
    'taekwondo' => 'Taekwondo',
    'tennis' => 'Tennis',
    'gymnastic-trampoline' => 'Trampoline',
    'triathlon' => 'Triathlon',
    'volleyball' => 'Volleyball',
    'water-polo' => 'Water Polo',
    'weightlifting' => 'Weightlifting',
    'wrestling' => 'Wrestling'
  }.freeze

  def initialize
    @sports = SPORTS
    @data = nil     # last successfully rendered report
    @last_run = nil # start time of the scrape that produced @data
  end

  # Returns the medal report, re-scraping when there is no cached report,
  # the cached one is at least CACHE_TTL seconds old, or +force+ is truthy.
  #
  # Errors raised while scraping propagate to the caller; in that case the
  # previous report and its timestamp are left untouched, so the next call
  # tries again instead of treating the old report as freshly updated.
  #
  # @param force [Boolean] re-scrape even if the cached report is still fresh
  # @return [String] the rendered report
  def fetch(force = false)
    return @data unless force || stale?

    started_at = Time.now
    @data = scrap(started_at)
    # Committed only after the scrape succeeded (see method comment).
    @last_run = started_at
    @data
  end

  private

  # True when there is nothing cached yet or the cache has expired.
  def stale?
    @data.nil? || @last_run.nil? || Time.now - @last_run >= CACHE_TTL
  end

  # Fetches every sport's medal page and renders the full report.
  #
  # @param started_at [Time] timestamp printed in the "Last update" header
  # @return [String] the rendered report
  def scrap(started_at = Time.now)
    agent = Mechanize.new
    # agent.set_proxy 'localhost', 3128
    io = StringIO.new
    io.puts "<pre>"
    io.puts "Last update: #{started_at}"
    io.puts
    @sports.each do |key, name|
      io.puts "#{name}: "
      puts name # progress line on the process's stdout, one per sport
      agent.get("http://www.london2012.com/#{key}/medals/") do |page|
        write_gold_medals(io, page.parser.css('table.overall_medals tbody tr'))
      end
      io.puts
    end
    io.puts "</pre>"
    io.string
  end

  # Writes one "Country: N" line per medal-table row with a non-zero gold
  # count, or "None" when the page has no medal-table rows at all.
  #
  # NOTE(review): scraped text is written into the HTML response without
  # escaping.
  def write_gold_medals(io, rows)
    if rows.empty?
      io.puts "None"
      return
    end

    rows.each do |tr|
      country = tr.at_css('span.countryName')
      gold = tr.at_css('td.gold')
      # Skip rows missing either cell rather than failing the whole report.
      next unless country && gold

      count = gold.text.to_i
      next if count == 0

      io.puts "#{country.text}: #{count}"
    end
  end
end
# One scraper shared by every request so its in-memory report cache is reused.
# Held in a constant rather than a top-level class variable: a `@@var` here
# would belong to Object (shared with every class), and Ruby 3.0+ raises
# RuntimeError when a class variable is accessed from the top-level scope.
MEDAL_SCRAPER = MedalScraper.new

# GET / responds with the (possibly cached) medal report.
get "/" do
  MEDAL_SCRAPER.fetch
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment