Skip to content

Instantly share code, notes, and snippets.

@j-manu
Created November 18, 2011 10:52
Show Gist options
  • Save j-manu/1376148 to your computer and use it in GitHub Desktop.
Save j-manu/1376148 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'hpricot'
# Parse initiative_shortlist.html and collect, for every row of #table1 after
# the first, the normalised place name (into +places+) and a four-digit year
# (into +years+).
doc = File.open("initiative_shortlist.html") { |f| Hpricot(f) }
names = [] # only referenced by the disabled name-extraction line below
places = []
years = []

# Returns the inner HTML of the first <font> element inside +td+, or "" when
# the cell or its <font> is missing, so an irregular row is skipped instead of
# aborting the whole run with a NoMethodError on nil.
def font_text(td)
  font = td && td.search("font").first
  font ? font.inner_html : ""
end

# Converts a cleaned year cell (word characters and "/" only) into a
# four-digit year string. Only the part after the last "/" is considered.
# Returns nil when that part is missing, is not purely numeric, or cannot be
# mapped to a century.
def normalize_year(raw_year)
  year = raw_year.split("/").last
  # Require digits only: otherwise tokens such as "None" (length 4) or any
  # two-letter word (to_i == 0) would be accepted as years.
  return nil unless year && year =~ /\A\d+\z/

  if year.length == 4
    year
  elsif year.length == 2 && year.to_i < 10 # 20xx xx < 10
    "20" + year
  elsif year.length == 2 && year.to_i > 30 # 19xx xx > 30
    "19" + year
  end
  # Two-digit values 10..30 match neither rule and are dropped (nil).
end

(doc/"#table1/tr").each_with_index do |tr, row_index|
  # The first row is skipped (presumably the table header).
  next if row_index.zero?

  tds = tr.search("td")
  #names << tds[0].search("font").first.inner_html.gsub(/[^\w ]/,"")

  # Strip everything but word characters, drop the leftover of "&nbsp;",
  # then normalise the case so identical places compare equal.
  place_name = font_text(tds[1]).gsub(/[^\w]/, "").gsub("nbsp", "").downcase.capitalize
  places << place_name unless place_name.empty?

  parsed_year = normalize_year(font_text(tds[2]).gsub(/[^\w\/]/, "").gsub("nbsp", ""))
  years << parsed_year if parsed_year
end
# Count how often each place occurs and print one "place, count" line per
# place, most frequent first, followed by a separator line.
#
# Counting goes through a hash with a default of 0, so every place is counted,
# including the one that sorts last. A run-length scan over the sorted list
# would need an explicit flush after the loop to record its final run.
sorted_places = Hash.new(0)
places.each { |name| sorted_places[name] += 1 }

# Highest count first; equal counts are ordered by name so the output is
# deterministic.
sorted_places.sort_by { |name, count| [-count, name] }.each do |name, count|
  puts "#{name}, #{count}"
end
puts "***********"
# Count how often each year occurs and print the result twice: first ordered
# by frequency (most frequent first), then, after a separator line, ordered
# by year.
#
# Counting goes through a hash with a default of 0, so every year is counted,
# including the one that sorts last. A run-length scan over the sorted list
# would need an explicit flush after the loop to record its final run.
sorted_years = Hash.new(0)
years.each { |yr| sorted_years[yr] += 1 }

# Highest count first; equal counts are ordered by year so the output is
# deterministic.
sorted_years.sort_by { |yr, count| [-count, yr] }.each do |yr, count|
  puts "#{yr}, #{count}"
end
puts "***********"

# Years are four-character strings, so the default sort of the [year, count]
# pairs is chronological.
sorted_years.sort.each do |yr, count|
  puts "#{yr}, #{count}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment