Skip to content

Instantly share code, notes, and snippets.

@krasnoukhov
Created April 10, 2012 15:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krasnoukhov/2352061 to your computer and use it in GitHub Desktop.
Save krasnoukhov/2352061 to your computer and use it in GitHub Desktop.
Parse infobel search results
require 'rubygems'
require 'mechanize'
require 'rubyXL'
# Walks the infobel.com "Hotels" business search for Germany page by page and
# writes one spreadsheet row per result entry to hotels.xlsx.
#
# Columns, in order: running number, title, address, web, email, phone,
# categories. A field that cannot be found on the page is stored as "".

PAGE_COUNT = 3903       # number of result pages requested
RESULTS_PER_PAGE = 10   # FirstRec advances by this much from one page to the next
OUTPUT_FILE = "hotels.xlsx"

# Returns the text of +node+, or "" when +node+ is nil.
def node_text(node)
  node ? node.text : ""
end

# Looks for a <span> in the entry's <ul>/<li> list whose text contains +label+
# and returns the element that encloses it, or nil when there is none.
def labelled_row(item, label)
  span = item.xpath("./ul/li/span[contains(text(), '#{label}')]").first
  span && span.parent
end

# Returns the first node matching +path+ below +row+, or nil when +row+ is nil
# or nothing matches. Guards against entries that carry a label but no value.
def first_in_row(row, path)
  row && row.xpath(path).first
end

# Returns the part of the web link's href that follows "url=", or "" when the
# entry has no web link or the href has no such part.
# NOTE(review): presumably the href is a redirect wrapping the real site URL -
# confirm against a live page.
def web_address(item)
  link = first_in_row(labelled_row(item, 'Web:'), "./div/a")
  return "" unless link

  link.attributes["href"].to_s[/url=(.*)/, 1].to_s
end

# Returns the lower-cased category names, joined with ", ", taken from the
# "categories" <div> that sits next to the entry; "" when it is absent.
def category_list(item)
  cats = item.parent.xpath("./div[@class='categories']").first
  return "" unless cats

  cats.xpath("./div/ul/li").map { |cat| cat.text.downcase }.join(", ")
end

# Builds the spreadsheet row (an Array) for one "vcard" entry.
# +number+ is the 1-based running number stored in the first column.
def extract_row(item, number)
  [
    number,
    node_text(item.xpath("./h2/a/span").first),
    node_text(item.xpath("./ul/li[@class='adr']/div/span").first),
    web_address(item),
    node_text(first_in_row(labelled_row(item, 'Email:'), "./div/a")),
    node_text(first_in_row(labelled_row(item, 'Telefon:'), "./div")),
    category_list(item)
  ]
end

agent = Mechanize.new { |m| m.user_agent_alias = 'Mac Safari' }
workbook = RubyXL::Workbook.new
sheet = workbook.worksheets[0]
row_index = 0

begin
  PAGE_COUNT.times do |page_number|
    first_record = page_number * RESULTS_PER_PAGE
    page = agent.get("http://www.infobel.com/de/germany/business.aspx?FirstRec=#{first_record}&QKeyword=Hotels")
    puts page.uri.to_s

    page.parser.xpath("//div[@class='vcard']").each do |item|
      extract_row(item, row_index + 1).each_with_index do |value, column|
        sheet.add_cell(row_index, column, value)
      end
      row_index += 1
    end

    puts "COUNT: #{row_index}"
  end
ensure
  # Write whatever has been collected even when a request or a parse step
  # raises part-way through, so a late failure does not discard earlier rows.
  # The exception itself still propagates after the file is written.
  workbook.write(OUTPUT_FILE)
end
# Gemfile for the scraper script: the two gems it requires.
# Fetch gems over HTTPS rather than plain HTTP.
source 'https://rubygems.org'

gem 'mechanize'
gem 'rubyXL'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment