Skip to content

Instantly share code, notes, and snippets.

@mleszcz
Created March 30, 2011 12:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mleszcz/894296 to your computer and use it in GitHub Desktop.
Save mleszcz/894296 to your computer and use it in GitHub Desktop.
require 'rubygems'
require 'mechanize'
require 'set'
# Crawler script: submits the search form on the start page, walks every
# result page, opens each linked detail page and appends one line per
# previously unseen e-mail address to FILE.
#
# Log format (one record per line, fields separated by ' ; '):
#   <email text + title attribute> ; MA-WWW <www text> ; <miasto> ; <nazwa>
# A field is left empty when its element is missing from the detail page.

# Path of the log file; records are appended, never truncated.
FILE = 'crawler.log'

# Returns the first 'td p' node inside the third element sibling following
# the first 'tr.nazwa' row, or nil when the row or any sibling is missing.
# The script logs this value as "miasto" (Polish for "city") -- presumably the
# address row of the detail table; confirm against the target site's markup.
def city_node(detail_page)
  row = detail_page.search('tr.nazwa').first
  3.times { row = row.next_element if row }
  row && row.search('td p').first
end

# Builds one log record. Missing (nil) nodes produce empty fields so the
# number of ' ; ' separators per record stays constant.
def log_record(email_text, www, miasto, nazwa)
  fields = [
    email_text,
    www.nil? ? '' : "MA-WWW #{www.text}",
    miasto.nil? ? '' : miasto.text,
    nazwa.nil? ? '' : nazwa.text
  ]
  "\n#{fields.join(' ; ')}"
end

agent = Mechanize.new { |m| m.user_agent_alias = 'Mac Safari' }

start_page = agent.get('http://www.com/')
search_form = start_page.forms.first
raise 'no search form found on the start page' if search_form.nil?

# Alternative search criterion, left disabled as in the original script:
# search_form.sictext = '162399'
search_form.nacetext = '45.21.1' # presumably a NACE activity code -- confirm
results = search_form.submit

page_number = 1
seen_emails = Set.new # e-mail strings already logged during this run

File.open(FILE, 'a') do |log|
  # Flush every write immediately so the log can be followed while the crawl
  # is running, as it could when the file was reopened for each record.
  log.sync = true

  loop do
    log.write "\n---- PAGE #{page_number} ----"

    results.search('table.lista tr td a').each do |link|
      href = link.attr('href')
      next if href.nil? || href.empty?

      # Pass the results page as referer so relative hrefs are resolved
      # against it and not against the previously fetched detail page.
      detail = agent.get(href, [], results)

      email = detail.search('tr.email td p a').first
      next if email.nil?

      email_text = "#{email.text}#{email.attr('title')}"
      # Set#add? returns nil when the value is already present.
      next unless seen_emails.add?(email_text)

      nazwa = detail.search('tr.nazwa td p').first
      miasto = city_node(detail)
      www = detail.search('tr.www td p a').first
      log.write log_record(email_text, www, miasto, nazwa)
    end

    # Pagination: stop when the results page has no form or no 'next' button.
    pager = results.forms.first
    next_button = pager && pager.button_with(:name => 'next')
    break if next_button.nil?

    results = pager.submit(next_button)
    page_number += 1
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment