Skip to content

Instantly share code, notes, and snippets.

@knagode
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save knagode/b0be5225e028d1d3c152 to your computer and use it in GitHub Desktop.
Save knagode/b0be5225e028d1d3c152 to your computer and use it in GitHub Desktop.
Rails Capybara scraping example
require 'rails_helper'
require 'rspec/rails'
# Scrapes the sloexport.si company search with Capybara and stores every
# result for one export country as a Company record.
#
# Flow: enable the extra result column in the search settings, run the
# advanced search filtered by country, then walk the paginated result list
# and persist one Company per ".company" element.
feature "scrapper" do
  it "gets all companies which export to austria" do
    # First, visit the settings page so that financial data is included in
    # the search results (checkbox with value 8, per the original author).
    visit "http://www.sloexport.si/default.asp?MenuID=279&Settings=true"
    sleep(0.3)
    find(:xpath, ".//input[@value='8']").click
    find(:xpath, ".//input[@name='btnResultSettings']").click

    country = "Austria"

    # Open the advanced search, pick the country in the dialog and submit.
    visit "http://www.sloexport.si/default.asp?MenuID=279&advancedSearch=true"
    find_by_id('dialog-drzave-link').click
    sleep(0.3) # give the country dialog time to open
    find(:xpath, ".//input[@desc='#{country}']").click
    find_by_id('btnChoose').click
    find(:css, "input.button_adv_search").click

    # Go through every result page.
    url = current_url
    loop do
      page.all(:css, '.company').each do |el|
        company = Company.new
        company.name = el.all(:css, ".CompName").first.text
        company.address = el.find(:css, ".address").text

        # The first <b> element, when present, holds the income figure.
        # Very short texts (two characters or fewer) are treated as
        # "no income available" and stored as -1.
        first_bold = el.all(:css, "b").first
        income = first_bold && first_bold.text
        company.income =
          if income && income.length > 2
            # Drop thousands separators and everything after the decimal point.
            income.gsub(',', '').split(".").first.to_i
          else
            -1
          end

        contacts = el.all(:css, ".contacts").first
        if contacts
          company.full_contacts = contacts.text
          # The first link inside the contacts block is taken as the e-mail.
          email_link = contacts.all(:css, "a").first
          company.email = email_link.text if email_link
        end

        company.country = country
        company.save!
        puts "#{company.id}: #{company.name}"
        company.reload
      end

      # `has_css?` is the boolean predicate. The RSpec matcher `have_css`
      # returns a matcher object that is always truthy, so using it in a
      # condition never detects a missing "next" link.
      break unless page.has_css?('a.searchnext')

      page.all(:css, "a.searchnext").first.click
      # If the URL did not change, we have reached the last page.
      break if url == current_url

      url = current_url
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment