Skip to content

Instantly share code, notes, and snippets.

@txus
Created June 26, 2011 15:05
Show Gist options
  • Save txus/1047682 to your computer and use it in GitHub Desktop.
Save txus/1047682 to your computer and use it in GitHub Desktop.
UOC scraping with standalone Capybara
require 'rubygems'
require 'capybara'
require 'capybara/dsl'
require 'akephalos'
# Standalone Capybara setup (no test framework, no local app under test).
# Relative paths passed to `visit` are resolved against this remote host.
Capybara.app_host = 'http://cv.uoc.edu'
# Drive the session through the akephalos driver (loaded by the require above).
Capybara.current_driver = :akephalos
# The target is a remote site, so Capybara must not start a server of its own.
Capybara.run_server = false
# Namespace for the standalone Capybara scraping script.
module MyCapybaraTest
  # Drives a Capybara session through the cv.uoc.edu frameset and collects the
  # contents of the "tablaNotas" tables reachable from the class menu.
  class Test
    include Capybara

    # Entry URL of the navigation frameset, visited right after logging in.
    # NOTE(review): the trailing 35309 looks account- or session-specific —
    # confirm before reusing this script for another user.
    NAVIGATION_URL = 'http://cv.uoc.edu/rb/inici/navigation/main/35309'

    # Menu links whose text matches this pattern are followed and scraped.
    NOTES_LINK_PATTERN = /Lliurament i registre/

    # Number of leading rows of the notes table that are skipped before the
    # remaining rows are read (presumably header rows — TODO confirm).
    SKIPPED_ROW_COUNT = 4

    # Logs in, walks down the nested frames to the class menu, follows every
    # matching menu link and prints the collected values.
    #
    # Progress messages, each visited notes page and the final result are
    # written to standard output.
    #
    # @return [nil] the collected hash is only printed, not returned
    def test_uoc
      log_in
      enter_menu_frame
      menu_links = notes_links
      p 'Clicking links.....'
      notes = collect_notes(menu_links)
      puts notes.inspect
      # Disabled alternative navigation, kept from the original script:
      # url = page.find(:css, 'frame[name=planaInici]')[:src];
      # visit url
      # link = page.find(:css, '#grid_right_navigation_old_page_item');
      # link.click
      # page.find(:css, 'a.fntmnsel1').click
    end

    private

    # Fills in the login form on the landing page and submits it.
    def log_in
      p 'Logging in...'
      visit('/')
      within 'form[name=loginForm]' do
        # 'xxxx' are placeholders; replace them with real credentials.
        fill_in 'l', with: 'xxxx'
        fill_in 'p', with: 'xxxx'
      end
      # The form is submitted by firing a click on its image input via the
      # page's own `$` JavaScript helper.
      page.execute_script("$('input.img').trigger('click');")
    end

    # Visits the navigation, main, class and menu frames in turn. Each frame
    # document is loaded as a top-level page so its content can be queried
    # directly.
    def enter_menu_frame
      p 'Entering navigation frame...'
      visit NAVIGATION_URL
      p 'Entering main frame...'
      visit frame_src('main')
      p 'Entering class frame...'
      visit rooted_path(frame_src('aula'))
      p 'Entering menu frame...'
      visit frame_src('cl_menu')
    end

    # @param name [String] value of the frame's `name` attribute
    # @return [String] the `src` attribute of that frame on the current page
    def frame_src(name)
      page.find(:css, "frame[name=#{name}]")[:src]
    end

    # Replaces the first '/'-separated segment of +src+ with an empty one, so
    # the result starts with '/' (e.g. "../a/b" becomes "/a/b").
    #
    # @param src [String] frame source as found in the markup
    # @return [String] the rewritten path
    def rooted_path(src)
      ([''] + src.split('/').drop(1)).join('/')
    end

    # @return [Array] links on the current page whose text matches
    #   NOTES_LINK_PATTERN
    def notes_links
      page.all('a').select { |link| link.text =~ NOTES_LINK_PATTERN }
    end

    # Clicks each link, dumps the resulting page and reads its notes table.
    #
    # NOTE(review): all links were located on the menu page, but every click
    # navigates away from it; whether the remaining elements stay clickable
    # depends on the driver — confirm with more than one matching link.
    #
    # @param links [Array] link elements to follow
    # @return [Hash{String => Hash{String => String}}] page title (h3 text)
    #   mapped to { 'td.PacEstudiant' text => 'td.Nota' text }
    def collect_notes(links)
      links.each_with_object({}) do |link, notes|
        puts "about to click #{link}"
        link.click
        p '***' * 20
        puts page.body
        p '***' * 20
        titol = page.find(:css, 'h3').text
        notes[titol] = {}
        # `drop` skips the leading rows by position and works for any
        # Enumerable result, unlike the array difference `trs - trs[0..3]`.
        page.all('table.tablaNotas tbody tr').drop(SKIPPED_ROW_COUNT).each do |tr|
          pac = tr.find(:css, 'td.PacEstudiant').text
          nota = tr.find(:css, 'td.Nota').text
          notes[titol][pac] = nota
          puts "Adding #{titol}[#{pac}] = #{nota}"
        end
      end
    end
  end
end
# Run the scraper, then dump the HTML of whatever page the session ended on.
scraper = MyCapybaraTest::Test.new
scraper.test_uoc
puts scraper.page.body
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment