Skip to content

Instantly share code, notes, and snippets.

@hsribei
Created March 25, 2009 05:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hsribei/85300 to your computer and use it in GitHub Desktop.
Save hsribei/85300 to your computer and use it in GitHub Desktop.
Script para ripar nomes, partidos, estados e votações de deputados federais.
# Cache the scraped legislator list in legislators.yml so we don't have
# to read it again every time we're testing the extraction of their
# voting history (which is still buggy).
if File.exist?("legislators.yml")
  # The cache holds OpenStruct objects, which plain YAML.load refuses to
  # instantiate on Psych 4+. Use unsafe_load where it exists and fall back to
  # load on older Rubies.
  # NOTE(review): this is only acceptable while legislators.yml is a trusted,
  # locally generated file -- confirm nobody feeds it external data.
  legislators = File.open("legislators.yml") do |f|
    YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(f) : YAML.load(f)
  end
else
  page = agent.get('http://www2.camara.gov.br/deputados')
  form = page.form('form1')
  select = form.fields[5]
  legislators = []
  # the first option is just the label
  select.options[1..-1].each do |option|
    # Read the option's instance variables directly (as the script always
    # did), but only once each and without evaluating a string.
    text = option.instance_variable_get(:@text)
    value = option.instance_variable_get(:@value)

    legislator = OpenStruct.new
    legislator.name = text
    # The option value packs every id into one string, e.g.
    # "ABELARDO CAMARINHA|528580%23329!SP=PSB" (decoded from the url below).
    # Each piece is nil when its delimiter pattern is absent.
    legislator.leg_id = value[/\|(\d+)%/, 1]
    legislator.mat = value[/%(\d+)!/, 1]
    legislator.uf = value[/\!(.*)=/, 1]
    legislator.part = value[/=(.*)/, 1]
    # we're trying to build this url:
    # http://www.camara.gov.br/internet/deputado/deputado_atual_resp.asp?fMode=1&deputado=ABELARDO%20CAMARINHA%7C528580%2523329%21SP%3DPSB&Pesquisa=Pesquisar&rbDeputado=VP&DepID=528580&DepUF=SP&DepMat=23329&DepPart=PSB
    legislator.site_metadata = OpenStruct.new
    legislator.site_metadata.select_value = value
    legislator.site_metadata.votings_url = "http://www.camara.gov.br/internet/deputado/deputado_atual_resp.asp?fMode=1&deputado=#{url_escape(value)}&Pesquisa=Pesquisar&rbDeputado=VP&DepID=#{legislator.leg_id}&DepUF=#{legislator.uf}&DepMat=#{legislator.mat}&DepPart=#{legislator.part}"
    puts legislator.name
    legislators << legislator
  end
  # Block form so the cache file is flushed and closed as soon as it is written.
  File.open("legislators.yml", "w") { |f| YAML.dump(legislators, f) }
end
# Column titles for votings.csv, in the order the columns are written.
field_names = %w[legislator_id plenary_session date proposition_title legislator_present vote]
# Every title is wrapped in double quotes and the titles are comma-separated.
header_line = field_names.map { |title| %("#{title}") }.join(",")
# Mode "w" truncates any votings.csv left over from a previous run.
File.open("votings.csv", "w") do |csv|
  csv << header_line + "\n"
end
require "shellwords"

# Download each legislator's votings page and append one CSV line per
# five-cell table row to votings.csv.
#
# Alternative selection, kept from the original script (only legislators
# whose votings attribute is still nil):
# legislators_to_rip = legislators.select { |d| d.votings.nil? }
legislators_to_rip = legislators
legislators_to_rip.each_with_index do |legislator, i|
  # The URL is assembled from scraped values, so escape it before handing it
  # to the shell instead of trusting it inside double quotes.
  page_url = Shellwords.escape(legislator.site_metadata.votings_url)
  doc = Hpricot(`links -source #{page_url} | iconv -f ISO_8859-1 -t UTF-8`)
  # Drop the first <tr> (presumably a header row -- confirm against the page).
  # Slicing an empty result yields nil, so fall back to an empty list.
  rows = (doc / "tr")[1..-1] || []
  legislator.votings = []
  # The name is passed as a format argument so a '%' in it cannot break the format.
  puts "(%d/%d) Scraping votings for %s" % [i + 1, legislators_to_rip.size, legislator.name]
  # Open the CSV once per legislator rather than once per row.
  File.open("votings.csv", "a") do |csv|
    rows.each do |row|
      begin
        next if row.nil?
        fields = (row / "td").map { |f| f.innerText.strip }
        # Only rows with exactly five cells are written. Per the original
        # notes the cells are: plenary session, date, voting, attendance in
        # the session, vote.
        next unless fields.size == 5
        # exclude &nbsp; at the end of the attendance cell
        fields[3] = fields[3][0...-1]
        # Double any embedded quote so the quoted CSV field stays well-formed.
        quoted = fields.map { |field| "\"#{field.gsub('"', '""')}\"" }
        csv << ([legislator.leg_id.to_s] + quoted).join(",") + "\n"
        print '.' # progress indicator
      rescue StandardError => e
        # StandardError only: Ctrl-C and exit must not be reported as scrape errors.
        puts "EXCEPTION while scraping votins for #{legislator.name}: "
        puts e.inspect
        # Non-zero status so callers can tell the run failed.
        exit 1
      end
    end
  end
  puts "\n"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment