Skip to content

Instantly share code, notes, and snippets.

@ferblape
Created May 23, 2019 12:10
Show Gist options
  • Save ferblape/e34c6cc07b7096c86f9f8ba27349d01b to your computer and use it in GitHub Desktop.
Save ferblape/e34c6cc07b7096c86f9f8ba27349d01b to your computer and use it in GitHub Desktop.
Scrape INE > Alteraciones de los municipios en los Censos de Población desde 1842
# Fetch data from https://www.ine.es/dyngs/INEbase/es/operacion.htm?c=Estadistica_C&cid=1254736176998&menu=resultados&idp=1254735572981
require "json"
require "net/http"
require "uri"

require "byebug"
require "nokogiri"
# Walks every province returned by INE::Places, asks the INE intercensal form
# for the municipalities of that province, then requests each municipality's
# page and stores the figures of its "datos" table in data.json, keyed by the
# INE::Places place id.
#
# NOTE(review): INE::Places is never required in this file; presumably the
# ine-places gem is loaded by whatever environment runs this script -- confirm.

# POSTs +params+ as form fields to +uri+ and returns the response body parsed
# as an HTML document. The HTTP status is not inspected.
def fetch_document(uri, params)
  response = Net::HTTP.post_form(uri, params)
  Nokogiri::HTML(response.body)
end

# Returns the integer value of every +tag+ cell of +row+ except the first
# (presumably the row label -- confirm against the page markup).
#
# NOTE(review): String#to_i stops at the first non-digit character, so a cell
# rendered with a thousands separator (e.g. "1.234") would be read as 1.
# Confirm how the table formats its numbers.
def cell_values(row, tag)
  row.css(tag).map { |cell| cell.text.to_i }.drop(1)
end

url = "https://www.ine.es/intercensal/intercensal.do"
uri = URI.parse(url)
data = {}

INE::Places::Province.all.each do |province|
  begin
    listing = fetch_document(uri, {
      "cmbCCAA" => province.autonomous_region.id,
      "cmbProv" => province.id,
      "btnBuscarMuni" => "Consultar municipios",
      "search" => 2,
      "nocab" => ""
    })
    # The first <option> is skipped (presumably a placeholder entry -- confirm).
    # `drop(1)` is used instead of `[1..-1]` because the latter returns nil for
    # an empty list, and iterating nil would abort the whole run.
    municipality_ids = listing.search("select#cmbMuni option").map { |option| option["value"] }.drop(1)
  rescue StandardError => e
    # A failed province listing must not discard what was already collected:
    # log it the same way municipality failures are logged and move on.
    puts [province.id, province.name].join(' -- ')
    puts e.message
    next
  end

  puts
  puts "Province: #{province.name}"

  municipality_ids.each do |option_value|
    # Identifier printed on failure: the form value until the page's own
    # municipality code has been parsed, that code afterwards.
    place_id = option_value

    begin
      page = fetch_document(uri, {
        "cmbCCAA" => province.autonomous_region.id,
        "cmbProv" => province.id,
        "cmbMuni" => option_value,
        "btnBuscarGeo" => "Consultar selección",
        "search" => 2,
        "nocab" => ""
      })

      title = page.xpath("//span[@class='TITULOH3']").text.strip
      code_match = title.match(/Municipio:.(\d+)./)
      raise "municipality code not found in page title" unless code_match

      place_id = code_match[1]
      place = INE::Places::Place.find(place_id.to_i)
      next if place.nil?

      puts place.name

      table = page.xpath("//table[@summary='datos']").first
      raise "data table not found in page" unless table

      rows = table.css("tr")
      data[place.id] = {
        homes: cell_values(rows[4], "td"), # row 5: number of households
        years: cell_values(rows[1], "th"), # row 2: census years
        ph: cell_values(rows[2], "td"),    # row 3: de facto population (Población de Hecho)
        pd: cell_values(rows[3], "td")     # row 4: de jure population (Población de Derecho)
      }
    rescue StandardError => e
      # Best effort: report the municipality that failed and keep going.
      puts [province.id, province.name, place_id].join(' -- ')
      puts e.message
    end
  end
end

File.write("data.json", data.to_json)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment