Skip to content

Instantly share code, notes, and snippets.

@0xbepresent
Created March 26, 2014 05:24
Show Gist options
  • Save 0xbepresent/9777329 to your computer and use it in GitHub Desktop.
Save 0xbepresent/9777329 to your computer and use it in GitHub Desktop.
Obtiene un listado de objetos Regiones y Comunas con su relacion Region de Chile
# -*- coding: utf-8 -*-
"""
Build lists of Region and Comuna objects for Chile, linking each comuna to
its region, using tables scraped from the Spanish-language Wikipedia.
@author: misalabs
"""
import requests
from bs4 import BeautifulSoup
class Region(object):
    """A region, identified by an id and a UTF-8 encoded name.

    Args:
        id: Identifier chosen by the caller. The name shadows the builtin
            but is kept because callers pass it as a keyword argument.
        name: Text (unicode) name of the region. It is stored UTF-8
            encoded, so ``self.name`` is a byte string.
    """

    def __init__(self, id, name):
        self.id = id
        # Stored as UTF-8 bytes; code comparing against this attribute must
        # use byte strings as well.
        self.name = name.encode("utf-8")

    def __repr__(self):
        return "%s(id=%r, name=%r)" % (
            type(self).__name__, self.id, self.name)
class Comuna(object):
    """A comuna, identified by an id, a UTF-8 encoded name and a region id.

    Args:
        id: Identifier chosen by the caller. The name shadows the builtin
            but is kept because callers pass it as a keyword argument.
        name: Text (unicode) name of the comuna. It is stored UTF-8
            encoded, so ``self.name`` is a byte string.
        region_id: Identifier of the region this comuna belongs to.
    """

    def __init__(self, id, name, region_id):
        self.id = id
        # Stored as UTF-8 bytes, not as text.
        self.name = name.encode("utf-8")
        self.region_id = region_id

    def __repr__(self):
        return "%s(id=%r, name=%r, region_id=%r)" % (
            type(self).__name__, self.id, self.name, self.region_id)
def _as_text(value):
    """Return *value* in a form that prints as readable text.

    Names are stored as UTF-8 byte strings. On Python 2 ``bytes`` is ``str``
    and prints as-is; on Python 3 the bytes must be decoded first, otherwise
    they would be printed as a ``b'...'`` literal.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode("utf-8")
    return value


# Seconds to wait for a response before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Download the Wikipedia pages that list Chile's regions and comunas.
# raise_for_status() stops the script on an HTTP error instead of letting it
# parse an error page.
regions_html = requests.get('http://es.wikipedia.org/wiki/Región_de_Chile',
                            timeout=REQUEST_TIMEOUT)
regions_html.raise_for_status()
comunas_html = requests.get("http://es.wikipedia.org/wiki/Anexo:Comunas_de_Chile",
                            timeout=REQUEST_TIMEOUT)
comunas_html.raise_for_status()

# Construct the soups. The parser is named explicitly so the result does not
# depend on which optional parsers happen to be installed.
page_regions = BeautifulSoup(regions_html.text, "html.parser")
# NOTE(review): the table positions (fourth table for regions, first table for
# comunas) are tied to the page layout - confirm they still match.
table_regions = page_regions.find_all('table')[3]
page_comuna = BeautifulSoup(comunas_html.text, "html.parser")
table_comunas = page_comuna.find_all('table')[0]

# Get Regions: the name is read from the fourth <td> of each row and the row
# index is used as the region id.
regions = []
for idx, row in enumerate(table_regions.find_all('tr')):
    # Slicing (rather than indexing) yields an empty list for rows with fewer
    # than four <td> cells, so those rows are skipped.
    cells_region = row.find_all('td')[3:4]
    for region in cells_region:
        # Strip surrounding whitespace so the substring match below is not
        # defeated by a stray newline in the cell.
        regions.append(Region(id=idx, name=region.get_text().strip()))

# Get Comunas and set relations with Region
comunas = []
for idx, row in enumerate(table_comunas.find_all('tr')):
    cells = row.find_all('td')
    cell_comuna = cells[1:2]  # second cell: comuna name
    cell_region = cells[4:5]  # fifth cell: text naming the region
    if cell_comuna and cell_region:
        comuna_n = cell_comuna[0].get_text().strip()
        # Region names are stored UTF-8 encoded, so encode before comparing.
        region_n = cell_region[0].get_text().encode('utf-8')
        # Set relation with Region: first region whose name occurs in the cell.
        for region in regions:
            # An empty name is a substring of everything; never match on it.
            if region.name and region.name in region_n:
                comunas.append(
                    Comuna(id=idx, name=comuna_n, region_id=region.id))
                # Stop at the first match so a comuna is not added twice.
                break

# Single-argument print(...) with %-formatting behaves the same on Python 2
# and Python 3.
for region in regions:
    print("%s %s" % (_as_text(region.name), region.id))
for comuna in comunas:
    print("%s %s" % (_as_text(comuna.name), comuna.region_id))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment