Skip to content

Instantly share code, notes, and snippets.

@mikolajb
Created October 14, 2016 11:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikolajb/8b4e00b23de05305eba7978564e8932c to your computer and use it in GitHub Desktop.
Save mikolajb/8b4e00b23de05305eba7978564e8932c to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
from collections import defaultdict
from datetime import datetime
from pymongo import MongoClient
# Maps the label text found on a deputy's profile page (trailing colon
# included) to the field name under which the value is stored in the record.
# Labels that are not listed here are ignored by the scraper.
KEYS = {
    "Lista:": "lista",
    "Okręg wyborczy:": "okręg",
    "Liczba głosów:": "glosow",
    "Staż parlamentarny:": "staz",
    "Klub/koło:": "klub",
    "Wykształcenie:": "wyksztalcenie",
    "Ukończona szkoła:": "szkola",
    "Zawód:": "zawod",
    "Wygaśnięcie mandatu:": "wygasl",
}
# Deputies scraped so far, keyed by the name shown in the page heading.
# Records that share a name overwrite each other here; MongoDB still receives
# one document per successfully parsed page.
poslowie = defaultdict(dict)
client = MongoClient()
c = client.poslowiedb.poslowie

# Seconds to wait for the server before giving up on a single profile page.
# Without a timeout one stalled connection would block the whole run.
REQUEST_TIMEOUT = 30


def _parse_posel(html, sejm):
    """Build one deputy record from the HTML of a profile page.

    Args:
        html: Text of a ``posel.xsp`` page.
        sejm: Term number the page was fetched for; stored under ``"sejm"``.

    Returns:
        A dict with ``name``, ``sejm``, ``urodzony`` (a ``datetime``) and
        ``miejsce``, plus one entry for every page label listed in ``KEYS``.
        ``None`` when the page has no ``<h1>``, no ``id="urodzony"`` element,
        or a birth date that does not match ``dd-mm-YYYY``.
    """
    soup = BeautifulSoup(html, "html.parser")
    heading = soup.h1
    born = soup.find(id="urodzony")
    if heading is None or born is None:
        return None

    # The birth field reads "<dd-mm-YYYY>,<NBSP><place>".  Split on the first
    # separator only, so a place containing the same sequence stays intact and
    # a missing place yields an empty string instead of an unpacking error.
    date_text, _, place = born.text.partition(",\xa0")
    try:
        birth_date = datetime.strptime(date_text, "%d-%m-%Y")
    except ValueError:
        return None

    posel = {
        "name": heading.text,
        "sejm": sejm,
        "urodzony": birth_date,
        "miejsce": place,
    }
    # Both sections are lists of <li> items whose first <p> is the label and
    # whose second <p> is the value.
    for part in ["partia", "cv"]:
        section = soup.find("div", part)
        if section is None:
            continue
        for item in section.find_all("li"):
            paragraphs = item.find_all("p")
            if len(paragraphs) < 2:
                continue
            label = paragraphs[0].text
            if label in KEYS:
                posel[KEYS[label]] = paragraphs[1].text
    return posel


for sejm, limit in [(7, 518), (8, 471)]:
    print("Sejm {}".format(sejm))
    # Ids are tried from 1 up to, but not including, ``limit``.
    for i in range(1, limit):
        print("{}/{}".format(i, limit))
        url = "http://sejm.pl/Sejm{}.nsf/posel.xsp?id={:0>3}".format(sejm, i)
        try:
            resp = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # One unreachable page should not abort the remaining requests.
            print("request failed, skipping: {}".format(err))
            continue
        if resp.status_code != 200:
            continue
        posel = _parse_posel(resp.text, sejm)
        if posel is None:
            print("unexpected page layout, skipping")
            continue
        c.insert_one(posel)
        poslowie[posel["name"]] = posel
def _age_in_years(born, today):
    """Return the number of completed years between *born* and *today*.

    Compares calendar month and day rather than dividing a day count by 365,
    which drifts by one day per leap year and reports an age one year too
    high shortly before a birthday.
    """
    had_birthday = (today.month, today.day) >= (born.month, born.day)
    return today.year - born.year - (0 if had_birthday else 1)


# Print every scraped deputy, oldest first: name, age in years, birth date.
# The reference date is taken once so all rows are computed against the same
# "today".
_today = datetime.today()
_by_birth_date = sorted(poslowie.items(), key=lambda item: item[1]["urodzony"])
for name, posel in _by_birth_date:
    born = posel["urodzony"]
    print(
        "{:>20} {:>4} {:>12}".format(
            name,
            _age_in_years(born, _today),
            str(born.date()),
        )
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment