Instantly share code, notes, and snippets.

@raspi /main.py
Created Jun 12, 2018

Embed
What would you like to do?
Eduskunnan spostit
import logging
import os
import sys
from urllib.parse import unquote, urljoin, urlsplit

# 3rd party
from bs4 import BeautifulSoup
import requests
if __name__ == "__main__":
logging.basicConfig(
stream=sys.stdout,
level=logging.INFO,
#level=logging.DEBUG,
format='%(asctime)s %(levelname)s: %(message)s',
)
log = logging.getLogger(__name__)
ses = requests.session()
main_url_page = "https://www.eduskunta.fi/FI/kansanedustajat/nykyiset_kansanedustajat/Sivut/default.aspx"
log.info("Getting '{}'".format(main_url_page))
req = ses.get(main_url_page)
if req.status_code != 200:
log.info("Error getting '{}'".format(main_url_page))
sys.exit(1)
soup = BeautifulSoup(req.content, "html.parser")
_links = []
for tbl in soup.find_all('table'):
for link in tbl.find_all('a', href=True):
_url = link['href']
if _url.find('/kansanedustajat/Sivut/') != -1:
_links.append(link['href'])
_mails = []
for link in _links:
log.info("Getting '{}'".format(link))
req = ses.get(link)
if req.status_code != 200:
log.info("Error getting '{}'".format(link))
continue
soup = BeautifulSoup(req.content, "html.parser")
for tbl in soup.find_all('table'):
for l in tbl.find_all('a', href=True):
_url = l['href']
if _url.startswith('mailto:'):
_mails.append(_url.replace('mailto:', ''))
_mails = list(set(_mails))
print()
print()
for mail in _mails:
print(mail)
from setuptools import setup
PACKAGE_NAME = 'eduskunta'
PACKAGE_REQUIREMENTS = [
'requests',
'bs4',
]
setup(
name=PACKAGE_NAME,
version='0.0.1',
description='',
url='',
author='',
author_email='',
license='MIT',
packages=[PACKAGE_NAME],
install_requires=PACKAGE_REQUIREMENTS,
zip_safe=False
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment