Skip to content

Instantly share code, notes, and snippets.

Created October 1, 2017 14:18
Show Gist options
  • Save alendit/4a3b456f7092f1d6c94b5c357f944ac3 to your computer and use it in GitHub Desktop.
Save alendit/4a3b456f7092f1d6c94b5c357f944ac3 to your computer and use it in GitHub Desktop.
Bundestagswahl 2017: der Bundeswahlleiter-Scraper
"cells": [
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import bs4\n",
"import re\n",
"from tqdm import tqdm_notebook as tqdm\n",
"import json\n",
"from csv import writer, DictWriter"
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def parse_page(wk_url):\n",
"    \"\"\"Scrape the vote table of one constituency (Wahlkreis) result page.\n",
"\n",
"    Args:\n",
"        wk_url: URL of the constituency result page.\n",
"\n",
"    Returns:\n",
"        dict mapping the text of each row's first cell to\n",
"        ``{'first': int, 'second': int}``, read from the row's 2nd and 5th cells.\n",
"\n",
"    Raises:\n",
"        requests.HTTPError: if the server answers with an error status.\n",
"        IndexError: if the page has no ``.table-stimmen`` element or a row\n",
"            lacks one of the expected cells.\n",
"    \"\"\"\n",
"\n",
"    def cell_text(row, position):\n",
"        # Text of the row's n-th <td>, with surrounding whitespace removed.\n",
"        return row.select('td:nth-of-type(%d)' % position)[0].text.strip()\n",
"\n",
"    # Download the page once and reuse that single response for parsing.\n",
"    resp = requests.get(wk_url)\n",
"    resp.raise_for_status()\n",
"    wk_soup = bs4.BeautifulSoup(resp.content.decode('utf8'), \"html5lib\")\n",
"    votes = wk_soup.select('.table-stimmen')[0]\n",
"    rows = votes.select('tbody tr')\n",
"    result = {}\n",
"    for row in rows:\n",
"        name = cell_text(row, 1)\n",
"        # Dots are stripped before int() (presumably thousands separators);\n",
"        # a cell holding only '-' is treated as having no count.\n",
"        first = cell_text(row, 2).replace('.', '')\n",
"        first = 0 if first == '-' else int(first)\n",
"        second = cell_text(row, 5).replace('.', '')\n",
"        # NOTE(review): an empty second-vote cell falls back to the first-vote\n",
"        # count rather than 0 -- kept as in the original; confirm it is intended.\n",
"        second = first if second == '-' else int(second)\n",
"        result[name] = { 'first': first, 'second': second }\n",
"    return result"
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def parse_state(state_id):\n",
"    \"\"\"Scrape every constituency linked from one federal state's overview page.\n",
"\n",
"    Args:\n",
"        state_id: numeric id of the state, substituted into the page URLs.\n",
"\n",
"    Returns:\n",
"        dict mapping constituency id (int) to the result of ``parse_page``.\n",
"\n",
"    Raises:\n",
"        requests.HTTPError: if the overview page answers with an error status.\n",
"    \"\"\"\n",
"    # NOTE(review): the URL literals were empty in this copy of the notebook\n",
"    # ('' % state_id raises TypeError). The base below is reconstructed from the\n",
"    # 'land-N/wahlkreis-N.html' link pattern -- confirm against the live site.\n",
"    base_url = 'https://www.bundeswahlleiter.de/bundestagswahlen/2017/ergebnisse/bund-99'\n",
"    state_url = '%s/land-%d.html' % (base_url, state_id)\n",
"    state_resp = requests.get(state_url)\n",
"    state_resp.raise_for_status()\n",
"    state_dom = bs4.BeautifulSoup(state_resp.content.decode('utf8'), \"html5lib\")\n",
"    wk_links = state_dom.select('.flex .linklist .linklist__item a')\n",
"    # The dot before 'html' is escaped so it only matches a literal '.'.\n",
"    wk_href = re.compile(r'land-\\d+/wahlkreis-(\\d+)\\.html')\n",
"    result = {}\n",
"    for link in tqdm(wk_links, leave=False):\n",
"        found = wk_href.match(link.attrs.get('href', ''))\n",
"        if found is None:\n",
"            # Not a constituency link; skip it instead of failing on None.group().\n",
"            continue\n",
"        wk_id = int(found.group(1))\n",
"        result[wk_id] = parse_page('%s/land-%d/wahlkreis-%d.html' % (base_url, state_id, wk_id))\n",
"    return result\n"
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Numeric state id (as used in the result-page URLs) -> state name.\n",
"state_map = {\n",
"    8: 'Baden-Württemberg',\n",
"    9: 'Bayern',\n",
"    11: 'Berlin',\n",
"    12: 'Brandenburg',\n",
"    4: 'Bremen',\n",
"    2: 'Hamburg',\n",
"    6: 'Hessen',\n",
"    13: 'Mecklenburg-Vorpommern',\n",
"    3: 'Niedersachsen',\n",
"    5: 'Nordrhein-Westfalen',\n",
"    7: 'Rheinland-Pfalz',\n",
"    10: 'Saarland',\n",
"    14: 'Sachsen',\n",
"    15: 'Sachsen-Anhalt',\n",
"    1: 'Schleswig-Holstein',\n",
"    16: 'Thüringen',\n",
"}\n",
"\n",
"\n",
"def parse_btw():\n",
"    \"\"\"Scrape the results of every state listed in ``state_map``.\n",
"\n",
"    Returns:\n",
"        dict mapping state name to the per-constituency dict returned by\n",
"        ``parse_state`` for that state.\n",
"    \"\"\"\n",
"    results = {}\n",
"    # Iterate the ids actually present in state_map, in ascending order,\n",
"    # instead of assuming they are exactly 1..16.\n",
"    for state_id in tqdm(sorted(state_map)):\n",
"        results[state_map[state_id]] = parse_state(state_id)\n",
"    return results"
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "a391be37865240819a0f0dd7a17f0984",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=16), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "05797f3452464cadb3d6505a67fb7ece",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=11), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"name": "stdout",
"output_type": "stream",
"text": [
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5cc5ff238a904a7888cfe052dc58d1ab",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=6), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7559481292414fcb8ac083daf072d8f2",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=30), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f11297c509094187bb3ad48fa6a58481",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=2), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9165413e50034aaa8a12e0d28be96e40",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=64), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "77c72997c4444c0093bce7b028801fd0",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=22), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "69c8c2734b324a898767bf9f4566c86c",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=15), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "742b8711b0a249439e33ceadb4df0453",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=38), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9cf318f8e096494683de3a546d82ae7a",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=46), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "99ecea1dfcbb45dc9d0ec94a851eb378",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=4), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2894789fc248468f9fb40b8cc5393f31",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=12), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "556269f2d60943158f16e18b79f41f3a",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=10), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "dfc50a74ac6b4f3dbbb393fbe1e84fe0",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=6), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "896d810d7d5e44729b9143fd26a4fe85",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=16), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f04067944b8f44948e2b6280ee3e752d",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=9), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9e045e487c31494eb6e17e5d8a0a3338",
"version_major": 2,
"version_minor": 0
"text/html": [
"<p>Failed to display Jupyter Widget of type <code>HBox</code>.</p>\n",
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
" that the widgets JavaScript is still loading. If this message persists, it\n",
" likely means that the widgets JavaScript library is either not installed or\n",
" not enabled. See the <a href=\"\">Jupyter\n",
" Widgets Documentation</a> for setup instructions.\n",
" If you're reading this message in another notebook frontend (for example, a static\n",
" rendering on GitHub or <a href=\"\">NBViewer</a>),\n",
" it may mean that your frontend doesn't currently support widgets.\n",
"text/plain": [
"HBox(children=(IntProgress(value=0, max=8), HTML(value='')))"
"metadata": {},
"output_type": "display_data"
"name": "stdout",
"output_type": "stream",
"text": [
"source": [
"# Run the full scrape; parse_btw() issues network requests for every state\n",
"# and, through parse_state(), for every constituency page.\n",
"btw = parse_btw()"
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Persist the per-state results. The context manager closes the file, so the\n",
"# JSON is fully flushed to disk before the next cell runs.\n",
"with open('btw_results.json', 'w') as out_file:\n",
"    json.dump(btw, out_file)"
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"def to_districts(btw):\n",
"    \"\"\"Merge the per-state result dicts into one dict keyed by district id.\n",
"\n",
"    Args:\n",
"        btw: dict mapping a state key to a dict of ``{district id: results}``.\n",
"\n",
"    Returns:\n",
"        A new dict holding every ``district id -> results`` pair of every state;\n",
"        if an id occurs under several states, the last one seen wins.\n",
"    \"\"\"\n",
"    return {\n",
"        distr_id: distr_results\n",
"        for state_results in btw.values()\n",
"        for distr_id, distr_results in state_results.items()\n",
"    }"
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Flatten the state level away, then persist the result. The context manager\n",
"# closes the file so the JSON is completely written.\n",
"distrs = to_districts(btw)\n",
"with open('btw_distr_results.json', 'w') as out_file:\n",
"    json.dump(distrs, out_file)"
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"def to_flat(btw_distrs):\n",
"    \"\"\"Turn the nested district results into a flat list of rows.\n",
"\n",
"    Args:\n",
"        btw_distrs: dict mapping district id to a dict of\n",
"            ``{party: {'first': ..., 'second': ...}}``.\n",
"\n",
"    Returns:\n",
"        list of ``(district id, party, first, second)`` tuples, one per\n",
"        district/party pair, in the iteration order of the input dicts.\n",
"    \"\"\"\n",
"    return [\n",
"        (idx, party, party_res['first'], party_res['second'])\n",
"        for idx, distr_results in btw_distrs.items()\n",
"        for party, party_res in distr_results.items()\n",
"    ]"
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# newline='' hands line-ending control to the csv module (otherwise extra blank\n",
"# rows can appear on Windows); utf-8 keeps the non-ASCII party names writable\n",
"# regardless of the platform's default encoding.\n",
"with open('flat.csv', 'w', newline='', encoding='utf-8') as f:\n",
"    w = writer(f)\n",
"    w.writerows(to_flat(distrs))"
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Collect the CSV columns in first-seen order. `fields` is a list (lists have no\n",
"# .add()), so `seen` tracks membership and keeps each column name unique.\n",
"fields = ['distr_id']\n",
"seen = set(fields)\n",
"for distr in distrs.values():\n",
"    for key in distr.keys():\n",
"        for column in ('%s_first' % key, '%s_second' % key):\n",
"            if column not in seen:\n",
"                seen.add(column)\n",
"                fields.append(column)\n",
"\n",
"# newline='' hands line-ending control to the csv module; utf-8 keeps the\n",
"# non-ASCII party names writable regardless of the platform default encoding.\n",
"with open('even_flatter.csv', 'w', newline='', encoding='utf-8') as f:\n",
"    w = DictWriter(f, fieldnames=fields)\n",
"    w.writeheader()\n",
"    for idx, row in distrs.items():\n",
"        to_write = { 'distr_id': idx }\n",
"        for key, value in row.items():\n",
"            to_write['%s_first' % key] = value['first']\n",
"            to_write['%s_second' % key] = value['second']\n",
"        # Columns this district lacks are left empty (DictWriter's default restval).\n",
"        w.writerow(to_write)"
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
"data": {
"text/plain": [
"{'AfD': {'first': 10581, 'second': 11647},\n",
" 'BGE': {'first': 0, 'second': 843},\n",
" 'CDU': {'first': 68102, 'second': 58307},\n",
" 'DIE LINKE': {'first': 12138, 'second': 13995},\n",
" 'Die PARTEI': {'first': 0, 'second': 2091},\n",
" 'EB: Krüger-Winands': {'first': 755, 'second': 755},\n",
" 'FDP': {'first': 11143, 'second': 18948},\n",
" 'FREIE WÄHLER': {'first': 1943, 'second': 1189},\n",
" 'GRÜNE': {'first': 17899, 'second': 22290},\n",
" 'Gültige': {'first': 170258, 'second': 170396},\n",
" 'MLPD': {'first': 0, 'second': 59},\n",
" 'NPD': {'first': 0, 'second': 354},\n",
" 'PIRATEN': {'first': 0, 'second': 0},\n",
" 'SPD': {'first': 47697, 'second': 40376},\n",
" 'Tierschutzpartei': {'first': 0, 'second': 0},\n",
" 'Ungültige': {'first': 1647, 'second': 1509},\n",
" 'Wahlberechtigte': {'first': 225659, 'second': 225659},\n",
" 'Wähler': {'first': 171905, 'second': 171905},\n",
" 'ÖDP': {'first': 0, 'second': 297},\n",
" 'Übrige': {'first': 0, 'second': 0}}"
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
"source": []
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "",
"varRefreshCmd": "print(var_dic_list())"
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
"types_to_exclude": [
"window_display": false
"nbformat": 4,
"nbformat_minor": 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment