Skip to content

Instantly share code, notes, and snippets.

@tommycarstensen
Created November 7, 2022 20:36
Show Gist options
  • Save tommycarstensen/e2953b622b8b4de6dc4d081e5b0cc0db to your computer and use it in GitHub Desktop.
Save tommycarstensen/e2953b622b8b4de6dc4d081e5b0cc0db to your computer and use it in GitHub Desktop.
import geopandas as gpd
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import matplotlib.pyplot as plt
# Seconds to wait for kmdvalg.dk before giving up on a request; without a
# timeout a stalled connection would hang the script indefinitely.
_REQUEST_TIMEOUT = 30

# CSS class strings of the cells holding candidate names and vote counts on a
# party result page.
_NAME_CELL_CLASS = "table-like-cell col-xs-7 col-sm-6 col-md-6 col-lg-8"
_COUNT_CELL_CLASS = (
    "table-like-cell col-xs-5 col-sm-6 col-md-6 col-lg-4 text-right roboto-bold"
)

# Columns holding names/placements; they are plotted without a colour map or
# colour-bar label, unlike the numeric columns.
_CATEGORICAL_COLUMNS = (
    'most_votes',
    'second_most_votes',
    'third_most_votes',
    'candidate_of_interest',
)

# Plot titles for the summary columns; candidate columns use their own name.
_PLOT_TITLES = {
    'most_votes': 'Flest stemmer',
    'second_most_votes': 'Næstflest stemmer',
    'third_most_votes': 'Tredjeflest stemmer',
    'candidate_of_interest': 'Placering',
    'sum': 'Antal personlige stemmer til Venstre',
}


def _fetch_soup(url):
    """Download ``url`` and return its body parsed with ``html.parser``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no response arrives within the timeout.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return bs(response.content, "html.parser")


def _scrape_personal_votes(soup):
    """Return ``(names, counts)`` read from a parsed party result page.

    The two lists are collected independently (one per cell class) and are
    paired positionally by the caller.
    """
    names = [cell.text for cell in soup.find_all('div', {"class": _NAME_CELL_CLASS})]
    counts = [
        int(cell.text)
        for cell in soup.find_all('div', {"class": _COUNT_CELL_CLASS})
    ]
    return names, counts


def _rank_candidates(names, counts, candidate_of_interest):
    """Rank candidates by vote count and locate the candidate of interest.

    Returns:
        ``(podium, placement)`` where ``podium`` is a list with the names of
        the three highest-ranked candidates (padded with ``''`` when fewer
        than three were scraped) and ``placement`` is the 1-based rank of
        ``candidate_of_interest``, or ``''`` if that name is not on the page.
    """
    # Sorting (count, name) tuples in reverse keeps the original tie-break:
    # equal counts are ordered by name, descending.
    ranked = [name for _count, name in sorted(zip(counts, names), reverse=True)]
    podium = (ranked + ['', '', ''])[:3]
    try:
        placement = ranked.index(candidate_of_interest) + 1
    except ValueError:
        placement = ''
    return podium, placement


def _plot_column(gdf, column):
    """Draw a map coloured by ``column`` and save it as ``<column>.png``."""
    print(column)
    fig, ax = plt.subplots()
    try:
        fig.set_size_inches(16/2, 9/2)
        if column in _CATEGORICAL_COLUMNS:
            cmap = None
            legend_kwds = None
        else:
            cmap = 'Blues'
            if column == 'sum':
                label = 'Antal personlige stemmer til Venstre'
            else:
                label = 'Procentdel af personlige stemmer til Venstre'
            legend_kwds = {
                'label': label,
                'orientation': "vertical",
            }
        gdf.plot(
            ax=ax,
            column=column,
            cmap=cmap,
            legend=True,
            legend_kwds=legend_kwds,
        )
        ax.set_title(_PLOT_TITLES.get(column, column))
        ax.axis('off')
        fig.savefig('{}.png'.format(column.replace(' ', '_')))
    finally:
        # One figure is created per column; close it so figures do not
        # accumulate in memory across the loop.
        plt.close(fig)


def main():
    """Map personal votes per polling district for one party's candidates.

    Reads the polling-district shapefile, keeps four municipalities, applies
    a few manual corrections, scrapes per-polling-place personal vote counts
    from kmdvalg.dk, stores rankings and vote percentages on the matching
    rows, and writes one PNG map per summary column and per candidate.

    NOTE(review): SOURCE had lost its indentation; the nesting below was
    reconstructed from the code's logic (per-polling-place work inside the
    link loop, the "redistricting" summary and plotting after all pages).

    Raises:
        requests.HTTPError, requests.Timeout: on failed downloads.
        RuntimeError: if an overview page lacks the polling-place list.
        ValueError: if a polling-place link does not belong to the
            constituency page it was found on.
    """
    # https://dk.okfn.org/index.html%3Fp=212.html
    path = 'AFSTEMNINGSOMRAADE.shp'
    df = gpd.read_file(path)
    # Copy the filtered frame so the corrections below modify an independent
    # object rather than a slice of the frame read from disk.
    df = df[df['KommuneNav'].isin(['København', 'Frederiksberg', 'Tårnby', 'Dragør'])].copy()
    df.loc[2435, 'AfstemKod'] = '58'  # 2022 Nørrebrohallen / 2015 5. midt (26)
    # Merge Dragør
    for row_label in (2204, 2277, 2545, 2737):
        df.loc[row_label, 'AfstemKod'] = '01'
    # Remove incorrect Kastrup geometry: rows labelled 2099..2272 whose
    # AfstemNav is 'Kastrup'.
    in_label_range = (df.index >= 2099) & (df.index < 2273)
    is_kastrup = (df['AfstemNav'] == 'Kastrup').to_numpy()
    df = df.drop(df.index[in_label_range & is_kastrup])

    party = 'V'
    candidates = [
        'Alexandra Sasha',
        'René Bredal',
        'Claus Buch',
        'Anders Fausbøll',
        'Martin Geertsen',
        'Jan E. Jørgensen',
        'Michael Lange',
        'Anne Rasmussen',
        'Bo Sandroos',
        'Sven Aage Schlosrich',
        'Linea Søgaard-Lidell',
        'Søren Sørensen',
    ]
    candidate_of_interest = 'Jan E. Jørgensen'
    # Snapshot of the shapefile's own columns, used for the debug print below.
    columns = df.columns
    for candidate in candidates:
        # Float from the start: these columns receive percentages.
        df[candidate] = 0.0
    df['most_votes'] = ''
    df['second_most_votes'] = ''
    df['third_most_votes'] = ''
    df['candidate_of_interest'] = ''
    df['sum'] = 0
    # Columns to map at the end. Kept separate from the scraped name lists so
    # plotting does not depend on whichever page was fetched last.
    plot_columns = [
        'sum',
        'most_votes',
        'second_most_votes',
        'third_most_votes',
        'candidate_of_interest',
    ] + candidates

    # (KommuneNum, page id) pairs; the first three digits of the page id
    # minus 100 give the OpstilNum used in the shapefile.
    district_pages = (
        [(101, str(i)) for i in range(101, 109 + 1)]  # København
        + [(147, str(i)) for i in range(110, 111 + 1)]  # Frederiksberg
        + [
            (155, '112155'),  # Dragør
            (185, '112185'),  # Tårnby
        ]
    )

    for kommune_num, page_id in district_pages:
        opstil_num = int(page_id[:3]) - 100
        print()
        print('OpstilNum', opstil_num)
        in_district = (df['OpstilNum'] == opstil_num) & (df['KommuneNum'] == kommune_num)
        print(df[in_district][columns].drop('geometry', axis='columns').to_string())

        overview_url = f'https://www.kmdvalg.dk/fv/2022/F{page_id}.htm'
        area_list = _fetch_soup(overview_url).find(
            'div', {"class": "kmd-voting-areas-list-items"})
        if area_list is None:
            raise RuntimeError(f'No polling-place list found on {overview_url}')

        # The first link in the list is skipped, as in the original script.
        for link in area_list.find_all('a')[1:]:
            href = link.get('href')
            if int(href[1:4]) - 100 != opstil_num:
                raise ValueError(
                    f'Link {href!r} does not belong to OpstilNum {opstil_num}')
            afstem_kod = href[8:10]
            # Dragør and Tårnby share an OpstilNum, so every row selection
            # must include KommuneNum to keep their polling places apart.
            mask = in_district & (df['AfstemKod'] == afstem_kod)
            print('yyy', opstil_num, afstem_kod, link.text, df[mask]['AfstemNav'].values)

            party_url = f'https://www.kmdvalg.dk/fv/2022/f{href[1:-4]}{party}.htm'
            names, counts = _scrape_personal_votes(_fetch_soup(party_url))
            podium, placement = _rank_candidates(names, counts, candidate_of_interest)
            total = sum(counts)

            df.loc[mask, 'most_votes'] = podium[0]
            df.loc[mask, 'second_most_votes'] = podium[1]
            df.loc[mask, 'third_most_votes'] = podium[2]
            df.loc[mask, 'candidate_of_interest'] = placement
            df.loc[mask, 'sum'] = total
            for name, count in zip(names, counts):
                if name not in df.columns:
                    # A scraped name missing from the declared list still
                    # gets its own column and map.
                    df[name] = 0.0
                    plot_columns.append(name)
                # Guard against a page reporting zero personal votes.
                df.loc[mask, name] = 100 * count / total if total else 0.0
            if not mask.any():
                print(link.text, opstil_num, afstem_kod)

    print('Nobody with most votes due to redistricting')
    print(df[df['most_votes'] == ''])
    gdf = gpd.GeoDataFrame(df)
    for column in plot_columns:
        _plot_column(gdf, column)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment