# tommycarstensen/fv2022_venstre_kbh.py

## fv2022_venstre_kbh.py
import geopandas as gpd
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd

import matplotlib.pyplot as plt

def _fetch_soup(url):
    """Download ``url`` and return the page parsed with BeautifulSoup.

    Raises ``requests.HTTPError`` on a non-success status so that a missing
    result page is reported at the request itself rather than as an attribute
    error on a failed lookup further down.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return bs(response.content, "html.parser")


def _plot_column(gdf, column):
    """Draw ``column`` of ``gdf`` as a map and save it as ``<column>.png``.

    Spaces in the column name are replaced by underscores in the file name.
    The figure is closed after saving so figures do not pile up in memory
    when many columns are plotted.
    """
    print(column)

    fig, ax = plt.subplots()
    fig.set_size_inches(16/2, 9/2)

    if column in ('most_votes', 'second_most_votes', 'third_most_votes', 'candidate_of_interest'):
        # Name and rank columns: no colormap and no legend keywords are passed.
        cmap = None
        legend_kwds = None
    else:
        cmap = 'Blues'
        if column == 'sum':
            label = 'Antal personlige stemmer til Venstre'
        else:
            label = 'Procentdel af personlige stemmer til Venstre'
        legend_kwds = {
            'label': label,
            'orientation': "vertical",
            }
    gdf.plot(
        ax=ax,
        column=column,
        cmap=cmap,
        legend=True,
        legend_kwds=legend_kwds,
        )
    ax.set_title({
        'most_votes': 'Flest stemmer',
        'second_most_votes': 'Næstflest stemmer',
        'third_most_votes': 'Tredjeflest stemmer',
        'candidate_of_interest': 'Placering',
        'sum': 'Antal personlige stemmer til Venstre',
        }.get(column, column))
    ax.axis('off')
    fig.savefig('{}.png'.format(column.replace(' ', '_')))
    plt.close(fig)


def main():
    """Map per-polling-station personal votes for party ``V`` (Venstre).

    Steps:

    1. Read ``AFSTEMNINGSOMRAADE.shp`` from the working directory and keep the
       rows for København, Frederiksberg, Tårnby and Dragør, applying a few
       hard-coded corrections to station codes and geometry.
    2. For every listed area, scrape the kmdvalg.dk ``fv/2022`` pages: the area
       page lists the polling stations, and each station has a party page with
       the candidates and their vote counts.
    3. Store, per station row, the three candidates with the most votes, the
       rank of ``candidate_of_interest``, the vote total, and each candidate's
       percentage of that total.
    4. Save one PNG map per result column in the working directory.

    Raises:
        requests.HTTPError: if a result page cannot be downloaded.
        ValueError: if a page does not have the expected structure, or if
            ``candidate_of_interest`` is not listed for a station.
    """
    # https://dk.okfn.org/index.html%3Fp=212.html
    path = 'AFSTEMNINGSOMRAADE.shp'
    df = gpd.read_file(path)
    municipalities = ['København', 'Frederiksberg', 'Tårnby', 'Dragør']
    # Copy so the corrections below edit an independent frame instead of a
    # selection of the frame that was read from disk.
    df = df[df['KommuneNav'].isin(municipalities)].copy()

    df.loc[2435, 'AfstemKod'] = '58'  # 2022 Nørrebrohallen / 2015 5. midt (26)
    # Merge Dragør
    for label in (2204, 2277, 2545, 2737):
        df.loc[label, 'AfstemKod'] = '01'

    # Remove incorrect Kastrup geometry: rows labelled 2099..2272 whose
    # station name is 'Kastrup'.
    in_range = (df.index >= 2099) & (df.index < 2273)
    is_kastrup = (df['AfstemNav'] == 'Kastrup').to_numpy()
    df = df.drop(df.index[in_range & is_kastrup])

    party = 'V'

    # Kept as a list (not a set) so column and plot order is deterministic.
    # Names scraped later that are missing here are appended to it.
    candidates = [
        'Alexandra Sasha',
        'René Bredal',
        'Claus Buch',
        'Anders Fausbøll',
        'Martin Geertsen',
        'Jan E. Jørgensen',
        'Michael Lange',
        'Anne Rasmussen',
        'Bo Sandroos',
        'Sven Aage Schlosrich',
        'Linea Søgaard-Lidell',
        'Søren Sørensen',
        ]

    candidate_of_interest = 'Jan E. Jørgensen'

    # Columns of the shapefile itself, captured before result columns are added
    # (used only for the debug print of each area).
    columns = df.columns
    for candidate in candidates:
        # Float from the start: these columns receive percentages.
        df[candidate] = 0.0
    df['most_votes'] = ''
    df['second_most_votes'] = ''
    df['third_most_votes'] = ''
    df['candidate_of_interest'] = ''
    df['sum'] = 0

    # (KommuneNum, page key); the first three digits of the key minus 100 give
    # the OpstilNum used in the shapefile.
    areas = (
        [(101, str(i)) for i in range(101, 109 + 1)]  # København
        + [(147, str(i)) for i in range(110, 111 + 1)]  # Frederiksberg
        + [
            (155, '112155'),  # Dragør
            (185, '112185'),  # Tårnby
        ]
    )

    for KommuneNum, k in areas:
        OpstilNum = int(k[:3]) - 100
        in_area = (df['OpstilNum'] == OpstilNum) & (df['KommuneNum'] == KommuneNum)
        print()
        print('OpstilNum', OpstilNum)
        print(df[in_area][columns].drop('geometry', axis='columns').to_string())

        area_soup = _fetch_soup(f'https://www.kmdvalg.dk/fv/2022/F{k}.htm')
        div = area_soup.find('div', {"class": "kmd-voting-areas-list-items"})
        if div is None:
            raise ValueError(f'No polling station list found on page F{k}.htm')

        # The first link of the list is skipped; the rest are polling stations.
        for a in div.find_all('a')[1:]:
            href = a.get('href')
            if int(href[1:4]) - 100 != OpstilNum:
                raise ValueError(
                    f'Link {href!r} does not belong to OpstilNum {OpstilNum}')
            AfstemKod = href[8:10]

            # Always include KommuneNum: Dragør and Tårnby share OpstilNum 12,
            # so OpstilNum + AfstemKod alone can select rows of the wrong
            # municipality.
            mask = in_area & (df['AfstemKod'] == AfstemKod)
            print('yyy', OpstilNum, AfstemKod, a.text, df[mask]['AfstemNav'].values)

            station_soup = _fetch_soup(
                f'https://www.kmdvalg.dk/fv/2022/f{href[1:-4]}{party}.htm')
            div_candidates = station_soup.find_all('div', {"class": "table-like-cell col-xs-7 col-sm-6 col-md-6 col-lg-8"})
            div_counts = station_soup.find_all('div', {"class": "table-like-cell col-xs-5 col-sm-6 col-md-6 col-lg-4 text-right roboto-bold"})
            counts = [int(cell.text) for cell in div_counts]
            names = [cell.text for cell in div_candidates]

            # Highest count first; equal counts are ordered by name, descending.
            ranked_names = [name for _, name in sorted(zip(counts, names), reverse=True)]
            rank = ranked_names.index(candidate_of_interest) + 1
            total = sum(counts)

            df.loc[mask, 'most_votes'] = ranked_names[0]
            df.loc[mask, 'second_most_votes'] = ranked_names[1]
            df.loc[mask, 'third_most_votes'] = ranked_names[2]
            df.loc[mask, 'candidate_of_interest'] = rank
            df.loc[mask, 'sum'] = total
            for name, count in zip(names, counts):
                if name not in candidates:
                    # Name on the page that is not in the predefined list:
                    # give it a column too so it is stored and plotted.
                    df[name] = 0.0
                    candidates.append(name)
                # A station without any votes gets 0 % instead of a division
                # by zero.
                df.loc[mask, name] = 100 * count / total if total else 0.0
            if not mask.any():
                # Station on the web page without a matching shapefile row.
                print(a.text, OpstilNum, AfstemKod)

    print('Nobody with most votes due to redistricting')
    print(df[df['most_votes'] == ''])

    gdf = gpd.GeoDataFrame(df)
    summary_columns = ['sum', 'most_votes', 'second_most_votes', 'third_most_votes', 'candidate_of_interest']
    for column in summary_columns + candidates:
        _plot_column(gdf, column)


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
	import geopandas as gpd
	import requests
	from bs4 import BeautifulSoup as bs
	import pandas as pd

	import matplotlib.pyplot as plt

	def _fetch_soup(url):
	    """Download ``url`` and return the page parsed with BeautifulSoup.

	    Raises ``requests.HTTPError`` on a non-success status so that a missing
	    result page is reported at the request itself rather than as an attribute
	    error on a failed lookup further down.
	    """
	    # Without a timeout a stalled connection would block the script forever.
	    response = requests.get(url, timeout=30)
	    response.raise_for_status()
	    return bs(response.content, "html.parser")


	def _plot_column(gdf, column):
	    """Draw ``column`` of ``gdf`` as a map and save it as ``<column>.png``.

	    Spaces in the column name are replaced by underscores in the file name.
	    The figure is closed after saving so figures do not pile up in memory
	    when many columns are plotted.
	    """
	    print(column)

	    fig, ax = plt.subplots()
	    fig.set_size_inches(16/2, 9/2)

	    if column in ('most_votes', 'second_most_votes', 'third_most_votes', 'candidate_of_interest'):
	        # Name and rank columns: no colormap and no legend keywords are passed.
	        cmap = None
	        legend_kwds = None
	    else:
	        cmap = 'Blues'
	        if column == 'sum':
	            label = 'Antal personlige stemmer til Venstre'
	        else:
	            label = 'Procentdel af personlige stemmer til Venstre'
	        legend_kwds = {
	            'label': label,
	            'orientation': "vertical",
	            }
	    gdf.plot(
	        ax=ax,
	        column=column,
	        cmap=cmap,
	        legend=True,
	        legend_kwds=legend_kwds,
	        )
	    ax.set_title({
	        'most_votes': 'Flest stemmer',
	        'second_most_votes': 'Næstflest stemmer',
	        'third_most_votes': 'Tredjeflest stemmer',
	        'candidate_of_interest': 'Placering',
	        'sum': 'Antal personlige stemmer til Venstre',
	        }.get(column, column))
	    ax.axis('off')
	    fig.savefig('{}.png'.format(column.replace(' ', '_')))
	    plt.close(fig)


	def main():
	    """Map per-polling-station personal votes for party ``V`` (Venstre).

	    Steps:

	    1. Read ``AFSTEMNINGSOMRAADE.shp`` from the working directory and keep the
	       rows for København, Frederiksberg, Tårnby and Dragør, applying a few
	       hard-coded corrections to station codes and geometry.
	    2. For every listed area, scrape the kmdvalg.dk ``fv/2022`` pages: the area
	       page lists the polling stations, and each station has a party page with
	       the candidates and their vote counts.
	    3. Store, per station row, the three candidates with the most votes, the
	       rank of ``candidate_of_interest``, the vote total, and each candidate's
	       percentage of that total.
	    4. Save one PNG map per result column in the working directory.

	    Raises:
	        requests.HTTPError: if a result page cannot be downloaded.
	        ValueError: if a page does not have the expected structure, or if
	            ``candidate_of_interest`` is not listed for a station.
	    """
	    # https://dk.okfn.org/index.html%3Fp=212.html
	    path = 'AFSTEMNINGSOMRAADE.shp'
	    df = gpd.read_file(path)
	    municipalities = ['København', 'Frederiksberg', 'Tårnby', 'Dragør']
	    # Copy so the corrections below edit an independent frame instead of a
	    # selection of the frame that was read from disk.
	    df = df[df['KommuneNav'].isin(municipalities)].copy()

	    df.loc[2435, 'AfstemKod'] = '58'  # 2022 Nørrebrohallen / 2015 5. midt (26)
	    # Merge Dragør
	    for label in (2204, 2277, 2545, 2737):
	        df.loc[label, 'AfstemKod'] = '01'

	    # Remove incorrect Kastrup geometry: rows labelled 2099..2272 whose
	    # station name is 'Kastrup'.
	    in_range = (df.index >= 2099) & (df.index < 2273)
	    is_kastrup = (df['AfstemNav'] == 'Kastrup').to_numpy()
	    df = df.drop(df.index[in_range & is_kastrup])

	    party = 'V'

	    # Kept as a list (not a set) so column and plot order is deterministic.
	    # Names scraped later that are missing here are appended to it.
	    candidates = [
	        'Alexandra Sasha',
	        'René Bredal',
	        'Claus Buch',
	        'Anders Fausbøll',
	        'Martin Geertsen',
	        'Jan E. Jørgensen',
	        'Michael Lange',
	        'Anne Rasmussen',
	        'Bo Sandroos',
	        'Sven Aage Schlosrich',
	        'Linea Søgaard-Lidell',
	        'Søren Sørensen',
	        ]

	    candidate_of_interest = 'Jan E. Jørgensen'

	    # Columns of the shapefile itself, captured before result columns are added
	    # (used only for the debug print of each area).
	    columns = df.columns
	    for candidate in candidates:
	        # Float from the start: these columns receive percentages.
	        df[candidate] = 0.0
	    df['most_votes'] = ''
	    df['second_most_votes'] = ''
	    df['third_most_votes'] = ''
	    df['candidate_of_interest'] = ''
	    df['sum'] = 0

	    # (KommuneNum, page key); the first three digits of the key minus 100 give
	    # the OpstilNum used in the shapefile.
	    areas = (
	        [(101, str(i)) for i in range(101, 109 + 1)]  # København
	        + [(147, str(i)) for i in range(110, 111 + 1)]  # Frederiksberg
	        + [
	            (155, '112155'),  # Dragør
	            (185, '112185'),  # Tårnby
	        ]
	    )

	    for KommuneNum, k in areas:
	        OpstilNum = int(k[:3]) - 100
	        in_area = (df['OpstilNum'] == OpstilNum) & (df['KommuneNum'] == KommuneNum)
	        print()
	        print('OpstilNum', OpstilNum)
	        print(df[in_area][columns].drop('geometry', axis='columns').to_string())

	        area_soup = _fetch_soup(f'https://www.kmdvalg.dk/fv/2022/F{k}.htm')
	        div = area_soup.find('div', {"class": "kmd-voting-areas-list-items"})
	        if div is None:
	            raise ValueError(f'No polling station list found on page F{k}.htm')

	        # The first link of the list is skipped; the rest are polling stations.
	        for a in div.find_all('a')[1:]:
	            href = a.get('href')
	            if int(href[1:4]) - 100 != OpstilNum:
	                raise ValueError(
	                    f'Link {href!r} does not belong to OpstilNum {OpstilNum}')
	            AfstemKod = href[8:10]

	            # Always include KommuneNum: Dragør and Tårnby share OpstilNum 12,
	            # so OpstilNum + AfstemKod alone can select rows of the wrong
	            # municipality.
	            mask = in_area & (df['AfstemKod'] == AfstemKod)
	            print('yyy', OpstilNum, AfstemKod, a.text, df[mask]['AfstemNav'].values)

	            station_soup = _fetch_soup(
	                f'https://www.kmdvalg.dk/fv/2022/f{href[1:-4]}{party}.htm')
	            div_candidates = station_soup.find_all('div', {"class": "table-like-cell col-xs-7 col-sm-6 col-md-6 col-lg-8"})
	            div_counts = station_soup.find_all('div', {"class": "table-like-cell col-xs-5 col-sm-6 col-md-6 col-lg-4 text-right roboto-bold"})
	            counts = [int(cell.text) for cell in div_counts]
	            names = [cell.text for cell in div_candidates]

	            # Highest count first; equal counts are ordered by name, descending.
	            ranked_names = [name for _, name in sorted(zip(counts, names), reverse=True)]
	            rank = ranked_names.index(candidate_of_interest) + 1
	            total = sum(counts)

	            df.loc[mask, 'most_votes'] = ranked_names[0]
	            df.loc[mask, 'second_most_votes'] = ranked_names[1]
	            df.loc[mask, 'third_most_votes'] = ranked_names[2]
	            df.loc[mask, 'candidate_of_interest'] = rank
	            df.loc[mask, 'sum'] = total
	            for name, count in zip(names, counts):
	                if name not in candidates:
	                    # Name on the page that is not in the predefined list:
	                    # give it a column too so it is stored and plotted.
	                    df[name] = 0.0
	                    candidates.append(name)
	                # A station without any votes gets 0 % instead of a division
	                # by zero.
	                df.loc[mask, name] = 100 * count / total if total else 0.0
	            if not mask.any():
	                # Station on the web page without a matching shapefile row.
	                print(a.text, OpstilNum, AfstemKod)

	    print('Nobody with most votes due to redistricting')
	    print(df[df['most_votes'] == ''])

	    gdf = gpd.GeoDataFrame(df)
	    summary_columns = ['sum', 'most_votes', 'second_most_votes', 'third_most_votes', 'candidate_of_interest']
	    for column in summary_columns + candidates:
	        _plot_column(gdf, column)


	# Run the analysis only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()