Skip to content

Instantly share code, notes, and snippets.

@MaxMatti
Created February 17, 2021 13:18
Show Gist options
  • Save MaxMatti/f06c2eefc8faa56f331055a2f5384047 to your computer and use it in GitHub Desktop.
Save MaxMatti/f06c2eefc8faa56f331055a2f5384047 to your computer and use it in GitHub Desktop.
Source code used to generate the age distribution of German citizens and federal parliament members, 1950–2018
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Author: Max Staff <max.staff@gmx.de>
import csv
import matplotlib.animation as animation
import matplotlib.pyplot as pyplot
import numpy
import re
import xml.etree.ElementTree as ElementTree
# Path of the semicolon-delimited CSV with the population figures; used by main().
population = "data/14_bevoelkerungsvorausberechnung_daten.csv"
# Path of the XML file with the Bundestag member records; used by main().
bundestag = "data/MDB_STAMMDATEN.XML"
# Number of one-year age buckets in every histogram (ages 0..100).
_AGE_BUCKETS = 101
# Year used as the end of a term whose end date is empty, missing or unreadable.
_OPEN_TERM_END_YEAR = 2021
# Prefix of the figure title; the year of the current frame is appended.
_TITLE_PREFIX = "Altersstruktur Deutschlands im Jahr "


def _parse_year(text):
    """Return the year held in the last four characters of a 10-character date.

    Returns None when *text* is None, is not exactly 10 characters long or
    does not end in four decimal digits.
    """
    # isdecimal() only accepts characters that int() can actually parse,
    # whereas isnumeric() also lets through e.g. fractions and superscripts.
    if text is not None and len(text) == 10 and text[6:].isdecimal():
        return int(text[6:])
    return None


def _load_population(path, years):
    """Store the population series from the CSV file at *path* in *years*.

    Every accepted row becomes ``years[row[1]]["pop_" + row[2]]``, a list of
    integers built from the columns starting at index 4.
    """
    # newline="" is what the csv module asks for when given a file object.
    with open(path, newline="") as file:
        for row in csv.reader(file, delimiter=";"):
            # only use actually measured data, not predictions and not the
            # header; blank or truncated rows are skipped instead of crashing
            if len(row) < 3 or row[0] != "0":
                continue
            # The "0" prefix makes empty cells parse as 0.  The trailing
            # [0] adds one more bucket (presumably to reach the 101 buckets
            # the bars are drawn with).
            counts = [int("0" + x, 10) for x in row[4:]] + [0]
            years.setdefault(row[1], {})["pop_" + row[2]] = counts


def _load_bundestag(path, years):
    """Count Bundestag members per year, gender and age into *years*.

    For every member and every year of every term, the bucket
    ``years[str(year)]["bundestag_" + gender][year - birth_year]`` is
    incremented, where gender is the first character of GESCHLECHT.

    Returns False, after printing the offending value, when a gender, a
    birth date or a term start date is present but unreadable; True
    otherwise.
    """
    root = ElementTree.parse(path).getroot()
    for mdb in root:
        # Reset per member so that nothing leaks over from the previous one.
        gender = None
        birth_year = None
        years_in_office = []
        for section in mdb:
            if section.tag == "BIOGRAFISCHE_ANGABEN":
                for attr in section:
                    if attr.tag == "GESCHLECHT":
                        if not attr.text:
                            print("Unknown gender: \"" + str(attr.text) + "\"")
                            return False
                        gender = attr.text[0]
                    elif attr.tag == "GEBURTSDATUM":
                        birth_year = _parse_year(attr.text)
                        if birth_year is None:
                            print("Unknown birthdate: \"" + str(attr.text) + "\"")
                            return False
            elif section.tag == "WAHLPERIODEN":
                for period in section:
                    # Reset per term; a term without an end element is
                    # treated like one with an empty end date.
                    start_year = None
                    end_year = _OPEN_TERM_END_YEAR
                    for attr in period:
                        if attr.tag == "MDBWP_VON":
                            start_year = _parse_year(attr.text)
                            if start_year is None:
                                print("Unknown startdate: \"" + str(attr.text) + "\"")
                                return False
                        elif attr.tag == "MDBWP_BIS":
                            end_year = _parse_year(attr.text)
                            if end_year is None:
                                # An empty element is silently open-ended,
                                # anything else is reported first.
                                if attr.text is not None:
                                    print("Unknown enddate: \"" + str(attr.text)
                                          + "\" (using " + str(_OPEN_TERM_END_YEAR) + ")")
                                end_year = _OPEN_TERM_END_YEAR
                    if start_year is None:
                        print("Skipping term without startdate")
                        continue
                    # The end year itself is not counted.
                    years_in_office.extend(range(start_year, end_year))
        if not years_in_office:
            continue
        if gender is None or birth_year is None:
            print("Skipping member without gender or birthdate")
            continue
        series = "bundestag_" + gender
        for year in years_in_office:
            frame = years.setdefault(str(year), {})
            # setdefault keeps counts that are already there; both plotted
            # series always exist once a year has any member at all.
            frame.setdefault("bundestag_m", [0] * _AGE_BUCKETS)
            frame.setdefault("bundestag_w", [0] * _AGE_BUCKETS)
            frame.setdefault(series, [0] * _AGE_BUCKETS)
            age = year - birth_year
            # A negative index would silently wrap around to the wrong
            # bucket and a too large one would raise IndexError.
            if not 0 <= age < _AGE_BUCKETS:
                print("Skipping age out of range: " + str(age) + " in " + str(year))
                continue
            frame[series][age] += 1
    return True


def _style_axis(ax, title, xlim, xticks, yticks, ticks_right=False):
    """Apply the shared pyramid look (title, limits, ticks, no spines) to *ax*."""
    ax.set_title(title)
    ax.set_xlim(xlim)
    ax.set_ylim([0, 100])
    ax.set_yticks(yticks)
    ax.set_yticklabels([str(y) for y in yticks])
    ax.tick_params(axis="y", which="both", length=0)
    ax.set_xticks(xticks)
    # Bars for "w" extend to the left of zero, so labels show magnitudes.
    ax.set_xticklabels([str(abs(x)) for x in xticks])
    if ticks_right:
        ax.yaxis.tick_right()
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(False)


def main():
    """Animate population and Bundestag age pyramids side by side.

    Reads the CSV named by ``population`` and the XML named by
    ``bundestag``, builds per-year age histograms for both, shows the
    animation in a window and afterwards writes it to ``output.mp4``.
    Only years that have all four series (pop_m, pop_w, bundestag_m,
    bundestag_w) become frames.

    Returns early, after printing a message, when the member data contains
    an unreadable gender, birth date or term start date, or when no year
    has all four series.
    """
    # Seed the years in ascending order: frames are later taken in dict
    # insertion order.
    years = {str(yr): {} for yr in range(1800, 2020)}
    _load_population(population, years)
    if not _load_bundestag(bundestag, years):
        return

    required = ("pop_m", "pop_w", "bundestag_m", "bundestag_w")
    keys = [key for key, frame in years.items()
            if all(name in frame for name in required)]
    if not keys:
        print("No year has both population and Bundestag data")
        return

    fig, (ax1, ax2) = pyplot.subplots(
        1, 2, figsize=(12, 10), gridspec_kw={"width_ratios": [8, 7]})
    pyplot.subplots_adjust(wspace=0, hspace=0)
    pos = numpy.arange(_AGE_BUCKETS)
    tickpos = range(5, 96, 5)
    first = years[keys[0]]

    pop_m_bar = ax1.barh(y=pos, width=first["pop_m"], height=1, left=0,
                         align="edge", label="m")
    pop_w_bar = ax1.barh(y=pos, width=[-x for x in first["pop_w"]], height=1,
                         left=0, align="edge", label="w")
    fig.suptitle(_TITLE_PREFIX + keys[0])
    _style_axis(ax1, "Bevölkerung (in Tsd.)", [-800, 800],
                [-600, -400, -200, 0, 200, 400, 600], tickpos)

    bundestag_m_bar = ax2.barh(y=pos, width=first["bundestag_m"], height=1,
                               left=0, align="edge", label="m")
    bundestag_w_bar = ax2.barh(y=pos, width=[-x for x in first["bundestag_w"]],
                               height=1, left=0, align="edge", label="w")
    _style_axis(ax2, "Bundestag", [-30, 40], [-20, -10, 0, 10, 20, 30],
                tickpos, ticks_right=True)
    ax2.legend(loc="lower right")

    # Grey background band behind the lower half of every decade.
    for decade in range(10):
        ax1.axhspan(decade * 10, 5 + decade * 10, color='#eee', alpha=1, zorder=0)
        ax2.axhspan(decade * 10, 5 + decade * 10, color='#eee', alpha=1, zorder=0)
    ax1.annotate("Datenquelle Bevölkerung: Statistisches Bundesamt (Destatis), 2019\nDatenquelle Bundestag: Open Data Portal des Bundestages, 2021", (0,0), (0,-30), xycoords="axes fraction", textcoords="offset points", fontsize="8", va="top")
    ax2.annotate("© Max Staff, 2021", (1,0), (0,-30), xycoords="axes fraction", textcoords="offset points", va="top", fontsize="8", horizontalalignment="right")

    # (bars, series name, sign): "w" bars grow to the left of zero.
    bar_series = (
        (pop_m_bar, "pop_m", 1),
        (pop_w_bar, "pop_w", -1),
        (bundestag_m_bar, "bundestag_m", 1),
        (bundestag_w_bar, "bundestag_w", -1),
    )

    def animate(i):
        """Resize all bars and update the title for frame number *i*."""
        frame = years[keys[i]]
        fig.suptitle(_TITLE_PREFIX + keys[i])
        for bars, name, sign in bar_series:
            for bar, value in zip(bars, frame[name]):
                bar.set_width(sign * value)

    ani = animation.FuncAnimation(fig, animate, repeat=False, blit=False,
                                  frames=len(keys), interval=100)
    pyplot.show()
    ani.save("output.mp4", writer=animation.FFMpegWriter(fps=4))
# Run the script only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()
@MaxMatti
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment