# resulumit/cabinet_size.R

## cabinet_size.R
# R code for the graph at
# https://twitter.com/ResulUmit/status/1352218275699154945?s=20

# load the packages -------------------------------------------------------

library(dataverse)
library(rio)
library(tidyverse)

# specify which installation ----------------------------------------------

# Point the dataverse client at the Harvard installation.
Sys.setenv(DATAVERSE_SERVER = "dataverse.harvard.edu")

# get the data from harvard dataverse -------------------------------------

# Download the cabinet table, write the returned bytes to a temporary .csv
# file, and read that file back in with import().
cabs <- get_file(
  file = "view_cabinet.tab",
  dataset = "doi:10.7910/DVN/Q6CVHX"
)
tmp <- tempfile(fileext = ".csv")
writeBin(as.vector(cabs), con = tmp)
df_cabs <- import(file = tmp)

# tidy the data -----------------------------------------------------------

# Lookup table of cabinet end years: the start year of every cabinet that
# began between 1949 and 2019 is recorded as `end_year` for the cabinet it
# names in `previous_cabinet_id`. One row per distinct (id, year) pair.
df_ends <- df_cabs %>%
  filter(
    start_date > as.Date("1948-12-31"),
    start_date < as.Date("2020-01-01")
  ) %>%
  mutate(end_year = as.numeric(format(start_date, "%Y"))) %>%
  distinct(previous_cabinet_id, end_year)

# Average number of cabinet parties per calendar year.
#
# Steps:
#   1. keep cabinets whose start_date falls between 1949 and 2019;
#   2. sum `cabinet_party` within each cabinet (stored as `parties`);
#   3. attach `end_year` from df_ends; cabinets without a match are given
#      2019 as their end year;
#   4. expand each cabinet to one row per year from its start to its end year;
#   5. average `parties` over all cabinet rows that fall in each year.
df <- df_cabs %>%
  filter(
    start_date > as.Date("1948-12-31"),
    start_date < as.Date("2020-01-01")
  ) %>%
  mutate(start_year = as.numeric(format(start_date, "%Y"))) %>%
  group_by(cabinet_id, start_year) %>%
  summarise(parties = sum(cabinet_party)) %>%
  ungroup() %>%
  left_join(df_ends, by = c("cabinet_id" = "previous_cabinet_id")) %>%
  mutate(end_year = replace_na(end_year, 2019)) %>%
  pivot_longer(cols = c("start_year", "end_year"), values_to = "year") %>%
  select(-name) %>%
  # A cabinet that starts and ends in the same year yields two identical rows
  # at this point. complete() keeps existing rows as they are, so without
  # de-duplication such a cabinet would be counted twice in that year's mean.
  distinct() %>%
  group_by(cabinet_id) %>%
  complete(parties, year = full_seq(year, 1)) %>%
  group_by(year) %>%
  summarise(cabinet_size = mean(parties))

# plot the data -----------------------------------------------------------

# Line chart of the yearly average number of parties in government.
ggplot(df, aes(x = year, y = cabinet_size)) +
  # `linewidth` controls line thickness since ggplot2 3.4.0; passing `size`
  # to a line geom is deprecated there and triggers a warning.
  geom_line(linewidth = 1.5) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 14)
  ) +
  labs(
    y = "Average number of parties in government, 1949 - 2019\n",
    x = "",
    caption = "Data: ParlGov"
  )
	# End of script.
	# A second, tab-indented copy of the code above (same packages, same
	# Dataverse download, same tidying steps, same plot) was removed from this
	# position: running it only repeated the download, overwrote the same
	# objects, and drew the identical figure a second time.