/SeniorInnen.R Secret

## SeniorInnen.R
# Copyright (C) 2015  Salim Brüggemann

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# To get a copy of the GNU General Public License see <http://www.gnu.org/licenses/>.

###############################################################################
# Blogeintrag im UZH-Seminar "Politischer Datenjournalismus" 2015: http://pwipdm.uzh.ch/wordpress/?p=6058
# Autor: Salim Brüggemann, 08-915-126
###############################################################################

remove(list = ls(all = TRUE))

setwd("~/Dokumente/Ausbildung/Studium UZH/Master/2. Semester/Politischer Datenjournalismus/Blogeinträge/1. Blogeintrag")
library(foreign)
library(stats)
library(arm)
library(survey)
library(aod)
library(plotly)


# BFS-Daten zum Alter der ständigen Wohnbevölkerung mit CH-Pass zwischen 1971 und 2010 (jeweils am 31. Dezember) einlesen
data_age_1971_2010_total <- read.csv2("Ständige CH-Wohnbevölkerung 1971-2010 Total.csv")
data_age_1971_2010_f <- read.csv2("Ständige CH-Wohnbevölkerung 1971-2010 Frauen.csv")
data_age_1971_2010_m <- read.csv2("Ständige CH-Wohnbevölkerung 1971-2010 Männer.csv")

# BFS-Daten zum Alter der ständigen Wohnbevölkerung mit CH-Pass zwischen 2010 und 2014 (jeweils am 31. Dezember) einlesen
# Ab 2010 hat die Berechnungsgrundlage geändert (neu STATPOP, alt ESPOP), die Zahlen fürs Jahr 2010 stimmen daher nicht überein zwischen den beiden Datensätzen
data_age_2010_2014_total <- read.csv2("Ständige CH-Wohnbevölkerung 2010-2014 Total.csv")
data_age_2010_2014_f <- read.csv2("Ständige CH-Wohnbevölkerung 2010-2014 Frauen.csv")
data_age_2010_2014_m <- read.csv2("Ständige CH-Wohnbevölkerung 2010-2014 Männer.csv")


### Daten der beiden Zeitperioden zusammenführen:
# höchste Alterskategorie dem 1971-2010-Datensatz anpassen
for ( i in 2: length(data_age_2010_2014_total[101, ]) ) { data_age_2010_2014_total[101, i] <- sum(data_age_2010_2014_total[101, i], data_age_2010_2014_total[102, i]) }
for ( i in 2: length(data_age_2010_2014_m[101, ]) ) { data_age_2010_2014_m[101, i] <- sum(data_age_2010_2014_m[101, i], data_age_2010_2014_m[102, i]) }
for ( i in 2: length(data_age_2010_2014_f[101, ]) ) { data_age_2010_2014_f[101, i] <- sum(data_age_2010_2014_f[101, i], data_age_2010_2014_f[102, i]) }

# letzte Zeile löschen
data_age_2010_2014_total <- data_age_2010_2014_total[-c(102), ]
data_age_2010_2014_m <- data_age_2010_2014_m[-c(102), ]
data_age_2010_2014_f <- data_age_2010_2014_f[-c(102), ]

# letztes factor level anpassen
levels(data_age_2010_2014_total$Alter)[match("99", levels(data_age_2010_2014_total$Alter))] <- "99 und mehr"
levels(data_age_2010_2014_m$Alter)[match("99", levels(data_age_2010_2014_m$Alter))] <- "99 und mehr"
levels(data_age_2010_2014_f$Alter)[match("99", levels(data_age_2010_2014_f$Alter))] <- "99 und mehr"

# ungenutzte factor levels ("100 und mehr") entfernen
data_age_2010_2014_total$Alter <- factor(data_age_2010_2014_total$Alter)
data_age_2010_2014_m$Alter <- factor(data_age_2010_2014_m$Alter)
data_age_2010_2014_f$Alter <- factor(data_age_2010_2014_f$Alter)

# doppeltes Jahr 2010 in älterem Datensatz (ESPOP-basiert) löschen
data_age_1971_2010_total$X2010 <- NULL
data_age_1971_2010_m$X2010 <- NULL
data_age_1971_2010_f$X2010 <- NULL

# Synthese-Datensatz generieren
data_age_1971_2014_total <- cbind(data_age_1971_2010_total, data_age_2010_2014_total[, 2:length(data_age_2010_2014_total[1, ])])
data_age_1971_2014_m <- cbind(data_age_1971_2010_m, data_age_2010_2014_m[, 2:length(data_age_2010_2014_m[1, ])])
data_age_1971_2014_f <- cbind(data_age_1971_2010_f, data_age_2010_2014_f[, 2:length(data_age_2010_2014_f[1, ])])

# nicht mehr benötigte Datensätze entfernen (Speicher freigeben)
rm(data_age_1971_2010_total,
   data_age_1971_2010_m,
   data_age_1971_2010_f,
   data_age_2010_2014_total,
   data_age_2010_2014_m,
   data_age_2010_2014_f)

# neues dataframe erstellen mit Anzahl stimmberechtigter BürgerInnen pro Jahr und Durchschnittsalter (arithmetisches Mittel und Median) der Stimmberechtigten (dieses fällt hier systematisch tiefer aus als das tatsächliche, weil alle über 98-jährigen Personen zu einer Gruppe zusammengefasst werden ("99 und mehr"); die Abweichung fällt mit steigender Jahreszahl höher aus, da immer mehr Leute das 99. Lebensjahr überschreiten; sie fällt allerdings insgesamt gering aus und dürfte sich auch im Jahr 2014 noch im Bereich der 3. Nachkommastelle bewegen); mangels besserer Datenbasis (z. B. Stimmregisterdaten) weichen die hier berechneten Werte von den tatsächlichen Werten ab, weil einerseits entmündigte BürgerInnen enthalten sind, andererseits die AuslandschweizerInnen fehlen
data_eligibility_1971_2014 <- data_age_1971_2014_total[1:9, 2:length(data_age_1971_2014_total[1, ])]
rownames(data_eligibility_1971_2014) <- c("total eligible voters",
                                          "female eligible voters",
                                          "male eligible voters",
                                          "mean age of total eligible voters",
                                          "mean age of female eligible voters",
                                          "mean age of male eligible voters",
                                          "median age of total eligible voters",
                                          "median age of female eligible voters",
                                          "median age of male eligible voters")

for ( i in 2:length(data_age_1971_2014_total[1, ]) ) {

  lower_age_limit <- ifelse ( colnames(data_age_1971_2014_total)[i] < "X1991", 20, 18 )
  data_eligibility_1971_2014[1, i - 1] <- sum(data_age_1971_2014_total[, i][which(data_age_1971_2014_total$Alter == lower_age_limit):length(data_age_1971_2014_total$Alter)])
  valid_adult_age_range <- as.numeric(as.character(data_age_1971_2014_total$Alter[which(data_age_1971_2014_total$Alter == lower_age_limit):(length(data_age_1971_2014_total[, 1]) - 1)]))
  year_sums <- data_age_1971_2014_total[, i][which(data_age_1971_2014_total$Alter == lower_age_limit):(length(data_age_1971_2014_total[, 1]) - 1)] * valid_adult_age_range
  year_sums <- append(year_sums, 99 * data_age_1971_2014_total[length(data_age_1971_2014_total[, 1]), i])
  data_eligibility_1971_2014[4, i - 1] <- sum(year_sums)/data_eligibility_1971_2014[1, i - 1]
  median_eligible_voter <- (data_eligibility_1971_2014[1, i - 1]) / 2

  counter <- 0
  count_sum <- 0
  while ( count_sum < median_eligible_voter ) {
    count_sum <- count_sum + data_age_1971_2014_total[, i][which(data_age_1971_2014_total$Alter == lower_age_limit) + counter]
    counter <- counter + 1
  }
  odd_up <- ifelse ( count_sum < median_eligible_voter + 1, 0.5, 0 )
  data_eligibility_1971_2014[7, i - 1] <- ifelse ( data_eligibility_1971_2014[1, i - 1] %% 2 == 0, lower_age_limit + counter + odd_up, lower_age_limit + counter)
}

for ( i in 2:length(data_age_1971_2014_f[1, ]) ) {

  lower_age_limit <- ifelse ( colnames(data_age_1971_2014_f)[i] < "X1991", 20, 18 )
  data_eligibility_1971_2014[2, i - 1] <- sum(data_age_1971_2014_f[, i][which(data_age_1971_2014_f$Alter == lower_age_limit):length(data_age_1971_2014_f$Alter)])
  valid_adult_age_range <- as.numeric(as.character(data_age_1971_2014_f$Alter[which(data_age_1971_2014_f$Alter == lower_age_limit):(length(data_age_1971_2014_f[, 1]) - 1)]))
  year_sums <- data_age_1971_2014_f[, i][which(data_age_1971_2014_f$Alter == lower_age_limit):(length(data_age_1971_2014_f[, 1]) - 1)] * valid_adult_age_range
  year_sums <- append(year_sums, 99 * data_age_1971_2014_f[length(data_age_1971_2014_f[, 1]), i])
  data_eligibility_1971_2014[5, i - 1] <- sum(year_sums)/data_eligibility_1971_2014[2, i - 1]
  median_eligible_voter <- (data_eligibility_1971_2014[2, i - 1]) / 2

  counter <- 0
  count_sum <- 0
  while ( count_sum < median_eligible_voter ) {
    count_sum <- count_sum + data_age_1971_2014_f[, i][which(data_age_1971_2014_f$Alter == lower_age_limit) + counter]
    counter <- counter + 1
  }
  odd_up <- ifelse ( count_sum < median_eligible_voter + 1, 0.5, 0 )
  data_eligibility_1971_2014[8, i - 1] <- ifelse ( data_eligibility_1971_2014[2, i - 1] %% 2 == 0, lower_age_limit + counter + odd_up, lower_age_limit + counter)
}

for ( i in 2:length(data_age_1971_2014_m[1, ]) ) {

  lower_age_limit <- ifelse ( colnames(data_age_1971_2014_m)[i] < "X1991", 20, 18 )
  data_eligibility_1971_2014[3, i - 1] <- sum(data_age_1971_2014_m[, i][which(data_age_1971_2014_m$Alter == lower_age_limit):length(data_age_1971_2014_m$Alter)])
  valid_adult_age_range <- as.numeric(as.character(data_age_1971_2014_m$Alter[which(data_age_1971_2014_m$Alter == lower_age_limit):(length(data_age_1971_2014_m[, 1]) - 1)]))
  year_sums <- data_age_1971_2014_m[, i][which(data_age_1971_2014_m$Alter == lower_age_limit):(length(data_age_1971_2014_m[, 1]) - 1)] * valid_adult_age_range
  year_sums <- append(year_sums, 99 * data_age_1971_2014_m[length(data_age_1971_2014_m[, 1]), i])
  data_eligibility_1971_2014[6, i - 1] <- sum(year_sums)/data_eligibility_1971_2014[3, i - 1]
  median_eligible_voter <- (data_eligibility_1971_2014[3, i - 1]) / 2

  counter <- 0
  count_sum <- 0
  while ( count_sum < median_eligible_voter ) {
    count_sum <- count_sum + data_age_1971_2014_m[, i][which(data_age_1971_2014_m$Alter == lower_age_limit) + counter]
    counter <- counter + 1
  }
  odd_up <- ifelse ( count_sum < median_eligible_voter + 1, 0.5, 0 )
  data_eligibility_1971_2014[9, i - 1] <- ifelse ( data_eligibility_1971_2014[3, i - 1] %% 2 == 0, lower_age_limit + counter + odd_up, lower_age_limit + counter)
}

# neue matrix erstellen mit Anzahl Stimmberechtigter je Alterskategorie
data_age_category_1971_2014 <- matrix(nrow = 44,
                                      ncol = 8,
                                      dimnames = list(seq(1971, 2014),
                                                      c("18-24", "25-34", "35-44", "45-54", "55-64", "65-74", "75+", "total")))

for ( i in 2:length(data_age_1971_2014_total[1, ]) ) {

  cat_18_24 <- 0
  cat_25_34 <- 0
  cat_35_44 <- 0
  cat_45_54 <- 0
  cat_55_64 <- 0
  cat_65_74 <- 0
  cat_75plus <- 0

  for ( j in 18:24 ) {
    cat_18_24 <- cat_18_24 + data_age_1971_2014_total[j + 2, i]
  }
  for ( j in 25:34 ) {
    cat_25_34 <- cat_25_34 + data_age_1971_2014_total[j + 2, i]
  }
  for ( j in 35:44 ) {
    cat_35_44 <- cat_35_44 + data_age_1971_2014_total[j + 2, i]
  }
  for ( j in 45:54 ) {
    cat_45_54 <- cat_45_54 + data_age_1971_2014_total[j + 2, i]
  }
  for ( j in 55:64 ) {
    cat_55_64 <- cat_55_64 + data_age_1971_2014_total[j + 2, i]
  }
  for ( j in 65:74 ) {
    cat_65_74 <- cat_65_74 + data_age_1971_2014_total[j + 2, i]
  }
  for ( j in 75:99 ) {
    cat_75plus <- cat_75plus + data_age_1971_2014_total[j + 2, i]
  }
  data_age_category_1971_2014[as.character(i + 1969), "18-24"] <- cat_18_24
  data_age_category_1971_2014[as.character(i + 1969), "25-34"] <- cat_25_34
  data_age_category_1971_2014[as.character(i + 1969), "35-44"] <- cat_35_44
  data_age_category_1971_2014[as.character(i + 1969), "45-54"] <- cat_45_54
  data_age_category_1971_2014[as.character(i + 1969), "55-64"] <- cat_55_64
  data_age_category_1971_2014[as.character(i + 1969), "65-74"] <- cat_65_74
  data_age_category_1971_2014[as.character(i + 1969), "75+"] <- cat_75plus
  data_age_category_1971_2014[as.character(i + 1969), "total"] <- sum(cat_18_24, cat_25_34, cat_35_44, cat_45_54, cat_55_64, cat_65_74, cat_75plus)
}


### Plotten (simpel)
# Durchschnittsalter (arithmetisches Mittel) der Stimmberechtigten über die Zeit plotten; Knick 2010 ist ein Erhebungsartefakt (ESPOP vs. STATPOP, siehe http://www.bfs.admin.ch/bfs/portal/de/index/infothek/erhebungen__quellen/blank/blank/statpop/02.html); Knick 1991 ist auf die Absenkung des Stimmrechtsalters von 20 auf 18 Jahre zurückzuführen.
plot(1971:2014, data_eligibility_1971_2014[4, ], type = "o", ylim = c(44, 52), xlab = "Jahr", ylab = "Durchschnittsalter der Stimmberechtigten")
points(1971:2014, data_eligibility_1971_2014[5, ], type = "o", col = "deeppink")
points(1971:2014, data_eligibility_1971_2014[6, ], type = "o", col = "dodgerblue")

# Medianalter der Stimmberechtigten über die Zeit plotten; Knick 1991 ist auf die Absenkung des Stimmrechtsalters von 20 auf 18 Jahre zurückzuführen.
plot(1971:2014, data_eligibility_1971_2014[7, ], type = "o", ylim = c(44, 52), xlab = "Jahr", ylab = "Medianalter der Stimmberechtigten")
points(1971:2014, data_eligibility_1971_2014[8, ], type = "o", col = "deeppink")
points(1971:2014, data_eligibility_1971_2014[9, ], type = "o", col = "dodgerblue")


### Plotten mittels Plotly
# "data_eligibility_1971_2014" neu organisieren in "data_eligibility_1971_2014_v2"
data_eligibility_1971_2014_v2 <- data.frame("year" = NA,
                                            "eligible_voters_total" = NA,
                                            "eligible_voters_female" = NA,
                                            "eligible_voters_male" = NA,
                                            "eligible_voters_total_mean_age" = NA,
                                            "eligible_voters_female_mean_age" = NA,
                                            "eligible_voters_male_mean_age" = NA,
                                            "eligible_voters_total_median_age" = NA,
                                            "eligible_voters_female_median_age" = NA,
                                            "eligible_voters_male_median_age" = NA)

for ( i in 1:length(data_eligibility_1971_2014[1, ]) ) {
  data_eligibility_1971_2014_v2[i, ] <- c(i + 1970, data_eligibility_1971_2014[, i])
}

# Mittleres Alter (arithmetisches Mittel und Median) der Stimmberechtigten über die Zeit plotten; Knick 2010 ist ein Erhebungsartefakt (ESPOP vs. STATPOP, siehe http://www.bfs.admin.ch/bfs/portal/de/index/infothek/erhebungen__quellen/blank/blank/statpop/02.html); Knick 1991 ist auf die Absenkung des Stimmrechtsalters von 20 auf 18 Jahre zurückzuführen.
plot_eligibility <- plot_ly(data = data_eligibility_1971_2014_v2,
                            type = "scatter",
                            mode = "lines",
                            x = year,
                            y = round(eligible_voters_total_mean_age, digits = 2),
                            name = "Total (⌀)",
                            line = list(color = "black"))

plot_eligibility <- add_trace(data = data_eligibility_1971_2014_v2,
                              y = round(eligible_voters_female_mean_age, digits = 2),
                              name = "Frauen (⌀)",
                              line = list(color = "deeppink",
                                          dash = 2))

plot_eligibility <- add_trace(data = data_eligibility_1971_2014_v2,
                              y = round(eligible_voters_male_mean_age,
                                        digits = 2),
                              name = "Männer (⌀)",
                              line = list(color = "dodgerblue",
                                          dash = 2))

plot_eligibility <- add_trace(data = data_eligibility_1971_2014_v2,
                              mode = "lines+markers",
                              y = eligible_voters_total_median_age,
                              name = "Total (Median)",
                              line = list(color = "black"),
                              visible = "legendonly")

plot_eligibility <- add_trace(data = data_eligibility_1971_2014_v2,
                              mode = "lines+markers",
                              y = eligible_voters_female_median_age,
                              name = "Frauen (Median)",
                              line = list(color = "deeppink",
                                          dash = 2),
                              visible = "legendonly")

plot_eligibility <- add_trace(data = data_eligibility_1971_2014_v2,
                              mode = "lines+markers",
                              y = eligible_voters_male_median_age,
                              name = "Männer (Median)",
                              line = list(color = "dodgerblue",
                                          dash = 2),
                              visible = "legendonly")

plot_eligibility <- add_trace(data = data_eligibility_1971_2014_v2,
                              x = c(1991, 1991),
                              y = c(min(eligible_voters_male_mean_age), 50.35),
                              name = "Stimmrechtsalter 18",
                              line = list(color = "yellowgreen",
                                          width = 3),
                              opacity = 0.5, hoverinfo = "none",
                              showlegend = FALSE)

plot_eligibility <- add_trace(x = 1991, y = 50.4,
                              text = "Stimmrechtsalter 18",
                              textfont = list(color = "yellowgreen"),
                              hoverinfo = "none",
                              mode = "text",
                              textposition = "top middle",
                              showlegend = FALSE)

plot_eligibility <- layout(p = plot_eligibility,
                           title = "Mittleres Alter der Stimmberechtigten",
                           xaxis = list(title = "Jahr",
                                        fixedrange = TRUE),
                           yaxis = list(title = "Alter",
                                        fixedrange = TRUE),
                           margin = list(l = 55,
                                         r = 55,
                                         t = 60,
                                         b = 65,
                                         pad = 0,
                                         autoexpand = TRUE))

plot_eligibility <- config(p = plot_eligibility,
                           displaylogo = FALSE,
                           showLink = FALSE,
                           displayModeBar = FALSE)

plot_eligibility
#plotly_POST(plot_eligibility, filename = "DDJ-Blogeintrag 1/Mittleres Alter der Stimmberechtigten", fileopt = "overwrite", sharing = "public")


### Wahlbeteiligung sowie Wähleranteile der verschiedenen Parteien in Abhängigkeit der Alterskategorie mittels Regression schätzen (GLM)
# Kumulierten SELECTS-Datensatz der Jahre 1971-2011 einlesen
data_selects_1971_2011 <- read.spss("~/Dokumente/Ausbildung/Studium UZH/Master/2. Semester/Politischer Datenjournalismus/Blogbeiträge/SELECTS-Daten/1971-2011/Selects_1971_2011.sav",
                                    to.data.frame = TRUE)

# Alterskategorien bilden mit "45-54" als Referenzkategorie
data_selects_1971_2011$age_category <- factor(data_selects_1971_2011$age, levels = c("45-54", "18-24", "25-34", "35-44", "55-64", "65-74", "75+"))
data_selects_1971_2011$age_category[data_selects_1971_2011$age < 25] <- "18-24"
data_selects_1971_2011$age_category[data_selects_1971_2011$age > 24 & data_selects_1971_2011$age < 35] <- "25-34"
data_selects_1971_2011$age_category[data_selects_1971_2011$age > 34 & data_selects_1971_2011$age < 45] <- "35-44"
data_selects_1971_2011$age_category[data_selects_1971_2011$age > 44 & data_selects_1971_2011$age < 55] <- "45-54"
data_selects_1971_2011$age_category[data_selects_1971_2011$age > 54 & data_selects_1971_2011$age < 65] <- "55-64"
data_selects_1971_2011$age_category[data_selects_1971_2011$age > 64 & data_selects_1971_2011$age < 75] <- "65-74"
data_selects_1971_2011$age_category[data_selects_1971_2011$age > 74] <- "75+"

# Dummy-Variablen für die Wahl der relevanten Parteien bilden
data_selects_1971_2011$dummy_SVP <- ifelse(data_selects_1971_2011$vdn1b == "svp/udc", 1, 0)
data_selects_1971_2011$dummy_Lega <- ifelse(data_selects_1971_2011$vdn1b == "lega", 1, 0)
data_selects_1971_2011$dummy_SD <- ifelse(data_selects_1971_2011$vdn1b == "sd/ds", 1, 0)
data_selects_1971_2011$dummy_EDU <- ifelse(data_selects_1971_2011$vdn1b == "edu/udf", 1, 0)
data_selects_1971_2011$dummy_CVP <- ifelse(data_selects_1971_2011$vdn1b == "cvp/pdc", 1, 0)
data_selects_1971_2011$dummy_EVP <- ifelse(data_selects_1971_2011$vdn1b == "evp/pep", 1, 0)
data_selects_1971_2011$dummy_CSP <- ifelse(data_selects_1971_2011$vdn1b == "csp/pcs", 1, 0)
data_selects_1971_2011$dummy_SP <- ifelse(data_selects_1971_2011$vdn1b == "sps/pss", 1, 0)
data_selects_1971_2011$dummy_PdA <- ifelse(data_selects_1971_2011$vdn1b == "pda/pdt", 1, 0)
data_selects_1971_2011$dummy_SolidaritéS <- ifelse(data_selects_1971_2011$vdn1b == "sol.", 1, 0)
data_selects_1971_2011$dummy_PSA <- ifelse(data_selects_1971_2011$vdn1b == "psa (psu)", 1, 0)
data_selects_1971_2011$dummy_FGA <- ifelse(data_selects_1971_2011$vdn1b == "fga/avf", 1, 0)
data_selects_1971_2011$dummy_GPS <- ifelse(data_selects_1971_2011$vdn1b == "gps/pes", 1, 0)
data_selects_1971_2011$dummy_glp <- ifelse(data_selects_1971_2011$vdn1b == "GLP/Vert'libéraux", 1, 0)
data_selects_1971_2011$dummy_LdU <- ifelse(data_selects_1971_2011$vdn1b == "ldu/adi", 1, 0)
data_selects_1971_2011$dummy_BDP <- ifelse(data_selects_1971_2011$vdn1b == "BDP", 1, 0)
data_selects_1971_2011$dummy_FDP <- ifelse(data_selects_1971_2011$vdn1b == "fdp/prd", 1, 0)
data_selects_1971_2011$dummy_LPS <- ifelse(data_selects_1971_2011$vdn1b == "lps/pls", 1, 0)

## Probit-Modelle schätzen
# survey design definieren (für die Schätzung der Wahlbeteiligung wird analog zu Lutz (2012) die Gewichtungsvariable "weightst" verwendet und für die Schätzung des Wahlentscheides die Gewichtungskette aus den Variablen weightc * weightp)
design_selects_1971_2011_decision <- svydesign(ids = ~1,
                                               data = data_selects_1971_2011,
                                               weights = (data_selects_1971_2011$weightc * data_selects_1971_2011$weightp))

design_selects_1971_2011_turnout <- svydesign(ids = ~1,
                                              data = data_selects_1971_2011,
                                              weights = data_selects_1971_2011$weightst)

# Hilfsvariable für alle relevanten Parteien definieren
party_names <- c("SVP",
                 "Lega",
                 "SD",
                 "EDU",
                 "CVP",
                 "EVP",
                 "CSP",
                 "SP",
                 "PdA",
                 "SolidaritéS",
                 "PSA",
                 "FGA",
                 "GPS",
                 "glp",
                 "LdU",
                 "BDP",
                 "FDP",
                 "LPS")

# Je ein Probit-GL-Modell für die Jahre 1991-2011 für die Wahlbeteiligung (mit und ohne "sex"; 6 * 2 = 12 Modelle) sowie für die Wahlwahrscheinlichkeit aller relevanten Parteien schätzen (6 * 18 = 108 Modelle); vorausgesagte Wahrscheinlichkeiten für die verschiedenen Altersgruppen für die Wahlbeteiligung sowie die Wahl aller relevanten Parteien in den Jahren 1991-2011 berechnen; 95%-Konfidenzintervalle konventionell sowie mittels Simulation berechnen (jeweils 100'000 Zufallsziehungen); berechnen, ob die vorausgesagten Wahrscheinlichkeiten statistisch signifikant (95%-Konfidenzniveau) unterschiedlich gegenüber der Referenzkategorie ausfallen (auch mittels Simulation); Chi-Quadrat-Test für alle Modelle durchführen
age_category_levels <- c("18-24" = 1,
                         "25-34" = 2,
                         "35-44" = 3,
                         "45-54" = 4,
                         "55-64" = 5,
                         "65-74" = 6,
                         "75+" = 7)

results_decision <- array(dim = c(6, 18, 7, 8),
                          dimnames = list(seq(1991, 2011, by = 4),
                                          party_names,
                                          names(age_category_levels),
                                          c("y_star", "y_hat", "lower_ci", "upper_ci", "lower_ci_sim", "upper_ci_sim", "sig", "p_chi2")))

results_turnout <- array(dim = c(6, 3, 7, 8),
                         dimnames = list(seq(1991, 2011, by = 4),
                                         c("total", "male", "female"),
                                         names(age_category_levels),
                                         c("y_star", "y_hat", "lower_ci", "upper_ci", "lower_ci_sim", "upper_ci_sim", "sig", "p_chi2")))

for ( i in seq(1991, 2011, by = 4) ) {

  assign(paste("model", i, "turnout", sep = "_"),
         svyglm(vp1 ~ age_category,
                design = design_selects_1971_2011_turnout,
                subset = (year == i),
                family = quasibinomial(link = "probit")))

  assign(paste("model", i, "turnout_sex", sep = "_"),
         svyglm(vp1 ~ sex + age_category,
                design = design_selects_1971_2011_turnout,
                subset = (year == i),
                family = quasibinomial(link = "probit")))

  random_draws_turnout <- mvrnorm(100000,
                                  coef(get(paste("model", i, "turnout", sep = "_"))),
                                  vcov(get(paste("model", i, "turnout", sep = "_"))))

  random_draws_turnout_sex <- mvrnorm(100000,
                                      coef(get(paste("model", i, "turnout_sex", sep = "_"))),
                                      vcov(get(paste("model", i, "turnout_sex", sep = "_"))))

  random_draws_y_star_reference_total <- random_draws_turnout %*% c(1, 0, 0, 0, 0, 0, 0)
  random_draws_y_hat_reference_total <- pnorm(random_draws_y_star_reference_total)
  random_draws_y_star_reference_male <- random_draws_turnout_sex %*% c(1, 0, 0, 0, 0, 0, 0, 0)
  random_draws_y_hat_reference_male <- pnorm(random_draws_y_star_reference_male)
  random_draws_y_star_reference_female <- random_draws_turnout_sex %*% c(1, 1, 0, 0, 0, 0, 0, 0)
  random_draws_y_hat_reference_female <- pnorm(random_draws_y_star_reference_female)

  for ( j in age_category_levels ) {

    allocation_x_total <- c(1, ifelse(j == 1, 1, 0), ifelse(j == 2, 1, 0), ifelse(j == 3, 1, 0), ifelse(j == 5, 1, 0), ifelse(j == 6, 1, 0), ifelse(j == 7, 1, 0))
    allocation_x_male <- c(1, 0, ifelse(j == 1, 1, 0), ifelse(j == 2, 1, 0), ifelse(j == 3, 1, 0), ifelse(j == 5, 1, 0), ifelse(j == 6, 1, 0), ifelse(j == 7, 1, 0))
    allocation_x_female <- c(1, 1, ifelse(j == 1, 1, 0), ifelse(j == 2, 1, 0), ifelse(j == 3, 1, 0), ifelse(j == 5, 1, 0), ifelse(j == 6, 1, 0), ifelse(j == 7, 1, 0))

    random_draws_y_star_total <- random_draws_turnout %*% allocation_x_total
    random_draws_y_hat_total <- pnorm(random_draws_y_star_total)
    random_draws_difference_total <- random_draws_y_hat_reference_total - random_draws_y_hat_total
    random_draws_y_star_male <- random_draws_turnout_sex %*% allocation_x_male
    random_draws_y_hat_male <- pnorm(random_draws_y_star_male)
    random_draws_difference_male <- random_draws_y_hat_reference_male - random_draws_y_hat_male
    random_draws_y_star_female <- random_draws_turnout_sex %*% allocation_x_female
    random_draws_y_hat_female <- pnorm(random_draws_y_star_female)
    random_draws_difference_female <- random_draws_y_hat_reference_female - random_draws_y_hat_female

    results_turnout[as.character(i), "total", names(age_category_levels[j]), "lower_ci"] <- pnorm(confint(get(paste("model", i, "turnout", sep = "_")))[, 1] %*% allocation_x_total)
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "upper_ci"] <- pnorm(confint(get(paste("model", i, "turnout", sep = "_")))[, 2] %*% allocation_x_total)
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "lower_ci_sim"] <- min(sort(random_draws_y_hat_total)[2500], sort(random_draws_y_hat_total)[97500])
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "upper_ci_sim"] <- max(sort(random_draws_y_hat_total)[2500], sort(random_draws_y_hat_total)[97500])
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "sig"] <- ifelse (sort(random_draws_difference_total)[2500] * sort(random_draws_difference_total)[97500] > 0, TRUE, FALSE)
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "y_star"] <- coef(get(paste("model", i, "turnout", sep = "_"))) %*% allocation_x_total
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "y_hat"] <- pnorm(results_turnout[as.character(i), "total", names(age_category_levels[j]), "y_star"])
    results_turnout[as.character(i), "total", names(age_category_levels[j]), "p_chi2"] <- wald.test(b = coef(get(paste("model", i, "turnout", sep = "_"))), Sigma = vcov(get(paste("model", i, "turnout", sep = "_"))), Terms = 2:7)$result$chi2[3]
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "lower_ci"] <- pnorm(confint(get(paste("model", i, "turnout_sex", sep = "_")))[, 1] %*% allocation_x_male)
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "upper_ci"] <- pnorm(confint(get(paste("model", i, "turnout_sex", sep = "_")))[, 2] %*% allocation_x_male)
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "lower_ci_sim"] <- min(sort(random_draws_y_hat_male)[2500], sort(random_draws_y_hat_male)[97500])
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "upper_ci_sim"] <- max(sort(random_draws_y_hat_male)[2500], sort(random_draws_y_hat_male)[97500])
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "sig"] <- ifelse (sort(random_draws_difference_male)[2500] * sort(random_draws_difference_male)[97500] > 0, TRUE, FALSE)
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "y_star"] <- coef(get(paste("model", i, "turnout_sex", sep = "_"))) %*% allocation_x_male
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "y_hat"] <- pnorm(results_turnout[as.character(i), "male", names(age_category_levels[j]), "y_star"])
    results_turnout[as.character(i), "male", names(age_category_levels[j]), "p_chi2"] <- wald.test(b = coef(get(paste("model", i, "turnout_sex", sep = "_"))), Sigma = vcov(get(paste("model", i, "turnout_sex", sep = "_"))), Terms = 2:8)$result$chi2[3]
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "lower_ci"] <- pnorm(confint(get(paste("model", i, "turnout_sex", sep = "_")))[, 1] %*% allocation_x_female)
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "upper_ci"] <- pnorm(confint(get(paste("model", i, "turnout_sex", sep = "_")))[, 2] %*% allocation_x_female)
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "lower_ci_sim"] <- min(sort(random_draws_y_hat_female)[2500], sort(random_draws_y_hat_female)[97500])
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "upper_ci_sim"] <- max(sort(random_draws_y_hat_female)[2500], sort(random_draws_y_hat_female)[97500])
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "sig"] <- ifelse (sort(random_draws_difference_female)[2500] * sort(random_draws_difference_female)[97500] > 0, TRUE, FALSE)
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "y_star"] <- coef(get(paste("model", i, "turnout_sex", sep = "_"))) %*% allocation_x_female
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "y_hat"] <- pnorm(results_turnout[as.character(i), "female", names(age_category_levels[j]), "y_star"])
    results_turnout[as.character(i), "female", names(age_category_levels[j]), "p_chi2"] <- wald.test(b = coef(get(paste("model", i, "turnout_sex", sep = "_"))), Sigma = vcov(get(paste("model", i, "turnout_sex", sep = "_"))), Terms = 2:8)$result$chi2[3]
  }

  for ( j in party_names ) {

    glm_formula <- formula(paste("dummy_", j, " ~ age_category", sep = ""))

    assign(paste("model", i, j, sep = "_"),
           svyglm(glm_formula,
                  design = design_selects_1971_2011_decision,
                  subset = (year == i),
                  family = quasibinomial(link = "probit")))

    random_draws <- mvrnorm(100000,
                            coef(get(paste("model", i, j, sep = "_"))),
                            vcov(get(paste("model", i, j, sep = "_"))))

    random_draws_y_star_reference <- random_draws %*% c(1, 0, 0, 0, 0, 0, 0)
    random_draws_y_hat_reference <- pnorm(random_draws_y_star_reference)

    for ( k in age_category_levels ) {

      allocation_x <- c(1, ifelse(k == 1, 1, 0), ifelse(k == 2, 1, 0), ifelse(k == 3, 1, 0), ifelse(k == 5, 1, 0), ifelse(k == 6, 1, 0), ifelse(k == 7, 1, 0))

      random_draws_y_star <- random_draws %*% allocation_x
      random_draws_y_hat <- pnorm(random_draws_y_star)
      random_draws_difference <- random_draws_y_hat_reference - random_draws_y_hat

      results_decision[as.character(i), j, names(age_category_levels[k]), "lower_ci"] <- pnorm(confint(get(paste("model", i, j, sep = "_")))[, 1] %*% allocation_x)
      results_decision[as.character(i), j, names(age_category_levels[k]), "upper_ci"] <- pnorm(confint(get(paste("model", i, j, sep = "_")))[, 2] %*% allocation_x)
      results_decision[as.character(i), j, names(age_category_levels[k]), "lower_ci_sim"] <- min(sort(random_draws_y_hat)[2500], sort(random_draws_y_hat[97500]))
      results_decision[as.character(i), j, names(age_category_levels[k]), "upper_ci_sim"] <- max(sort(random_draws_y_hat)[2500], sort(random_draws_y_hat[97500]))
      results_decision[as.character(i), j, names(age_category_levels[k]), "sig"] <- ifelse (sort(random_draws_difference)[2500] * sort(random_draws_difference)[97500] > 0, TRUE, FALSE)
      results_decision[as.character(i), j, names(age_category_levels[k]), "y_star"] <- coef(get(paste("model", i, j, sep = "_"))) %*% allocation_x
      results_decision[as.character(i), j, names(age_category_levels[k]), "y_hat"] <- pnorm(results_decision[as.character(i), j, names(age_category_levels[k]), "y_star"])
      results_decision[as.character(i), j, names(age_category_levels[k]), "p_chi2"] <- wald.test(b = coef(get(paste("model", i, j, sep = "_"))), Sigma = vcov(get(paste("model", i, j, sep = "_"))), Terms = 2:7)$result$chi2[3]
    }
  }
}


### Wahlresultate berechnen (bei tatsächlicher, gleicher sowie  umgekehrter Wahlbeteiligung der verschiedenen Altersgruppen)
results_decision_total <- array(dim = c(6, 18, 3), dimnames = list(seq(1991, 2011, by = 4), party_names, c("real_turnout", "even_turnout", "inverse_turnout")))

for ( i in seq(1991, 2011, by = 4) ) {

  turnout_total <- sum(subset(data_selects_1971_2011, year == i & vp1 == "yes")$weightst) / length(subset(data_selects_1971_2011, year == i)$userid)
  nr_of_voters_total <- data_age_category_1971_2014[as.character(i), "total"] * turnout_total
  nr_of_voters_real <- 0
  nr_of_voters_inverse <- 0

  for ( j in age_category_levels ) {

    nr_of_voters_real <- nr_of_voters_real + (data_age_category_1971_2014[as.character(i), names(age_category_levels)[j]] * results_turnout[as.character(i), "total", names(age_category_levels)[j], "y_hat"])
    nr_of_voters_inverse <- nr_of_voters_inverse + (data_age_category_1971_2014[as.character(i), names(age_category_levels)[j]] * results_turnout[as.character(i), "total", names(age_category_levels)[abs(j - 8)], "y_hat"])
  }

  for ( j in party_names ) {

    result_real_turnout <- 0
    result_even_turnout <- 0
    result_inverse_turnout <- 0

    for ( k in age_category_levels ) {

      result_real_turnout <- result_real_turnout + (data_age_category_1971_2014[as.character(i), names(age_category_levels)[k]] * results_decision[as.character(i), j, k, "y_hat"] * results_turnout[as.character(i), "total", names(age_category_levels)[k], "y_hat"])
      result_even_turnout <- result_even_turnout + (data_age_category_1971_2014[as.character(i), names(age_category_levels)[k]] * results_decision[as.character(i), j, k, "y_hat"] * turnout_total)
      result_inverse_turnout <- result_inverse_turnout + (data_age_category_1971_2014[as.character(i), names(age_category_levels)[k]] * results_decision[as.character(i), j, k, "y_hat"] * results_turnout[as.character(i), "total", names(age_category_levels)[abs(k - 8)], "y_hat"])
    }
    results_decision_total[as.character(i), j, "real_turnout"] <- result_real_turnout / nr_of_voters_real
    results_decision_total[as.character(i), j, "even_turnout"] <- result_even_turnout / nr_of_voters_total
    results_decision_total[as.character(i), j, "inverse_turnout"] <- result_inverse_turnout / nr_of_voters_inverse
  }
}


### Resultate der Chi-Quadrat-Tests sichten
# Test-Werte
round(results_turnout[, "total", , "p_chi2"], digits = 4)
round(results_decision[, , , "p_chi2"], digits = 4)

# welche Werte sind signifikant auf dem 95%-Konfidenzniveau?
round(results_turnout[, "total", , "p_chi2"], digits = 4) < 0.05 # alle!
round(results_decision[, "GPS", , "p_chi2"], digits = 4) < 0.05 # 1991 nicht!
round(results_decision[, "SP", , "p_chi2"], digits = 4) < 0.05 # 1991 und 1999 nicht!
round(results_decision[, "glp", , "p_chi2"], digits = 4) < 0.05 # nur 2011 (logisch!)
round(results_decision[, "BDP", , "p_chi2"], digits = 4) < 0.05 # nur 2011 (logisch!)
round(results_decision[, "CVP", , "p_chi2"], digits = 4) < 0.05 # nur 1991!!
round(results_decision[, "FDP", , "p_chi2"], digits = 4) < 0.05 # 1991 und 2007 nicht!
round(results_decision[, "SVP", , "p_chi2"], digits = 4) < 0.05 # 1991 und 1999 nicht!


### Plotten mittels Plotly
## Berechnete Wahlteilnahmewahrscheinlichkeiten plotten
plot_turnout <- plot_ly(type = "scatter",
                        mode = "lines+markers",
                        x = dimnames(results_turnout)[[3]],
                        y = round(results_turnout["2011", "total", , "y_hat"] * 100, digits = 0),
                        name = "2011",
                        marker = list(symbol = "circle-dot",
                                      size = 12),
                        line = list(color = "#6600cc",
                                    width = 4),
                        opacity = 0.5,
                        hoverinfo = "name+x+y",
                        legendgroup = "2011")

plot_turnout <- add_trace(mode = "lines+markers",
                          x = dimnames(results_turnout)[[3]],
                          y = round(results_turnout["2007", "total", , "y_hat"] * 100, digits = 0),
                          name = "2007",
                          line = list(color = "#0033cc",
                                      width = 4),
                          opacity = 0.5,
                          visible = "legendonly",
                          hoverinfo = "name+x+y",
                          legendgroup = "2007")

plot_turnout <- add_trace(mode = "lines+markers",
                          x = dimnames(results_turnout)[[3]],
                          y = round(results_turnout["2003", "total", , "y_hat"] * 100,
                                    digits = 0),
                          name = "2003",
                          line = list(color = "#0099cc",
                                      width = 4),
                          opacity = 0.5,
                          visible = "legendonly",
                          hoverinfo = "name+x+y",
                          legendgroup = "2003")

plot_turnout <- add_trace(mode = "lines+markers",
                          x = dimnames(results_turnout)[[3]],
                          y = round(results_turnout["1999", "total", , "y_hat"] * 100,
                                    digits = 0),
                          name = "1999",
                          line = list(color = "#00cc33",
                                      width = 4),
                          opacity = 0.5,
                          visible = "legendonly",
                          hoverinfo = "name+x+y",
                          legendgroup = "1999")

plot_turnout <- add_trace(mode = "lines+markers",
                          x = dimnames(results_turnout)[[3]],
                          y = round(results_turnout["1995", "total", , "y_hat"] * 100,
                                    digits = 0),
                          name = "1995",
                          line = list(color = "#ff9900",
                                      width = 4),
                          opacity = 0.5,
                          visible = "legendonly",
                          hoverinfo = "name+x+y",
                          legendgroup = "1995")

plot_turnout <- add_trace(mode = "lines+markers",
                          x = dimnames(results_turnout)[[3]],
                          y = round(results_turnout["1991", "total", , "y_hat"] * 100,
                                    digits = 0),
                          name = "1991",
                          line = list(color = "#cc3300",
                                      width = 4), opacity = 0.5,
                          visible = "legendonly",
                          hoverinfo = "name+x+y",
                          legendgroup = "1991")

plot_turnout <- layout(p = plot_turnout,
                       title = "Vorausgesagte Wahrscheinlichkeit der Wahlbeteiligung",
                       xaxis = list(title = "Alter",
                                    fixedrange = TRUE),
                       yaxis = list(title = "Wahlbeteiligung in %",
                                    fixedrange = TRUE),
                       margin = list(l = 55,
                                     r = 55,
                                     t = 60,
                                     b = 60,
                                     pad = 0,
                                     autoexpand = TRUE))

plot_turnout
#plotly_POST(plot_turnout, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wahrscheinlichkeit der Wahlbeteiligung", fileopt = "overwrite", sharing = "public")

## Berechnete Wahlteilnahmewahrscheinlichkeiten inkl. Konfidenzintervalle plotten
plot_turnout_conf <- plot_ly(type = "scatter",
                             mode = "lines+markers",
                             x = dimnames(results_turnout)[[3]],
                             y = round(results_turnout["2011", "total", , "y_hat"] * 100,
                                       digits = 0),
                             name = "2011",
                             marker = list(symbol = "circle-dot",
                                           size = 12),
                             line = list(color = "#6600cc",
                                         width = 4),
                             opacity = 0.5,
                             hoverinfo = "name+x+y",
                             legendgroup = "2011")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2011", "total", , "upper_ci_sim"] * 100, digits = 0),
                               name = "2011",
                               line = list(color = "#b8a3cc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               legendgroup = "2011")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2011", "total", , "lower_ci_sim"] * 100, digits = 0),
                               name = "2011",
                               line = list(color = "#b8a3cc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               legendgroup = "2011")

plot_turnout_conf <- add_trace(mode = "lines+markers",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2007", "total", , "y_hat"] * 100, digits = 0),
                               name = "2007",
                               line = list(color = "#0033cc",
                                           width = 4),
                               opacity = 0.5,
                               visible = "legendonly",
                               hoverinfo = "name+x+y",
                               legendgroup = "2007")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2007", "total", , "upper_ci_sim"] * 100, digits = 0),
                               name = "2007",
                               line = list(color = "#a3adcc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "2007")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2007", "total", , "lower_ci_sim"] * 100, digits = 0),
                               name = "2007",
                               line = list(color = "#a3adcc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "2007")

plot_turnout_conf <- add_trace(mode = "lines+markers",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2003", "total", , "y_hat"] * 100, digits = 0),
                               name = "2003",
                               line = list(color = "#0099cc",
                                           width = 4),
                               opacity = 0.5,
                               visible = "legendonly",
                               hoverinfo = "name+x+y",
                               legendgroup = "2003")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2003", "total", , "upper_ci_sim"] * 100, digits = 0),
                               name = "2003",
                               line = list(color = "#a3c2cc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "2003")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["2003", "total", , "lower_ci_sim"] * 100, digits = 0),
                               name = "2003",
                               line = list(color = "#a3c2cc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "2003")

plot_turnout_conf <- add_trace(mode = "lines+markers",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1999", "total", , "y_hat"] * 100, digits = 0),
                               name = "1999",
                               line = list(color = "#00cc33",
                                           width = 4),
                               opacity = 0.5,
                               visible = "legendonly",
                               hoverinfo = "name+x+y",
                               legendgroup = "1999")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1999", "total", , "upper_ci_sim"] * 100, digits = 0),
                               name = "1999",
                               line = list(color = "#a3ccad",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "1999")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1999", "total", , "lower_ci_sim"] * 100, digits = 0),
                               name = "1999",
                               line = list(color = "#a3ccad",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "1999")

plot_turnout_conf <- add_trace(mode = "lines+markers",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1995", "total", , "y_hat"] * 100, digits = 0),
                               name = "1995",
                               line = list(color = "#ff9900",
                                           width = 4),
                               opacity = 0.5,
                               visible = "legendonly",
                               hoverinfo = "name+x+y",
                               legendgroup = "1995")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1995", "total", , "upper_ci_sim"] * 100, digits = 0),
                               name = "1995",
                               line = list(color = "#ffebcc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "1995")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1995", "total", , "lower_ci_sim"] * 100, digits = 0),
                               name = "1995",
                               line = list(color = "#ffebcc",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "1995")

plot_turnout_conf <- add_trace(mode = "lines+markers",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1991", "total", , "y_hat"] * 100, digits = 0),
                               name = "1991",
                               line = list(color = "#cc3300",
                                           width = 4),
                               opacity = 0.5,
                               visible = "legendonly",
                               hoverinfo = "name+x+y",
                               legendgroup = "1991")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1991", "total", , "upper_ci_sim"] * 100, digits = 0),
                               name = "1991",
                               line = list(color = "#ccada3",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "1991")

plot_turnout_conf <- add_trace(mode = "lines",
                               x = dimnames(results_turnout)[[3]],
                               y = round(results_turnout["1991", "total", , "lower_ci_sim"] * 100, digits = 0),
                               name = "1991",
                               line = list(color = "#ccada3",
                                           width = 0),
                               opacity = 0.5,
                               fill = "tonexty",
                               showlegend = FALSE,
                               hoverinfo = "name+x+y",
                               visible = "legendonly",
                               legendgroup = "1991")

plot_turnout_conf <- layout(p = plot_turnout_conf,
                            title = "Vorausgesagte Wahrscheinlichkeit der Wahlbeteiligung",
                            xaxis = list(title = "Alter",
                                         fixedrange = TRUE),
                            yaxis = list(title = "Wahlbeteiligung in %",
                                         fixedrange = TRUE),
                            margin = list(l = 55,
                                          r = 55,
                                          t = 60,
                                          b = 60,
                                          pad = 0,
                                          autoexpand = TRUE))

plot_turnout_conf
#plotly_POST(plot_turnout_conf, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wahrscheinlichkeit der Wahlbeteiligung inkl. Konfidenzintervalle", fileopt = "overwrite", sharing = "public")

## Berechnete Wahlentscheidwahrscheinlichkeiten plotten
# GPS und SP
plot_decision_GPS <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "GPS", , "y_hat"] * 100, digits = 0), name = "Grüne 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#bbff1c", width = 4), opacity = 0.7)
plot_decision_GPS <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2007", "GPS", , "y_hat"] * 100, digits = 0), name = "Grüne 2007", line = list(color = "#9fd918", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_GPS <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2003", "GPS", , "y_hat"] * 100, digits = 0), name = "Grüne 2003", line = list(color = "#84b414", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_GPS <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1999", "GPS", , "y_hat"] * 100, digits = 0), name = "Grüne 1999", line = list(color = "#5e800e", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_GPS <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1995", "GPS", , "y_hat"] * 100, digits = 0), name = "Grüne 1995", line = list(color = "#384d09", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_GPS <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1991", "GPS", , "y_hat"] * 100, digits = 0), name = "Grüne 1991", line = list(color = "#131a03", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_GPS <- layout(p = plot_decision_GPS, title = "Grüne", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "Wahlwahrscheinlichkeit in %", fixedrange = TRUE, range = c(0, 40), zeroline = FALSE))
plot_decision_SP <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "SP", , "y_hat"] * 100, digits = 0), name = "SP 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#ff2d3f", width = 4), opacity = 0.7)
plot_decision_SP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2007", "SP", , "y_hat"] * 100, digits = 0), name = "SP 2007", line = list(color = "#e62939", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2003", "SP", , "y_hat"] * 100, digits = 0), name = "SP 2003", line = list(color = "#bf222f", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1999", "SP", , "y_hat"] * 100, digits = 0), name = "SP 1999", line = list(color = "#8c1923", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1995", "SP", , "y_hat"] * 100, digits = 0), name = "SP 1995", line = list(color = "#591016", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1991", "SP", , "y_hat"] * 100, digits = 0), name = "SP 1991", line = list(color = "#260709", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SP <- layout(p = plot_decision_SP, title = "SP", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "", fixedrange = TRUE, range = c(0, 40), zeroline = FALSE))
plot_decision_SP_GPS <- subplot(plot_decision_GPS, plot_decision_SP) %>% layout(title = "Vorausgesagte Wahlwahrscheinlichkeit nach Partei und Jahr", margin = list(l = 55, r = 55, t = 60, b = 55, pad = 0, autoexpand = TRUE))
plot_decision_SP_GPS <- config(p = plot_decision_SP_GPS, displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)
plot_decision_SP_GPS
#plotly_POST(plot_decision_SP_GPS, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wahlwahrscheinlichkeit für SP und GPS", fileopt = "overwrite", sharing = "public")

# glp und BDP
plot_decision_glp <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "glp", , "y_hat"] * 100, digits = 0), name = "glp 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#e5ff4a", width = 4), opacity = 0.7)
plot_decision_glp <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2007", "glp", , "y_hat"] * 100, digits = 0), name = "glp 2007", line = list(color = "#c3d93f", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_glp <- layout(p = plot_decision_glp, title = "glp", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "Vorausgesagte Wahlwahrscheinlichkeit in %", fixedrange = TRUE, range = c(0, 40), zeroline = FALSE))
plot_decision_BDP <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "BDP", , "y_hat"] * 100, digits = 0), name = "BDP 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#ffdd00", width = 4), opacity = 0.7)
plot_decision_BDP <- layout(p = plot_decision_BDP, title = "BDP", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "", fixedrange = TRUE, range = c(0, 40), zeroline = FALSE))
plot_decision_glp_BDP <- subplot(plot_decision_glp, plot_decision_BDP) %>% layout(title = "", margin = list(l = 55, r = 55, t = 20, b = 55, pad = 0, autoexpand = TRUE))
plot_decision_glp_BDP <- config(p = plot_decision_glp_BDP, displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)
plot_decision_glp_BDP
#plotly_POST(plot_decision_glp_BDP, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wahlwahrscheinlichkeit für glp und BDP", fileopt = "overwrite", sharing = "public")

# CVP und FDP
plot_decision_CVP <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "CVP", , "y_hat"] * 100, digits = 0), name = "CVP 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#ff850c", width = 4), opacity = 0.7)
plot_decision_CVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2007", "CVP", , "y_hat"] * 100, digits = 0), name = "CVP 2007", line = list(color = "#d9710a", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_CVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2003", "CVP", , "y_hat"] * 100, digits = 0), name = "CVP 2003", line = list(color = "#a65608", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_CVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1999", "CVP", , "y_hat"] * 100, digits = 0), name = "CVP 1999", line = list(color = "#733c05", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_CVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1995", "CVP", , "y_hat"] * 100, digits = 0), name = "CVP 1995", line = list(color = "#402103", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_CVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1991", "CVP", , "y_hat"] * 100, digits = 0), name = "CVP 1991", line = list(color = "#0d0701", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_CVP <- layout(p = plot_decision_CVP, title = "CVP", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "Vorausgesagte Wahlwahrscheinlichkeit in %", range = c(0, 40), fixedrange = TRUE, zeroline = FALSE))
plot_decision_FDP <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "FDP", , "y_hat"] * 100, digits = 0), name = "FDP 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#197eff", width = 4), opacity = 0.7)
plot_decision_FDP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2007", "FDP", , "y_hat"] * 100, digits = 0), name = "FDP 2007", line = list(color = "#1465cc", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_FDP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2003", "FDP", , "y_hat"] * 100, digits = 0), name = "FDP 2003", line = list(color = "#104fa0", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_FDP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1999", "FDP", , "y_hat"] * 100, digits = 0), name = "FDP 1999", line = list(color = "#0d3f80", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_FDP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1995", "FDP", , "y_hat"] * 100, digits = 0), name = "FDP 1995", line = list(color = "#08264d", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_FDP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1991", "FDP", , "y_hat"] * 100, digits = 0), name = "FDP 1991", line = list(color = "#030d1a", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_FDP <- layout(p = plot_decision_FDP, title = "FDP", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "", range = c(0, 40), fixedrange = TRUE, zeroline = FALSE))
plot_decision_CVP_FDP <- subplot(plot_decision_CVP, plot_decision_FDP) %>% layout(title = "", margin = list(l = 55, r = 55, t = 20, b = 55, pad = 0, autoexpand = TRUE))
plot_decision_CVP_FDP <- config(p = plot_decision_CVP_FDP, displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)
plot_decision_CVP_FDP
#plotly_POST(plot_decision_CVP_FDP, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wahlwahrscheinlichkeit für CVP und FDP", fileopt = "overwrite", sharing = "public")

# SVP
plot_decision_SVP <- plot_ly(type = "scatter", mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2011", "SVP", , "y_hat"] * 100, digits = 0), name = "SVP 2011", marker = list(symbol = "circle-dot", size = 12), line = list(color = "#4e991d", width = 4), opacity = 0.7)
plot_decision_SVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2007", "SVP", , "y_hat"] * 100, digits = 0), name = "SVP 2007", line = list(color = "#3f7b17", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["2003", "SVP", , "y_hat"] * 100, digits = 0), name = "SVP 2003", line = list(color = "#346613", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1999", "SVP", , "y_hat"] * 100, digits = 0), name = "SVP 1999", line = list(color = "#21400c", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1995", "SVP", , "y_hat"] * 100, digits = 0), name = "SVP 1995", line = list(color = "#142607", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SVP <- add_trace(mode = "lines+markers", x = dimnames(results_decision)[[3]], y = round(results_decision["1991", "SVP", , "y_hat"] * 100, digits = 0), name = "SVP 1991", line = list(color = "#000000", width = 4), opacity = 0.7, visible = "legendonly")
plot_decision_SVP <- layout(p = plot_decision_SVP, title = "", xaxis = list(title = "Alter", fixedrange = TRUE), yaxis = list(title = "Vorausgesagte Wahlwahrscheinlichkeit in %", fixedrange = TRUE, range = c(0, 40), zeroline = FALSE), margin = list(l = 55, r = 55, t = 20, b = 65, pad = 0, autoexpand = TRUE), autosize = "initial", heigth = 450)
plot_decision_SVP <- config(p = plot_decision_SVP, displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)
plot_decision_SVP
#plotly_POST(plot_decision_SVP, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wahlwahrscheinlichkeit für SVP", fileopt = "overwrite", sharing = "public")

## Berechnete kumulierte Wahlwahrscheinlichkeiten je nach Beteiligungsmodus plotten
# "results_decision_total" neu organisieren in "results_decision_total_v2"
results_decision_total_v2 <- data.frame("year" = NA,
                                        "party" = NA,
                                        "turnout_mode" = NA,
                                        "result" = NA,
                                        "relative_shift" = NA)

counter <- 1

for ( i in names(results_decision_total[, 1, 1]) ) {

  for ( j in names(results_decision_total[1, , 1]) ) {

    if ( is.element(j, c("GPS", "SP", "glp", "BDP", "CVP", "FDP", "SVP")) ) {

      j_german <- j

      if ( j == "GPS" ) { j_german <- "Grüne" }

      for ( k in names(results_decision_total[1, 1, ]) ) {

        k_german <- ""

        if ( k == "real_turnout") { k_german <- "real"}
        if ( k == "even_turnout") { k_german <- "gleichmässig"}
        if ( k == "inverse_turnout") { k_german <- "spiegelverkehrt"}

        results_decision_total_v2[counter, ] <- list(as.integer(i),
                                                     j_german,
                                                     k_german,
                                                     results_decision_total[i, j, k],
                                                     1)
        counter <- counter + 1
      }
    }
  }
}
# Sicherstellung korrekter Datentypen
results_decision_total_v2 <- transform(results_decision_total_v2, year = as.integer(year), result = as.numeric(result))

# Reihenfolge der Parteien anpassen
target <- c("Grüne", "SP", "glp", "BDP", "CVP", "FDP", "SVP")
results_decision_total_v2$party <- factor(results_decision_total_v2$party, levels = target)
results_decision_total_v2 <- results_decision_total_v2[order(results_decision_total_v2$year, results_decision_total_v2$party),]

# Plotting
plot_decision_total <- plot_ly(subset(results_decision_total_v2, year == 2011), type = "bar", x = party, y = round(result * 100, digits = 1), name = "2011", opacity = 0.7, legendgroup = "2011", color = turnout_mode, colors = c("#6600cc", "#9933ff", "#cc99ff"), hoverinfo = "y+name")
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2007), type = "bar", x = party, y = round(result * 100, digits = 1), name = "2007", opacity = 0.7, legendgroup = "2007", visible = "legendonly", color = turnout_mode, colors = c("#0033cc", "#3366ff", "#99b3ff"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2003), type = "bar", x = party, y = round(result * 100, digits = 1), name = "2003", opacity = 0.7, legendgroup = "2003", visible = "legendonly", color = turnout_mode, colors = c("#0099cc", "#33ccff", "#99e6ff"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1999), type = "bar", x = party, y = round(result * 100, digits = 1), name = "1999", opacity = 0.7, legendgroup = "1999", visible = "legendonly", color = turnout_mode, colors = c("#00cc33", "#33ff66", "#99ffb3"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1995), type = "bar", x = party, y = round(result * 100, digits = 1), name = "1995", opacity = 0.7, legendgroup = "1995", visible = "legendonly", color = turnout_mode, colors = c("#ff9900", "#ffc266", "#ffebcc"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1991), type = "bar", x = party, y = round(result * 100, digits = 1), name = "1991", opacity = 0.7, legendgroup = "1991", visible = "legendonly", color = turnout_mode, colors = c("#cc3300", "#ff6633", "#ffb399"))
plot_decision_total <- layout(p = plot_decision_total, title = "Vorausgesagte Wähleranteile nach Beteiligungsmodus", xaxis = list(title = "", fixedrange = TRUE), yaxis = list(title = "vorausgesagter Wähleranteil in %", fixedrange = TRUE, zeroline = TRUE), legend = list(traceorder = "grouped"), margin = list(l = 55, r = 55, t = 60, b = 40, pad = 0, autoexpand = TRUE, height = 700))
plot_decision_total <- config(p = plot_decision_total, displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)
plot_decision_total
#plotly_POST(plot_decision_total, filename = "DDJ-Blogeintrag 1/Vorausgesagte Wähleranteile nach Beteiligungsmodus", fileopt = "overwrite", sharing = "public")


### Diverses
# Berechnete Wahlteilnahmewahrscheinlichkeiten für 2011 anzeigen
round(results_turnout["2011", , , "y_hat"], digits = 6)
# Berechnete Wahlentscheidwahrscheinlichkeiten für 2011 anzeigen
round(results_decision["2011", , , "y_hat"], digits = 6)
# Berechnete kumulierte Wahlwahrscheinlichkeiten je nach Beteiligungsmodus für 2011 anzeigen
round(results_decision_total["2011", ,], digits = 6)

## simple Berechnung des Wähleranteils der SVP bei unter 25-Jährigen
# naiv
with(data_selects_1971_2011, length(userid[year == 2011 & vp1 == "yes" & age < 25 & vdn1b == "svp/udc"])/length(userid[year == 2011 & vp1 == "yes" & age < 25]))
# mit Gewichtungsvariable "weighttot"
with(data_selects_1971_2011, sum(weighttot[year == 2011 & vp1 == "yes" & age < 25 & vdn1b == "svp/udc"], na.rm = TRUE) / sum(weighttot[year == 2011 & vp1 == "yes" & age < 25], na.rm = TRUE))

# Wahlbeteiligung gemäss Selects-Datensatz berechnen (2011)
sum(subset(data_selects_1971_2011, year == 2011 & vp1 == "yes")$weightst) / length(subset(data_selects_1971_2011, year == 2011)$userid)

# Im Selects-Datensatz befinden sich relativ viele Teilnehmer, welche zwar angaben, gewählt zu haben, bei deren Wahlentscheid dann jedoch "NA" aufgeführt ist
length(subset(data_selects_1971_2011, vp1 == "yes" & is.na(vdn1b))$userid) # über alle 10 enthaltenen Wahljahre sind es 1812 Fälle!
length(subset(data_selects_1971_2011, year == 2011 & vp1 == "yes" & is.na(vdn1b))$userid) # 2011 sind es überdurchschnittlich viele: 252 Fälle!

# Turnout-Plot mit error bars
plot_turnout <- plot_ly(type = "scatter",
                        mode = "lines+markers",
                        x = dimnames(results_turnout)[[3]],
                        y = round(results_turnout["2011", "total", , "y_hat"] * 100, digits = 0),
                        name = "2011",
                        marker = list(symbol = "circle-dot", size = 12),
                        line = list(color = "#6600cc", width = 4),
                        opacity = 0.5, hoverinfo = "name+y",
                        legendgroup = "2011",
                        error_y = list(visible = TRUE,
                                       symmetric = FALSE,
                                       color = "#6600cc",
                                       type = "data",
                                       array = round((results_turnout["2011", "total", , "upper_ci_sim"] - results_turnout["2011", "total", , "y_hat"]) * 100, digits = 0),
                                       arrayminus = round((results_turnout["2011", "total", , "y_hat"] - results_turnout["2011", "total", , "lower_ci_sim"]) * 100, digits = 0)))
plot_turnout

# alternativer Result-Total-Plot
plot_decision_total <- plot_ly(subset(results_decision_total_v2, year == 2011 & turnout_mode == "real"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "real 2011", opacity = 0.7, legendgroup = "2011", marker = list(color = "#6600cc"), hoverinfo = "y+name")
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2011 & turnout_mode == "gleichmässig"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "gleichmässig 2011", opacity = 0.7, legendgroup = "2011", marker = list(color = "#9933ff"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2011 & turnout_mode == "spiegelverkehrt"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "spiegelverkehrt 2011", opacity = 0.7, legendgroup = "2011", marker = list(color = "#cc99ff"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2007 & turnout_mode == "real"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "real 2007", opacity = 0.7, legendgroup = "2007", visible = "legendonly", marker = list(color = "#0033cc"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2007 & turnout_mode == "gleichmässig"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "gleichmässig 2007", opacity = 0.7, legendgroup = "2007", visible = "legendonly", marker = list(color = "#3366ff"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2007 & turnout_mode == "spiegelverkehrt"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "spiegelverkehrt 2007", opacity = 0.7, legendgroup = "2007", visible = "legendonly", marker = list(color = "#99b3ff"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2003 & turnout_mode == "real"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "real 2003", opacity = 0.7, legendgroup = "2003", visible = "legendonly", marker = list(color = "#0099cc"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2003 & turnout_mode == "gleichmässig"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "gleichmässig 2003", opacity = 0.7, legendgroup = "2003", visible = "legendonly", marker = list(color = "#33ccff"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 2003 & turnout_mode == "spiegelverkehrt"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "spiegelverkehrt 2003", opacity = 0.7, legendgroup = "2003", visible = "legendonly", marker = list(color = "#99e6ff"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1999 & turnout_mode == "real"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "real 1999", opacity = 0.7, legendgroup = "1999", visible = "legendonly", marker = list(color = "#00cc33"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1999 & turnout_mode == "gleichmässig"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "gleichmässig 1999", opacity = 0.7, legendgroup = "1999", visible = "legendonly", marker = list(color = "#33ff66"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1999 & turnout_mode == "spiegelverkehrt"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "spiegelverkehrt 1999", opacity = 0.7, legendgroup = "1999", visible = "legendonly", marker = list(color = "#99ffb3"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1995 & turnout_mode == "real"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "real 1995", opacity = 0.7, legendgroup = "1995", visible = "legendonly", marker = list(color = "#ff9900"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1995 & turnout_mode == "gleichmässig"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "gleichmässig 1995", opacity = 0.7, legendgroup = "1995", visible = "legendonly", marker = list(color = "#ffc266"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1995 & turnout_mode == "spiegelverkehrt"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "spiegelverkehrt 1995", opacity = 0.7, legendgroup = "1995", visible = "legendonly", marker = list(color = "#ffebcc"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1991 & turnout_mode == "real"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "real 1991", opacity = 0.7, legendgroup = "1991", visible = "legendonly", marker = list(color = "#cc3300"))
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1991 & turnout_mode == "gleichmässig"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "gleichmässig 1991", opacity = 0.7, legendgroup = "1991", visible = "legendonly", marker = list(color = "#ff6633"), showlegend = FALSE)
plot_decision_total <- add_trace(subset(results_decision_total_v2, year == 1991 & turnout_mode == "spiegelverkehrt"), type = "bar", x = party, y = round(result * 100, digits = 1), name = "spiegelverkehrt 1991", opacity = 0.7, legendgroup = "1991", visible = "legendonly", marker = list(color = "#ffb399"), showlegend = FALSE)
plot_decision_total <- layout(p = plot_decision_total, title = "Vorausgesagte Wähleranteile nach Beteiligungsmodus", xaxis = list(title = "", fixedrange = TRUE), yaxis = list(title = "vorausgesagter Wähleranteil in %", zeroline = TRUE), legend = list(traceorder = "grouped"), margin = list(l = 55, r = 55, t = 80, b = 80, pad = 0, autoexpand = TRUE))
plot_decision_total <- config(p = plot_decision_total, displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)
plot_decision_total


### test stuff
real_result_SVP_2011 <- (sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[1] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[1], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[1], "y_hat"]
                         + sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[2] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[2], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[2], "y_hat"]
                         + sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[3] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[3], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[3], "y_hat"]
                         + sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[4] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[4], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[4], "y_hat"]
                         + sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[5] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[5], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[5], "y_hat"]
                         + sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[6] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[6], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[6], "y_hat"]
                         + sum(subset(data_selects_1971_2011, year == 2011 & age_category == names(age_category_levels)[7] & !is.na(vp1))$weighttot)
                         * results_decision[as.character(2011), "SVP", names(age_category_levels)[7], "y_hat"]
                         * results_turnout[as.character(2011), "total", names(age_category_levels)[7], "y_hat"]) / sum(subset(data_selects_1971_2011, year == 2011 & vp1 == "yes")$weighttot)

data_selects_1971_2011$dummy_u25 <- ifelse(data_selects_1971_2011$age < 25, 1, 0)
model_2011_SVP_test <- svyglm(dummy_SVP ~ dummy_u25, design = design_selects_1971_2011_decision, subset = (year == 2011 & !is.na(vdn1b)), family = quasibinomial(link = "probit"))
alloc_u25_svp <- c(1, 1)
ystar_u25_svp <- coef(model_2011_SVP_test) %*% alloc_u25_svp
yhat_u25_svp <- pnorm(ystar_u25_svp)
random_draws_u25_svp <- mvrnorm(10000, coef(model_2011_SVP_test), vcov(model_2011_SVP_test))
random_draws_ystar_u25_svp <- random_draws_u25_svp %*% alloc_u25_svp
random_draws_yhat_u25_svp <- pnorm(random_draws_ystar_u25_svp)
alloc_25_34_svp <- c(1, 0, 1, 0, 0, 0, 0)
ystar_25_34_svp <- coef(model_2011_SVP_test) %*% alloc_25_34_svp
yhat_25_34_svp <- pnorm(ystar_25_34_svp)
random_draws_25_34_svp <- mvrnorm(10000, coef(model_2011_SVP_test), vcov(model_2011_SVP_test))
random_draws_ystar_25_34_svp <- random_draws_25_34_svp %*% alloc_25_34_svp
random_draws_yhat_25_34_svp <- pnorm(random_draws_ystar_25_34_svp)
random_draws_diff <- random_draws_yhat_u25_svp - random_draws_yhat_25_34_svp
print(paste("Die beiden Wahrscheinlichkeiten sind auf dem 95%-Konfidenzniveau", ifelse ( sort(random_draws_diff)[250] * sort(random_draws_diff)[9750] > 0, "", "NICHT"), "signifikant unterschiedlich"))