# /TwitterParlPrint.R Secret

## TwitterParlPrint.R
# Copyright (C) 2016  Salim Brüggemann

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# To get a copy of the GNU General Public License see <http://www.gnu.org/licenses/>.

###############################################################################
# Blogeintrag im UZH-Seminar "Politischer Datenjournalismus" 2016: http://pwipdm.uzh.ch/wordpress/?p=6504
# Autor: Salim Brüggemann, 08-915-126
###############################################################################

# Start from an empty workspace (hidden objects included) and switch to the
# project directory; all data files below are addressed relative to it.
# NOTE(review): both calls are machine-specific side effects of this script.
rm(list = ls(all.names = TRUE))

setwd("~/Dokumente/Ausbildung/Studium UZH/Master/2. Semester/Politischer Datenjournalismus/Blogeinträge/2. Blogeintrag")
library(RMySQL)
library(dplyr)
library(jsonlite)
library(RCurl)
library(twitteR)
library(ggplot2)
library(viridis)
library(plotly)


# Load Bruno's data sets: use the local cache if it exists, otherwise pull both
# tables from the MySQL server and cache them for the next run.
if (file.exists("data_bruno.Rda")) {
  load(file = "data_bruno.Rda")
} else {
  # Fetch one complete table from the given database on the course server.
  #
  # dbname: name of the MySQL database to connect to.
  # table:  name of the table to read in full.
  # Returns the table as a data frame.
  #
  # The connection is closed through on.exit(), so it is released even when
  # one of the queries fails.
  # NOTE(review): the credentials are hard-coded in the source; consider moving
  # them to a configuration outside of version control.
  fetch_table <- function(dbname, table) {
    conn <- dbConnect(MySQL(),
                      username = "slim",
                      password = "KydEmnvE",
                      host = "pwipdm.uzh.ch",
                      dbname = dbname)
    on.exit(dbDisconnect(conn), add = TRUE)

    # make the server deliver text as UTF-8
    dbGetQuery(conn,
               "SET NAMES 'utf8'")

    dbGetQuery(conn,
               paste0("SELECT * FROM `", table, "`"))
  }

  # Bruno's data set of Swiss-politics Twitter accounts
  twitter_accounts_bruno <- fetch_table(dbname = "twitter",
                                        table = "accounts")

  # rename the problematic column name ("function")
  colnames(twitter_accounts_bruno) <- gsub(pattern = "function",
                                           replacement = "role",
                                           x = colnames(twitter_accounts_bruno))

  # Bruno's data set of parliamentary speeches
  parliament_speeches_bruno <- fetch_table(dbname = "parliamentCH",
                                           table = "RedenBV")

  # cache both data sets locally
  save(twitter_accounts_bruno,
       parliament_speeches_bruno,
       file = "data_bruno.Rda",
       compress = "xz",
       compression_level = 9)
}

# Read the Abstimmungsmonitor data (semicolon-separated, strings kept as character)
Abstimmungsvorlagen <- read.csv(file = "Data_Abstimmungsmonitor.csv",
                                sep = ";",
                                stringsAsFactors = FALSE)

# Convert both observation-period bounds from "dd.mm.yyyy" text to Date values
date_columns <- c("Beobachtungszeitraum_Start", "Beobachtungszeitraum_Ende")
Abstimmungsvorlagen[date_columns] <- lapply(Abstimmungsvorlagen[date_columns],
                                            as.Date,
                                            format = "%d.%m.%Y",
                                            tz = "Europe/Zurich")

# Load the politicians' tweets (all tweets of the Twitter accounts in Bruno's
# Swiss-politics data set); the file is expected to provide the `tweets` data frame.
load(file = "twitter_scrape_job.Rda")

# Twitter IDs of the accounts flagged as national level with a legislative role
is_federal_legislator <- twitter_accounts_bruno$level == "national" &
  twitter_accounts_bruno$role == "legislative"
federal_assembly_ids <- twitter_accounts_bruno$twitterID[is_federal_legislator]

# New data frame that only contains tweets by members of the federal parliament
tweets_federal_assembly <- tweets[tweets$twitterUserID %in% federal_assembly_ids, , drop = FALSE]

# Sort both tweet sets by creation time
tweets <- arrange(tweets, created)
tweets_federal_assembly <- arrange(tweets_federal_assembly, created)


# Regular expressions (PCRE) used to assign tweets to the ballot proposals
# (popular initiatives only). Each proposal maps to a vector of patterns; a
# tweet counts as a hit as soon as one of them matches.
#
# Fixes compared to the previous version:
# - "1:12-Initiative" and "#energ15" now carry the "(?i)" flag like all others
# - "Heiratsstrafe": "(?)" -> "(?i)", "(-_)?" -> "(-|_)?", "( |-|-)" -> "( |-|_)",
#   and the separator in "individual...besteuerung" / "ehe...verbot" is optional,
#   so the ordinary spellings "Individualbesteuerung" and "Eheverbot" match
twitter_regexes <- list(
  "Abzockerinitiative" = c(
    "(?i)abzocker",
    "(?i)goldener?( |_|-)?fallschirm",
    "(?i)abgangs?( |_|-)?entschädigung",
    "(?i)aktionärs?( |_|-)?recht",
    "(?i)(?=.*minder)(?=.*(initia|abstimm|vorlage|abst13|votenow|chvote))"
  ),
  "Volkswahl Bundesrat" = c(
    "(?i)(?=.*volk)(?=.*bundesr(a|ä)t)",
    "(?i)volks(-|_)?wahl"
  ),
  "Abschaffung Wehrpflicht" = c(
    "(?i)wehr(-|_| )?pflicht",
    "(?i)unsicherheits( |_|-)?initiative",
    "(?i)(?=.*(militär|armee|streit(kräfte|macht)|miliz))(?=.*(dienst|pflicht|initia|abstimm|vorlage|abst13|votenow|chvote))",
    "(?i)(?=.*gsoa)(?=.*(initia|abstimm|vorlage))"
  ),
  "Steuerabzug für Betreuung eigener Kinder" = c(
    "(?i)(?=.*(familien?|\\bbetreu))(?=.*(initia|abstimm|vorlage))",
    "(?i)(eigen|selbst|fremd)(-|_| )?betreu",
    "(?i)(?=.*steuer)(?=.*abz(ug|üg|iehen))(?=.*(kind|eltern|familie|betreu))"
  ),
  "1:12-Initiative" = c(
    "(?i)\\b(1|eins)( |_|-)?(:|zu)( |_|-)?(12|zw(ö|oe?)lf)\\b",
    "(?i)(?=.*juso)(?=.*(initia|abstimm|vorlage))",
    "(?i)(?=.*(faire|gerechte|gleiche))(?=.*(löhne|lohn))",
    "(?i)lohn(diktat|(be)?grenz|exzess)"
  ),
  "Masseneinwanderungsinitiative" = c(
    "(?i)(ein|zu)wanderung",
    "(?i)\\bmei(ja|nei)?\\b",
    "(?i)(kontingente|personen-?freizügigkeit|freizügigkeits?-?abkommen|bilaterale|abschottung)",
    "(?i)freier( |_|-)?personenverkehr",
    "(?i)svp(-|_)?(initia|vorlage)"
  ),
  "Keine öffentliche Abtreibungsfinanzierung" = c(
    "(?i)\\babtreib",
    "(?i)lebens?(schutz|recht)",
    "(?i)\\babort(ion)?\\b",
    "(?i)(?=.*schwanger)(?=.*abbr(uch|echen))",
    "(?i)(?=.*schwanger)(?=.*\\b(breche|bricht))(?=.*\\bab\\b)"
  ),
  "Mindestlohninitiative" = c(
    "(?i)mindest(-|_)?lohn",
    "(?i)milo14",
    "(?i)\\bmilo\\b",
    "(?i)(?=.*(dump(en|ing)|faire|gerechte|\\bgleichh?e|sch(u|ü)tz|initia|abstimm|vorlage|gesetz|abst14|votenow|chvote))(?=.*(löhne|lohn|gehalt|salär|vergütung))(?!.*grossrat)",
    "(?i)lohn(-| |_)?(minimum|(an)?gleich|diktat|wucher|schlichtung|mafia|schutz|(unter)?grenz)",
    "(?i)(minimal|minimum|tief|niedrig)(-| |_)?lohn",
    "(?i)minimal(-| |_)?gehalt"
  ),
  "Berufsverbot Pädophile" = c(
    "(?i)\\bp(ä|a)?e?do(-|_| )?(phil|sex|14|nei|ja)",
    "(?i)(?=.*\\bsex)(?=.*(kinder|p(ä|a)?e?do))",
    "(?i)kinder(-|_)?sex",
    "(?i)(?=.*(pädo|marche( |_|-)?blanche|bussat))(?=.*(initia|abstimm|vorlage|gesetz|abst14|votenow|chvote))"
  ),
  "Einheitskasse" = c(
    "(?i)(\\beinheits?|\\böffentliche|staatliche|28(\\. ?| )?sep|sozialistisch|risikoselektion|gute risiken|wahlfreiheit|initia|abstimm|vorlage|gesetz|abst14|votenow|chvote)(?=.*((kranken(-?pflege)?|grund)-?versicherung|kranken-?kasse|\\bkks?\\b))",
    "(?i)\\b(oe|ö)(ff)?kk",
    "(?i)\\behk(ja|nei)?\\b",
    "(?i)krankenkasse",
    "(?i)prämien(explosion|zahle)"
  ),
  "MwSt-Senkung Gastgewerbe" = c(
    "(?i)(?=.*(mwst|mehrwertsteuer))(?=.*(diskriminierung|einheitlich))",
    "(?i)(mwst|mehrwertsteuer)( |_|-)?(volks?)?(initia|abstimm|vorlage|gesetz)",
    "(?i)(?=.*bratw(u|ü)rst)(?=.*(mehrwertsteuer|mwst|diskriminierung|initia|abstimm|vorlage|abst14|votenow|chvote))",
    "(?i)(?=.*(gast(ro|gewerbe|hof|wirtschaft|stätte|h(a|ä)us)|restaurant|imbiss|döner|fast-?food|take-?away))(?=.*(mehrwertsteuer|mwst|diskriminierung|steuer|initia|abstimm|vorlage|abst14|votenow|chvote))"
  ),
  "Ecopop-Initiative" = c(
    "(?i)e(c|g)o(p|fl)(o|ö)p",
    "(?i)(ein|zu)wanderung",
    "(?i)(?=.*umwelt)(?=.*ausländ)",
    "(?i)überbevölkerung",
    "(?i)(?=.*(sicher(n|ung)|erhalt|bewahrung))(?=.*natürlich)(?=.*lebensgrundlagen?)"
  ),
  "Abschaffung Pauschalbesteuerung" = c(
    "(?i)(?=.*pauschal)(?=.*steuer)",
    "(?i)(?=.*steuer)(?=.*nach aufwand)",
    "(?i)steuerprivileg",
    "(?i)(?=.*(andere gesetze|sonderr?egel|privileg|(sonder|vor)-?recht|extrawurst|(spezial|sonder)-?behandlung))(?=.*(betucht|gut-?situiert|millionär|\\breiche?n?\\b|milliardär|\\bvermögend|besser( -)?gestellt|finanz(kräftig|stark)|wohlhabend|zahlungskräftig))",
    "(?i)(?=.*(steuer|initia|abstimm|vorlage|abst14|votenow|chvote))(?=.*((aus|fremd)l(ä|a)nd|fremd(staat|stämm)|auswärtig))(?=.*(betucht|gut-?situiert|millionär|\\breiche?n?\\b|milliardär|\\bvermögend|besser( -)?gestellt|finanz(kräftig|stark)|wohl-?(habend|situiert)|zahlungskräftig))",
    "(?i)(?=.*(steuer|initia|abstimm|vorlage|abst14|votenow|chvote))(?=.*gerecht)(?=.*((aus|fremd)l(ä|a)nd|fremd(staat|stämm)|auswärtig|betucht|gut-?situiert|millionär|\\breiche?n?\\b|milliardär|\\bvermögend|besser( -)?gestellt|finanz(kräftig|stark)|wohl-?(habend|situiert)|zahlungskräftig))"
  ),
  "Gold-Initiative" = c(
    "(?i)(?=.*\\bgold\\b)(?=.*(initia|abstimm|vorlage|nationalbank|\\bsnb|währung|reserven|rett(et|ung)|\\bjordan\\b|abst14|votenow|chvote))",
    "(?i)gold(initia|abstimm|vorlage|währung|reserve|rett|preis)",
    "(?i)(national(bank)?|snb)gold"
  ),
  "Steuerfreie Kinder- und Ausbildungszulagen" = c(
    "(?i)(?=.*familie)(?=.*(initia|abstimm|vorlage|abst15|votenow|chvote))",
    "(?i)(?=.*steuer(-)?frei)(?=.*(zulagen|familie))",
    "(?i)kinder(-|_)?zulagen",
    "(?i)ausbildungs?(-|_)?zulagen",
    "(?i)cvp(-|_)?(initia|vorlage)"
  ),
  "Energie- statt Mehrwertsteuer" = c(
    "(?i)\\bviesm",
    "(?i)\\bESM(ja|nei)?\\b",
    "(?i)\\benerg15",
    "(?i)(?=.*energie)(?=.*(steuer|mwst|lenkungsabgabe))",
    "(?i)energie(-|_)?abgabe",
    "(?i)(?=.*(mwst|mehrwerts?steuer|lenkungsabgabe))(?=.*(initia|abstimm|vorlage|abst15|votenow|chvote))",
    "(?i)(glp|gr(u|ü)nliberale?)(-|_)?(initia|vorlage)"
  ),
  "Erbschaftssteuerinitiative" = c(
    "(?i)(?=.*((erb|hinterlassen)schaft|\\b(ent|ver|be)?erben?))(?=.*steuer)",
    "(?i)erbsteuer"
  ),
  "Stipendieninitiative" = c(
    "(?i)\\bstip(endi|in)",
    "(?i)(?=.*stud(ent|ium))(?=.*(geld|finanzier|kosten|einkommen|chancen))",
    "(?i)bildungs?chancen"
  ),
  "Durchsetzungsinitiative" = c(
    "(?i)\\bdsi(ja|nei)",
    "(?i)\\bdsi\\b",
    "(?i)HandstandFürDenAnstand",
    "(?i)entrechtung",
    "(?i)durchzwängung",
    "(?i)aus(ge)?s?chaff?(t|ung|en)",
    "(?i)krimineller? ausländer",
    "(?i)(?=.*(automatismus|durchsetzung|zwängerei))(?=.*(\\bsvp\\b|menschenrecht|\\bemrk\\b|initia|abstimm|vorlage|abst16|votenow|chvote|28(\\. ?| )?feb|rechts?staat|richter|verschärfung|\\b#?ASI\\b))",
    "(?i)(?=.*(rechts?staat|richter|verschärfung|\\bASI\\b))(?=.*(initia|abstimm|vorlage|abst16|votenow|chvote|28(\\. ?| )?feb))",
    "(?i)(?=.*(\\bASI\\b|automatismus))(?=.*(um|durch)(ge)?setz)"
  ),
  "Für Ehe und Familie – gegen die Heiratsstrafe" = c(
    "(?i)(heirat|konkubinat)s?(strafe|bonus)",
    "(?i)cvp(-|_)?initiative",
    "(?i)ehe( |-|_)?für( |-|_)?alle",
    "(?i)gemeinsam( |-|_)?weiter",
    "(?i)individual(-|_)?besteuerung",
    "(?i)ehe(-|_)?verbot",
    "(?i)(?=.*(heirat|\\behe(paar)?\\b))(?=.*(initia|abstimm|vorlage|abst16|votenow|chvote|28(\\. ?| )?feb))"
  ),
  "Verbot Nahrungsmittelspekulation" = c(
    "(?i)(?=.*(nahrung|lebensmittel|ess(en|waren)|food|fressalien|hunger|abst16|votenow|chvote|28(\\. ?| )?feb|gameover))(?=.*spekul)",
    "(?i)speku(lations?)?(_|-)?stop",
    "(?i)(#gameover|gegenhungermacher|SpieltNichtMitEssen)"
  )
)

# The patterns of a proposal are stored as one comma-separated string and split
# on "," again when the tweets are searched, so no pattern may contain a comma.
stopifnot(!grepl(",", unlist(twitter_regexes), fixed = TRUE))

# Store the patterns per proposal; proposals without patterns keep NA.
Abstimmungsvorlagen$Regex_Twitter <- NA

for (vote_name in names(twitter_regexes)) {
  Abstimmungsvorlagen$Regex_Twitter[Abstimmungsvorlagen$Abstimmung == vote_name] <-
    paste(twitter_regexes[[vote_name]], collapse = ",")
}

# Search the tweets for hits on each ballot proposal (within the observation
# periods of the foeg Abstimmungsmonitor).
Abstimmungsvorlagen$Tweets_Fundus_Anzahl <- NA
Abstimmungsvorlagen$Tweets_Fundus_Anzahl_Bundesversammlung <- NA
Abstimmungsvorlagen$Tweets_Treffer_Indizes <- NA
Abstimmungsvorlagen$Tweets_Treffer_Indizes_Bundesversammlung <- NA
Abstimmungsvorlagen$Tweets_Treffer_Anzahl <- NA
Abstimmungsvorlagen$Tweets_Treffer_Anzahl_Bundesversammlung <- NA

# Find the tweets of one corpus that fall into an observation period and match
# at least one pattern.
#
# texts:        character vector of tweet texts.
# tweet_dates:  Date vector, parallel to `texts`.
# period_start: first day of the observation period (inclusive).
# period_end:   last day of the observation period (inclusive).
# patterns:     character vector of PCRE patterns (may be NULL or empty).
#
# Returns a list with
#   pool_size:   number of tweets inside the period
#   hit_indices: integer positions (into `texts`) of the matching tweets, in
#                order of first match
#
# The period is selected by date range rather than by locating the first/last
# tweet created exactly on the boundary days, so a period without a tweet on
# its start or end date (or without any tweet) no longer breaks the search.
find_vote_tweets <- function(texts, tweet_dates, period_start, period_end, patterns) {
  in_period <- which(tweet_dates >= period_start & tweet_dates <= period_end)
  hits <- integer(0)
  for (pattern in patterns) {
    hits <- union(hits,
                  grep(pattern = pattern,
                       x = texts[in_period],
                       ignore.case = FALSE,
                       perl = TRUE))
  }
  list(pool_size = length(in_period),
       hit_indices = in_period[hits])
}

# The tweet dates do not change inside the loop, so convert them only once.
tweet_dates <- as.Date(tweets$created)
tweet_dates_federal_assembly <- as.Date(tweets_federal_assembly$created)

for (vote_index in seq_len(nrow(Abstimmungsvorlagen))) {

  # observation period of the current proposal
  period_start <- Abstimmungsvorlagen$Beobachtungszeitraum_Start[vote_index]
  period_end <- Abstimmungsvorlagen$Beobachtungszeitraum_Ende[vote_index]

  # patterns of the current proposal (NULL if none are defined)
  regex_string <- Abstimmungsvorlagen$Regex_Twitter[vote_index]
  patterns <- if (is.na(regex_string)) NULL else unlist(strsplit(regex_string, split = ","))

  found <- find_vote_tweets(texts = tweets$text,
                            tweet_dates = tweet_dates,
                            period_start = period_start,
                            period_end = period_end,
                            patterns = patterns)
  found_federal_assembly <- find_vote_tweets(texts = tweets_federal_assembly$text,
                                             tweet_dates = tweet_dates_federal_assembly,
                                             period_start = period_start,
                                             period_end = period_end,
                                             patterns = patterns)

  # number of tweets that fall into the observation period
  Abstimmungsvorlagen$Tweets_Fundus_Anzahl[vote_index] <- found$pool_size
  Abstimmungsvorlagen$Tweets_Fundus_Anzahl_Bundesversammlung[vote_index] <- found_federal_assembly$pool_size

  # Hits are only recorded for proposals that have patterns; the others keep NA
  # instead of being searched with a missing pattern, which produced an NA
  # pseudo-hit that was counted as one match.
  if (!is.null(patterns)) {
    Abstimmungsvorlagen$Tweets_Treffer_Indizes[vote_index] <- paste0(found$hit_indices, collapse = ",")
    Abstimmungsvorlagen$Tweets_Treffer_Indizes_Bundesversammlung[vote_index] <- paste0(found_federal_assembly$hit_indices, collapse = ",")
    Abstimmungsvorlagen$Tweets_Treffer_Anzahl[vote_index] <- length(found$hit_indices)
    Abstimmungsvorlagen$Tweets_Treffer_Anzahl_Bundesversammlung[vote_index] <- length(found_federal_assembly$hit_indices)
  }
}


# Download the councillor data set from the parliament API so that voting
# behaviour can be linked to the proposals afterwards. The API is paged: keep
# requesting the next page as long as the last row received says there is one.
parliamentarians <- fromJSON(txt = "http://ws-old.parlament.ch/councillors?format=json", flatten = TRUE)
page_counter <- 1
repeat {
  last_row <- nrow(parliamentarians)
  if ( !isTRUE(parliamentarians$hasMorePages[last_row]) ) {
    break
  }
  page_counter <- page_counter + 1
  next_page <- fromJSON(txt = paste0("http://ws-old.parlament.ch/councillors",
                                     "?pageNumber=",
                                     page_counter,
                                     "&format=json"),
                        flatten = TRUE)
  parliamentarians <- rbind(parliamentarians, next_page)
}

# Retrieve parliamentary votes from the parliament API.
#
# url: API address of a votes resource, with or without a query string.
# Returns the `affairVotes` element of the JSON response with all pages bound
# together row-wise, or NULL if the address answers with a 404 page.
get_votes <- function(url) {

  # Workaround: probe the address first, so that fromJSON() does not leave
  # countless open connections behind (and eventually abort) on missing pages
  page_exists <- !any(grepl(pattern = "404 - File or directory not found", x = getURL(url)))
  if ( !page_exists ) {
    return(NULL)
  }

  # Further parameters are appended with "&" if the address already carries a
  # query string, otherwise with "?"
  separator <- if ( any(grepl(pattern = "\\?", x = url)) ) "&" else "?"

  # first page
  votes <- fromJSON(txt = paste0(url, separator, "format=json"),
                    flatten = TRUE)$affairVotes

  # remaining pages, as long as the last row received announces another one
  page_counter <- 1
  while ( isTRUE(votes$hasMorePages[NROW(votes)]) ) {
    page_counter <- page_counter + 1
    next_page <- fromJSON(txt = paste0(url,
                                       separator,
                                       "pageNumber=",
                                       page_counter,
                                       "&format=json"),
                          flatten = TRUE)$affairVotes
    votes <- rbind(votes, next_page)
  }

  votes
}

# Indizes und Anzahl der Parlamentsreden zu den verschiedenen Abstimmungsvorlagen zählen sowie IDs und Abstimmungsverhalten der jeweiligen Parlamentarier erfassen (dauert ca. 2h, daher lokale Ergebnisse laden, falls bereits erledigt)
if ( file.exists("Abstimmungsvorlagen.Rda") ) {
  remove(Abstimmungsvorlagen)
  load(file = "Abstimmungsvorlagen.Rda")
} else {
  # Indizes und Anzahl der Parlamentsreden zu den verschiedenen Abstimmungsvorlagen zählen sowie IDs und Abstimmungsverhalten der jeweiligen Parlamentarier erfassen
  Abstimmungsvorlagen$Parlamentsreden_Indizes <- NA
  Abstimmungsvorlagen$Parlamentsreden_Anzahl <- NA
  Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Indizes <- NA
  Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Anzahl <- NA
  Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Überhang <- NA
  Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft <- NA
  Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft_Pro <- NA
  Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft_Contra <- NA

  for ( Abstimmung in Abstimmungsvorlagen$Abstimmung ) {

    print(paste0("Verarbeite Abstimmung ", which(Abstimmungsvorlagen$Abstimmung == Abstimmung), " von ", length(Abstimmungsvorlagen$Abstimmung), ": ", Abstimmung))

    curia_vista_IDs <- unlist(strsplit(Abstimmungsvorlagen$Curia_Vista_IDs[Abstimmungsvorlagen$Abstimmung==Abstimmung], split = ","))

    hits <- c()
    for ( ID in curia_vista_IDs ) {
      hits <- c(hits,
                which(parliament_speeches_bruno$curiaVista==ID))
    }
    Abstimmungsvorlagen$Parlamentsreden_Indizes[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- paste0(hits, collapse = ",")
    Abstimmungsvorlagen$Parlamentsreden_Anzahl[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- length(hits)

    hits_core_affair <- which(parliament_speeches_bruno$curiaVista==curia_vista_IDs[1])
    Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Indizes[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- paste0(hits_core_affair, collapse = ",")
    Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Anzahl[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- length(hits_core_affair)

    parliamentarian_IDs <- unique(parliament_speeches_bruno$personID[hits_core_affair])
    Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- paste0(parliamentarian_IDs, collapse = ",")

    is_initiative <- ifelse(Abstimmungsvorlagen$Vorlagentyp[Abstimmungsvorlagen$Abstimmung==Abstimmung] == "I", TRUE, FALSE)
    parliamentarian_IDs_pro <- c()
    parliamentarian_IDs_contra <- c()

    affair_ID <- unlist(strsplit(curia_vista_IDs[1], split = "\\."))
    affair_ID[1] <- paste0(ifelse(as.numeric(affair_ID[1]) < 80, "20", "19"), affair_ID[1])
    affair_ID[2] <- paste0(ifelse(nchar(affair_ID[2]) < 4, "0", ""), affair_ID[2])
    affair_ID <- paste0(affair_ID, collapse = "")

    votes_overview <- get_votes(url = paste0("http://ws-old.parlament.ch/votes/affairs/", affair_ID))
    final_votes_indices <- grep(pattern = "(?i)(schlussabstimmung|vote final)", x = votes_overview$divisionText)

    if ( length(final_votes_indices) == 1 ) {
      final_vote_ID <- votes_overview$id[final_votes_indices]
    } else {
      # Spezialfälle behandeln (Geschäfte mit mehreren Schlussabstimmungen)
      final_vote_ID <- switch ( Abstimmung,
                                "Revision Asylgesetz" = 12156,
                                "Preiserhöhung Autobahnvignette" = 12503,
                                "Finanzierung Eisenbahninfrastruktur (FABI)" = 12828,
                                "Berufsverbot Pädophile" = 13324,
                                "Hausarztmedizin" = 13143,
                                "Präimplantationsdiagnostik" = 15050,
                                "Stipendieninitiative" = 15052,
                                "Für Ehe und Familie – gegen die Heiratsstrafe" = 15942,
                                NULL )
    }
    # Abstimmungsverhalten für alle Parlamentarier ermitteln, die eine zugehörige Rede hielten
    for ( ID in parliamentarian_IDs ) {
      if ( ID %in% parliamentarians$id ) {
        print(paste0("   RednerIn ", which(parliamentarian_IDs == ID), " von ", length(parliamentarian_IDs), ": ", parliamentarians$firstName[parliamentarians$id == ID], " ", parliamentarians$lastName[parliamentarians$id == ID]))
        parliamentarian_nr <- parliamentarians$number[parliamentarians$id==ID]
        votes <- get_votes(url = paste0("http://ws-old.parlament.ch/votes/councillors/",
                                        parliamentarian_nr,
                                        "?affairNumberFilter=",
                                        affair_ID))

        # Prüfen, ob RednerIn bei diesem Geschäft gestimmt hat
        if ( !is.null(votes) ) {
          print("      ...hat bei diesem Geschäft gestimmt")
          if ( isTRUE(final_vote_ID %in% votes$id) ) {
            final_vote_target_index <- which(votes$id == final_vote_ID)
            if ( votes$councillorVote.decision[final_vote_target_index] == "No" ) {
              if ( is_initiative ) {
                parliamentarian_IDs_pro <- c(parliamentarian_IDs_pro, ID)
                print("      ...und zwar FÜR die Vorlage")
              } else {
                parliamentarian_IDs_contra <- c(parliamentarian_IDs_contra, ID)
                print("      ...und zwar GEGEN die Vorlage")
              }
            } else if ( votes$councillorVote.decision[final_vote_target_index] == "Yes" ) {
              if ( is_initiative ) {
                parliamentarian_IDs_contra <- c(parliamentarian_IDs_contra, ID)
                print("      ...und zwar GEGEN die Vorlage")
              } else {
                parliamentarian_IDs_pro <- c(parliamentarian_IDs_pro, ID)
                print("      ...und zwar FÜR die Vorlage")
              }
            } else print("      ...aber Stimmverhalten ist unbekannt (Stimme enthalten, war abwesend, etc.)")
          } else print("      ...jedoch bei der Schlussabstimmung NICHT teilgenommen")
        } else print("      ...hat bei diesem Geschäft NICHT gestimmt")
      }
    }
    Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft_Pro[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- paste0(parliamentarian_IDs_pro, collapse = ",")
    Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft_Contra[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- paste0(parliamentarian_IDs_contra, collapse = ",")

    speeches_count_pro <- length(subset(parliament_speeches_bruno[hits_core_affair, ], personID %in% parliamentarian_IDs_pro)[, 1])
    speeches_count_contra <- length(subset(parliament_speeches_bruno[hits_core_affair, ], personID %in% parliamentarian_IDs_contra)[, 1])
    Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Überhang[Abstimmungsvorlagen$Abstimmung==Abstimmung] <- speeches_count_pro - speeches_count_contra
  }

  # Tonalität (Überhang) der Parlamentsreden umrechnen analog zum fög-Abstimmungsmonitor (siehe http://www.foeg.uzh.ch/dam/jcr:df8fd8af-1a2b-4677-acaf-8eecf0597c41/Abstimmungsmonitor_Februar_2016.pdf)
  Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Tonalität <- Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Überhang / Abstimmungsvorlagen$Parlamentsreden_Kerngeschäft_Anzahl * 100

  # Ergebnis in lokale Datei speichern
  save(Abstimmungsvorlagen,
       file = "Abstimmungsvorlagen.Rda")
}


# Report the size of the tweet corpus: total row counts first, then the
# smallest and largest per-vote corpus sizes stored in Abstimmungsvorlagen
length(tweets$text)
length(tweets_federal_assembly$text)
min(Abstimmungsvorlagen$Tweets_Fundus_Anzahl)
min(Abstimmungsvorlagen$Tweets_Fundus_Anzahl_Bundesversammlung)
max(Abstimmungsvorlagen$Tweets_Fundus_Anzahl)
max(Abstimmungsvorlagen$Tweets_Fundus_Anzahl_Bundesversammlung)

# Count the accounts for which not all tweets could be retrieved (3200-tweet
# limit) AND whose oldest retrieved tweet is more recent than the start of the
# earliest observation period
minimum_observation_date <- min(Abstimmungsvorlagen$Beobachtungszeitraum_Start)

# One logical flag per busy account. This avoids growing a vector inside a
# loop and compares against the twitterUserID column explicitly, so the lookup
# cannot be shadowed by a column of `tweets` that happens to be named like the
# loop variable.
is_incompletely_covered <- vapply(busy_IDs, function(ID) {
  created <- tweets$created[which(tweets$twitterUserID == ID)]
  length(created) > 0 && as.Date(min(created)) > minimum_observation_date
}, logical(1), USE.NAMES = FALSE)

# A missing timestamp makes the comparison undecidable; fail loudly (as the
# former `if` did) instead of silently counting such accounts
stopifnot(!anyNA(is_incompletely_covered))

# rev() keeps the former ordering (IDs used to be prepended one by one)
incompletely_covered_IDs <- rev(busy_IDs[is_incompletely_covered])
length(incompletely_covered_IDs)


# Compute Pearson correlation coefficients (cor.test() is called with its
# default method). `Initiativen` holds only the rows with Vorlagentyp == "I";
# each result is auto-printed at top level. The trailing remarks are the
# author's reading of the results.
Initiativen <- subset(Abstimmungsvorlagen, Vorlagentyp == "I")
with(Initiativen, cor.test(Printbeiträge_Anzahl, Tweets_Treffer_Anzahl)) # high correlation!!!
with(Initiativen, cor.test(Printbeiträge_Anzahl, Parlamentsreden_Anzahl))
with(Initiativen, cor.test(Printbeiträge_Anzahl, Parlamentsreden_Kerngeschäft_Anzahl))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl, Parlamentsreden_Anzahl))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl, Parlamentsreden_Kerngeschäft_Anzahl))
with(Initiativen, cor.test(Parlamentsreden_Kerngeschäft_Tonalität, Printbeiträge_Tonalität)) # moderate correlation, but ...
with(Abstimmungsvorlagen, cor.test(Parlamentsreden_Kerngeschäft_Tonalität, Printbeiträge_Tonalität)) # ... it rises markedly once the referendums are included!!
with(Initiativen, cor.test(Tweets_Treffer_Anzahl, Parlamentsreden_Kerngeschäft_Tonalität))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl, Printbeiträge_Tonalität))
with(Initiativen, cor.test(Parlamentsreden_Kerngeschäft_Tonalität, Zustimmung))
with(Initiativen, cor.test(Printbeiträge_Tonalität, Zustimmung)) # rather weak, but significant
with(Initiativen, cor.test(Tweets_Treffer_Anzahl, Zustimmung))
with(Initiativen, cor.test(Printbeiträge_Anzahl, Zustimmung))
with(Initiativen, cor.test(Parlamentsreden_Kerngeschäft_Anzahl, Zustimmung)) # rather weak, but significant
with(Initiativen, cor.test(Parlamentsreden_Anzahl, Zustimmung)) # moderate correlation
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Zustimmung))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Printbeiträge_Anzahl)) # high correlation!!!
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Printbeiträge_Tonalität))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Parlamentsreden_Anzahl))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Parlamentsreden_Kerngeschäft_Anzahl))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Parlamentsreden_Kerngeschäft_Tonalität))
with(Initiativen, cor.test(Tweets_Treffer_Anzahl_Bundesversammlung, Tweets_Treffer_Anzahl)) # almost perfect correlation!


# Build a bubble plot with ggplot2, restricted to rows with Vorlagentyp == "I":
# x = Parlamentsreden_Kerngeschäft_Anzahl, y = Tweets_Treffer_Anzahl,
# bubble size = Printbeiträge_Anzahl, bubble fill = Printbeiträge_Tonalität
bubble_plot <- ggplot(subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
                      aes(x = Parlamentsreden_Kerngeschäft_Anzahl,
                          y = Tweets_Treffer_Anzahl))

# pch = 21 is a fillable circle: `fill` colours the bubble, `colour` its outline.
# NOTE(review): `text` is not a standard geom_point aesthetic; presumably it is
# only supplied so that ggplotly() can use it as hover text -- confirm.
bubble_plot <- bubble_plot + geom_point(aes(fill = Printbeiträge_Tonalität,
                                            size = Printbeiträge_Anzahl,
                                            text = paste0(Abstimmung,
                                                          "<br>Anzahl Printbeiträge: ",
                                                          Printbeiträge_Anzahl,
                                                          "<br><br>⌀ Tonalität der Printbeiträge: ",
                                                          Printbeiträge_Tonalität)),
                                        pch = 21,
                                        colour = "white",
                                        alpha = 0.7,
                                        show.legend = TRUE)

# pretty_breaks() is provided by the scales package, which is not among the
# packages attached at the top of this script; qualifying the call makes it
# independent of the search path.
bubble_plot <- bubble_plot + scale_size_continuous(range = c(1, 30)) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10),
                     limits = c(0, 330)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 5),
                     limits = c(0, 6000)) +
  scale_fill_gradient2(low = "red",
                       mid = "grey",
                       high = "green")

# Axis and legend titles
bubble_plot <- bubble_plot + labs(x = "Anzahl Parlamentsreden",
                                  y = "Anzahl Tweets (von Politikern und Parteiangehörigen)",
                                  size = "Anzahl\nPrintbeiträge",
                                  fill = "⌀ Tonalität der Printbeiträge")

# Minimal theme: no panel background or axis lines, light grey grid and ticks.
# NOTE(review): the expected type of `legend.margin` differs between ggplot2
# releases -- verify against the installed version.
bubble_plot <- bubble_plot + theme(panel.background = element_blank(),
                                   panel.grid.major = element_line(colour = "grey90"),
                                   legend.key = element_blank(),
                                   #legend.position = "bottom",
                                   legend.box = "vertical",
                                   legend.box.just = "left",
                                   legend.margin = unit(1, "char"),
                                   axis.title.x = element_text(margin = margin(t = 15, r = 0, b = 0, l = 0)),
                                   axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
                                   axis.line = element_blank(),
                                   axis.ticks = element_line(colour = "grey90"),
                                   axis.text = element_text(size = 12),
                                   text = element_text(size = 12,
                                                       family = "Liberation Serif"))

# Legend order: fill first, size second; override the key appearance so the
# legend keys stay readable regardless of the plotted alpha/size
bubble_plot <- bubble_plot + guides(size = guide_legend(order = 2,
                                                        override.aes = list(colour = "grey",
                                                                            alpha = 1)),
                                    fill = guide_legend(order = 1,
                                                        override.aes = list(size = 14)))
bubble_plot

# Turn the ggplot object into a Plotly plot, then set hover mode and margins
# (layout) and the interaction options (config) in a single pipeline
bubble_ggplotly <- ggplotly(bubble_plot) %>%
  layout(hovermode = "closest",
         margin = list(t = 80, r = 200, b = 80, l = 80)) %>%
  config(scrollZoom = TRUE,
         displaylogo = FALSE,
         showLink = FALSE)
bubble_ggplotly # a complete mess! Colours and legend are missing, hover info cannot be customised...

# Tidy up the hover texts of a Plotly plot by dropping their trailing part.
#
# The plot is first passed through plotly_build(). Each hover text of the FIRST
# trace is then cut at the first "<br>" that is directly followed by "(", "-",
# "Print" or "Parl"; only the part in front of that position is kept. All other
# traces are left untouched.
#
# plotly_plot: a plot object accepted by plotly_build()
# Returns the built plot with the shortened texts stored in data[[1]]$text.
clean_hoverinfo <- function(plotly_plot) {
  built <- plotly_build(l = plotly_plot)
  pieces <- strsplit(built$data[[1]]$text, split = "<br>(\\(|-|Print|Parl)")
  built$data[[1]]$text <- vapply(pieces,
                                 function(parts) parts[1],
                                 character(1),
                                 USE.NAMES = FALSE)
  built
}

# Bubble plot 1 (Plotly), restricted to rows with Vorlagentyp == "I".
# Mapping: x = Parlamentsreden_Kerngeschäft_Anzahl, y = Tweets_Treffer_Anzahl,
# marker colour = Printbeiträge_Tonalität, marker size = Printbeiträge_Anzahl.
#
# The axis titles, the colour bar title and the "(Kreisgrösse)"/"(Kreisfarbe)"
# hints in the hover text previously named other variables than the ones that
# are actually mapped; they now describe the plotted data.
# NOTE(review): if the former labels reflected the intended design, change the
# mapping instead of the labels.
bubble_plotly <- plot_ly(data = subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
                         x = Parlamentsreden_Kerngeschäft_Anzahl,
                         y = Tweets_Treffer_Anzahl,
                         color = Printbeiträge_Tonalität,
                         size = Printbeiträge_Anzahl,
                         # a leading "+" is added to positive tonality values
                         text = paste0(Abstimmung,
                                       "<br><br>Anzahl Printbeiträge (Kreisgrösse): ", Printbeiträge_Anzahl,
                                       "<br>Anzahl Parlamentsreden (zum Kerngeschäft): ", Parlamentsreden_Kerngeschäft_Anzahl,
                                       "<br>Anzahl Tweets: ", Tweets_Treffer_Anzahl,
                                       "<br>⌀ Tonalität der Printbeiträge (Kreisfarbe): ", ifelse(Printbeiträge_Tonalität > 0, "+", ""), Printbeiträge_Tonalität,
                                       "<br>⌀ Tonalität der Parlamentsreden: ", ifelse(Parlamentsreden_Kerngeschäft_Tonalität > 0, "+", ""), round(Parlamentsreden_Kerngeschäft_Tonalität)),
                         type = "scatter",
                         mode = "markers",
                         opacity = 0.8,
                         marker = list(colorbar = list(title = "⌀ Tonalität<br>Printbeiträge",
                                                       titlefont = list(size = 16))),
                         hoverinfo = "text")

# Axis titles and fonts
bubble_plotly <- layout(p = bubble_plotly,
                        xaxis = list(title = "Anzahl Parlamentsreden",
                                     tickfont  = list(size = 13)),
                        yaxis = list(title = "Anzahl Tweets",
                                     tickfont  = list(size = 13)),
                        font = list(family = "Liberation Serif",
                                    size = 14))

# Hide the Plotly logo, the edit link and the mode bar
bubble_plotly <- config(p = bubble_plotly,
                        displaylogo = FALSE,
                        showLink = FALSE,
                        displayModeBar = FALSE)

# Strip the automatically appended trailing hover line, then display the plot
bubble_plotly <- clean_hoverinfo(bubble_plotly)
bubble_plotly

#plotly_POST(bubble_plotly, filename = "DDJ-Blogeintrag 2/Bubble Plot 2D", fileopt = "overwrite", sharing = "public")


# Fit the linear regression Tweets_Treffer_Anzahl ~ Printbeiträge_Anzahl on the
# rows with Vorlagentyp == "I"
model_Tw_PB <- lm(formula = Tweets_Treffer_Anzahl ~ Printbeiträge_Anzahl,
                  data = Abstimmungsvorlagen,
                  subset = Vorlagentyp == "I")

# Predict on an explicit data frame. Without `newdata`, predict() returns one
# row per observation actually used in the fit, so any row dropped because of a
# missing value would make the predictions shorter than the x vector below.
newdata_Tw_PB <- subset(Abstimmungsvorlagen, Vorlagentyp == "I")
predictions_Tw_PB <- predict(model_Tw_PB,
                             newdata = newdata_Tw_PB,
                             interval = "confidence")

# Fitted values plus lower/upper confidence bounds, aligned row by row with x
data_Tw_PB <- data.frame(x = newdata_Tw_PB$Printbeiträge_Anzahl,
                         fit = predictions_Tw_PB[, "fit"],
                         upr = predictions_Tw_PB[, "upr"],
                         lwr = predictions_Tw_PB[, "lwr"])

# Rows without a usable prediction cannot be drawn
data_Tw_PB <- data_Tw_PB[complete.cases(data_Tw_PB), ]

# Sort by x so the line traces built from this data run left to right
data_Tw_PB <- arrange(data_Tw_PB, x)

# Bubble plot 2 (Plotly), restricted to rows with Vorlagentyp == "I".
# Mapping: x = Printbeiträge_Anzahl, y = Tweets_Treffer_Anzahl,
# marker colour = Zustimmung, marker size = Parlamentsreden_Anzahl.
# The hover line marked "(Kreisgrösse)" now prints Parlamentsreden_Anzahl, i.e.
# the variable that really drives the marker size (it used to print
# Parlamentsreden_Kerngeschäft_Anzahl).
bubble_plotly_2 <- plot_ly(data = subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
                           x = Printbeiträge_Anzahl,
                           y = Tweets_Treffer_Anzahl,
                           color = Zustimmung,
                           size = Parlamentsreden_Anzahl,
                           text = paste0(Abstimmung,
                                         "<br><br>Anzahl Printbeiträge: ", Printbeiträge_Anzahl,
                                         "<br>Anzahl Tweets: ", Tweets_Treffer_Anzahl,
                                         "<br>Anzahl Parlamentsreden (Kreisgrösse): ", Parlamentsreden_Anzahl,
                                         "<br>Ja-Stimmenanteil (Kreisfarbe): ", format(Zustimmung, nsmall = 1), " %"),
                           type = "scatter",
                           mode = "markers",
                           opacity = 0.8,
                           marker = list(colorbar = list(title = "Ja-Stimmenanteil<br>in %",
                                                         titlefont = list(size = 16),
                                                         outlinecolor = "#999999",
                                                         len = 0.7),
                                         sizeref = 3),
                           hoverinfo = "text",
                           showlegend = FALSE)

# Regression line (fitted values from data_Tw_PB)
bubble_plotly_2 <- add_trace(p = bubble_plotly_2,
                             x = data_Tw_PB$x,
                             y = data_Tw_PB$fit,
                             line = list(shape = "linear",
                                         color = "rgba(34, 147, 139, 0.3)"),
                             name = "Regressionsgerade<br>inkl. 95%-Konfidenz-<br>intervall",
                             legendgroup = "fitted",
                             mode = "lines",
                             hoverinfo = "none",
                             visible = TRUE)

# Upper confidence bound: invisible line (width 0) whose fill produces the band
bubble_plotly_2 <- add_trace(p = bubble_plotly_2,
                             x = data_Tw_PB$x,
                             y = data_Tw_PB$upr,
                             line = list(shape = "spline",
                                         width = 0),
                             fill = "tonexty",
                             fillcolor = "rgba(163, 173, 204, 0.2)",
                             legendgroup = "fitted",
                             showlegend = FALSE,
                             hoverinfo = "none",
                             visible = TRUE)

# Lower confidence bound, drawn the same way
bubble_plotly_2 <- add_trace(p = bubble_plotly_2,
                             x = data_Tw_PB$x,
                             y = data_Tw_PB$lwr,
                             line = list(shape = "spline",
                                         width = 0),
                             fill = "tonexty",
                             fillcolor = "rgba(163, 173, 204, 0.4)",
                             legendgroup = "fitted",
                             showlegend = FALSE,
                             hoverinfo = "none",
                             visible = TRUE)

# Title, axis titles, fonts and margins
bubble_plotly_2 <- layout(p = bubble_plotly_2,
                          title = "Zusammenhang zwischen der Anzahl Printbeiträge und Tweets 「Volksinitiativen」",
                          xaxis = list(title = "Anzahl Printbeiträge",
                                       tickfont  = list(size = 13)),
                          yaxis = list(title = "Anzahl Tweets",
                                       tickfont  = list(size = 13)),
                          font = list(family = "Liberation Serif",
                                      size = 14),
                          margin = list(l = 65, r = 0, t = 40, b = 50, pad = 0, autoexpand = TRUE))

# Hide the Plotly logo, the edit link and the mode bar
bubble_plotly_2 <- config(p = bubble_plotly_2,
                          displaylogo = FALSE,
                          showLink = FALSE,
                          displayModeBar = FALSE)

# Strip the automatically appended trailing hover line, then display the plot
bubble_plotly_2 <- clean_hoverinfo(bubble_plotly_2)
bubble_plotly_2

# Publish the plot (overwrites the existing public file of the same name)
plotly_POST(bubble_plotly_2, filename = "DDJ-Blogeintrag 2/Bubble Plot 2D 2", fileopt = "overwrite", sharing = "public")


# Fit the linear regression
# Parlamentsreden_Kerngeschäft_Tonalität ~ Printbeiträge_Tonalität on the rows
# with Vorlagentyp == "I"
model_PRT_PBT <- lm(formula = Parlamentsreden_Kerngeschäft_Tonalität ~ Printbeiträge_Tonalität,
                    data = Abstimmungsvorlagen,
                    subset = Vorlagentyp == "I")

# Predict on an explicit data frame. Without `newdata`, predict() returns one
# row per observation actually used in the fit. The speech tonality is a ratio
# (Überhang / Anzahl * 100) and therefore NaN for a vote without core-affair
# speeches; such a row is dropped by lm() and would make the predictions
# shorter than the x vector below.
newdata_PRT_PBT <- subset(Abstimmungsvorlagen, Vorlagentyp == "I")
predictions_PRT_PBT <- predict(model_PRT_PBT,
                               newdata = newdata_PRT_PBT,
                               interval = "confidence")

# Fitted values plus lower/upper confidence bounds, aligned row by row with x
data_PRT_PBT <- data.frame(x = newdata_PRT_PBT$Printbeiträge_Tonalität,
                           fit = predictions_PRT_PBT[, "fit"],
                           upr = predictions_PRT_PBT[, "upr"],
                           lwr = predictions_PRT_PBT[, "lwr"])

# Rows without a usable prediction cannot be drawn
data_PRT_PBT <- data_PRT_PBT[complete.cases(data_PRT_PBT), ]

# Sort by x so the line traces built from this data run left to right
data_PRT_PBT <- arrange(data_PRT_PBT, x)

# Bubble plot 3 (Plotly), restricted to rows with Vorlagentyp == "I":
# Printbeiträge_Tonalität (x) against Parlamentsreden_Kerngeschäft_Tonalität (y),
# marker colour = Zustimmung, marker size = Parlamentsreden_Kerngeschäft_Anzahl.
# The pipeline adds, in this order: the regression line, the upper and the
# lower confidence bound (invisible lines whose fill forms the band), the
# layout, the config, and finally the hover text clean-up.
bubble_plotly_3 <- plot_ly(
  data = subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
  x = Printbeiträge_Tonalität,
  y = Parlamentsreden_Kerngeschäft_Tonalität,
  color = Zustimmung,
  size = Parlamentsreden_Kerngeschäft_Anzahl,
  text = paste0(
    Abstimmung,
    "<br><br>⌀ Tonalität der Printbeiträge: ", ifelse(Printbeiträge_Tonalität > 0, "+", ""), Printbeiträge_Tonalität,
    "<br>⌀ Tonalität der Parlamentsreden: ", ifelse(Parlamentsreden_Kerngeschäft_Tonalität > 0, "+", ""), round(Parlamentsreden_Kerngeschäft_Tonalität),
    "<br>Anzahl Parlamentsreden (zum Kerngeschäft; Kreisgrösse): ", Parlamentsreden_Kerngeschäft_Anzahl,
    "<br>Ja-Stimmenanteil (Kreisfarbe): ", format(Zustimmung, nsmall = 1), " %"
  ),
  type = "scatter",
  mode = "markers",
  opacity = 0.8,
  marker = list(
    colorbar = list(title = "Ja-Stimmenanteil<br>in %",
                    titlefont = list(size = 16),
                    outlinecolor = "#999999",
                    len = 0.7),
    sizeref = 3
  ),
  hoverinfo = "text",
  showlegend = FALSE
) %>%
  add_trace(
    x = data_PRT_PBT$x,
    y = data_PRT_PBT$fit,
    line = list(shape = "linear", color = "rgba(34, 147, 139, 0.3)"),
    name = "Regressionsgerade<br>inkl. 95%-Konfidenz-<br>intervall",
    legendgroup = "fitted",
    mode = "lines",
    hoverinfo = "none",
    visible = TRUE
  ) %>%
  add_trace(
    x = data_PRT_PBT$x,
    y = data_PRT_PBT$upr,
    line = list(shape = "spline", width = 0),
    fill = "tonexty",
    fillcolor = "rgba(163, 173, 204, 0.2)",
    legendgroup = "fitted",
    showlegend = FALSE,
    hoverinfo = "none",
    visible = TRUE
  ) %>%
  add_trace(
    x = data_PRT_PBT$x,
    y = data_PRT_PBT$lwr,
    line = list(shape = "spline", width = 0),
    fill = "tonexty",
    fillcolor = "rgba(163, 173, 204, 0.4)",
    legendgroup = "fitted",
    showlegend = FALSE,
    hoverinfo = "none",
    visible = TRUE
  ) %>%
  layout(
    title = "Zusammenhang zwischen Tonalität von Printjournalismus und Parlamentsreden<br>「Volksinitiativen」",
    xaxis = list(title = "⌀ Tonalität der Printbeiträge", tickfont = list(size = 13)),
    yaxis = list(title = "⌀ Tonalität der Parlamentsreden", tickfont = list(size = 13)),
    font = list(family = "Liberation Serif", size = 14),
    margin = list(l = 65, r = 0, t = 80, b = 50, pad = 0, autoexpand = TRUE)
  ) %>%
  config(
    displaylogo = FALSE,
    showLink = FALSE,
    displayModeBar = FALSE
  ) %>%
  clean_hoverinfo()

# Display the plot
bubble_plotly_3

# Publish the plot (overwrites the existing public file of the same name)
plotly_POST(bubble_plotly_3,
            filename = "DDJ-Blogeintrag 2/Bubble Plot 2D 3",
            fileopt = "overwrite",
            sharing = "public")


# Fit the linear regression
# Parlamentsreden_Kerngeschäft_Tonalität ~ Printbeiträge_Tonalität on ALL votes
model_PRT_PBT_all <- lm(formula = Parlamentsreden_Kerngeschäft_Tonalität ~ Printbeiträge_Tonalität,
                        data = Abstimmungsvorlagen)

# Predict on the full data frame explicitly. Without `newdata`, predict()
# returns one row per observation actually used in the fit. The speech tonality
# is a ratio (Überhang / Anzahl * 100) and therefore NaN for a vote without
# core-affair speeches; such a row is dropped by lm() and would make the
# predictions shorter than the x vector below.
predictions_PRT_PBT_all <- predict(model_PRT_PBT_all,
                                   newdata = Abstimmungsvorlagen,
                                   interval = "confidence")

# Fitted values plus lower/upper confidence bounds, aligned row by row with x
data_PRT_PBT_all <- data.frame(x = Abstimmungsvorlagen$Printbeiträge_Tonalität,
                               fit = predictions_PRT_PBT_all[, "fit"],
                               upr = predictions_PRT_PBT_all[, "upr"],
                               lwr = predictions_PRT_PBT_all[, "lwr"])

# Rows without a usable prediction cannot be drawn
data_PRT_PBT_all <- data_PRT_PBT_all[complete.cases(data_PRT_PBT_all), ]

# Sort by x so the line traces built from this data run left to right
data_PRT_PBT_all <- arrange(data_PRT_PBT_all, x)

# Bubble plot 4 (Plotly), all rows of Abstimmungsvorlagen:
# Printbeiträge_Tonalität (x) against Parlamentsreden_Kerngeschäft_Tonalität (y),
# marker colour = Zustimmung, marker size = Parlamentsreden_Kerngeschäft_Anzahl.
# The hover title additionally shows the Vorlagentyp of each vote.
# The pipeline adds, in this order: the regression line, the upper and the
# lower confidence bound (invisible lines whose fill forms the band), the
# layout, the config, and finally the hover text clean-up.
bubble_plotly_4 <- plot_ly(
  data = Abstimmungsvorlagen,
  x = Printbeiträge_Tonalität,
  y = Parlamentsreden_Kerngeschäft_Tonalität,
  color = Zustimmung,
  size = Parlamentsreden_Kerngeschäft_Anzahl,
  text = paste0(
    Abstimmung, " 「", Vorlagentyp, "」",
    "<br><br>⌀ Tonalität der Printbeiträge: ", ifelse(Printbeiträge_Tonalität > 0, "+", ""), Printbeiträge_Tonalität,
    "<br>⌀ Tonalität der Parlamentsreden: ", ifelse(Parlamentsreden_Kerngeschäft_Tonalität > 0, "+", ""), round(Parlamentsreden_Kerngeschäft_Tonalität),
    "<br>Anzahl Parlamentsreden (zum Kerngeschäft; Kreisgrösse): ", Parlamentsreden_Kerngeschäft_Anzahl,
    "<br>Ja-Stimmenanteil (Kreisfarbe): ", format(Zustimmung, nsmall = 1), " %"
  ),
  type = "scatter",
  mode = "markers",
  opacity = 0.8,
  marker = list(
    colorbar = list(title = "Ja-Stimmenanteil<br>in %",
                    titlefont = list(size = 16),
                    outlinecolor = "#999999",
                    len = 0.7),
    sizeref = 3
  ),
  hoverinfo = "text",
  showlegend = FALSE
) %>%
  add_trace(
    x = data_PRT_PBT_all$x,
    y = data_PRT_PBT_all$fit,
    line = list(shape = "linear", color = "rgba(34, 147, 139, 0.3)"),
    name = "Regressionsgerade<br>inkl. 95%-Konfidenz-<br>intervall",
    legendgroup = "fitted",
    mode = "lines",
    hoverinfo = "none",
    visible = TRUE
  ) %>%
  add_trace(
    x = data_PRT_PBT_all$x,
    y = data_PRT_PBT_all$upr,
    line = list(shape = "spline", width = 0),
    fill = "tonexty",
    fillcolor = "rgba(163, 173, 204, 0.2)",
    legendgroup = "fitted",
    showlegend = FALSE,
    hoverinfo = "none",
    visible = TRUE
  ) %>%
  add_trace(
    x = data_PRT_PBT_all$x,
    y = data_PRT_PBT_all$lwr,
    line = list(shape = "spline", width = 0),
    fill = "tonexty",
    fillcolor = "rgba(163, 173, 204, 0.4)",
    legendgroup = "fitted",
    showlegend = FALSE,
    hoverinfo = "none",
    visible = TRUE
  ) %>%
  layout(
    title = "Zusammenhang zwischen Tonalität von Printjournalismus und Parlamentsreden<br>「alle Vorlagen」",
    xaxis = list(title = "⌀ Tonalität der Printbeiträge", tickfont = list(size = 13)),
    yaxis = list(title = "⌀ Tonalität der Parlamentsreden", tickfont = list(size = 13)),
    font = list(family = "Liberation Serif", size = 14),
    margin = list(l = 65, r = 0, t = 80, b = 50, pad = 0, autoexpand = TRUE)
  ) %>%
  config(
    displaylogo = FALSE,
    showLink = FALSE,
    displayModeBar = FALSE
  ) %>%
  clean_hoverinfo()

# Display the plot
bubble_plotly_4

# Publish the plot (overwrites the existing public file of the same name)
plotly_POST(bubble_plotly_4,
            filename = "DDJ-Blogeintrag 2/Bubble Plot 2D 4",
            fileopt = "overwrite",
            sharing = "public")


# 3D bubble plot (Plotly), restricted to rows with Vorlagentyp == "I":
# x = Printbeiträge_Anzahl, y = Parlamentsreden_Kerngeschäft_Anzahl,
# z = Tweets_Treffer_Anzahl, marker colour = Parlamentsreden_Kerngeschäft_Tonalität.
# The marker size is the print tonality shifted by (minimum + 1).
bubble_plotly_3D <- plot_ly(
  data = subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
  x = Printbeiträge_Anzahl,
  y = Parlamentsreden_Kerngeschäft_Anzahl,
  z = Tweets_Treffer_Anzahl,
  type = "scatter3d",
  mode = "markers",
  sizemode = "diameter",
  size = Printbeiträge_Tonalität - (min(Printbeiträge_Tonalität) + 1),
  color = Parlamentsreden_Kerngeschäft_Tonalität,
  marker = list(
    colorbar = list(title = "⌀ Tonalität<br>Parlamentsreden",
                    titlefont = list(size = 16),
                    outlinecolor = "#999999",
                    len = 0.8),
    sizeref = 3
  ),
  text = paste0(
    Abstimmung,
    "<br><br>Anzahl Printbeiträge: ", Printbeiträge_Anzahl,
    "<br>Anzahl Parlamentsreden: ", Parlamentsreden_Kerngeschäft_Anzahl,
    "<br>Anzahl Tweets: ", Tweets_Treffer_Anzahl,
    "<br>⌀ Tonalität der Printbeiträge (Kreisgrösse): ", ifelse(Printbeiträge_Tonalität > 0, "+", ""), Printbeiträge_Tonalität,
    "<br>⌀ Tonalität der Parlamentsreden (Kreisfarbe): ", ifelse(Parlamentsreden_Kerngeschäft_Tonalität > 0, "+", ""), round(Parlamentsreden_Kerngeschäft_Tonalität),
    "<br>Ja-Stimmenanteil: ", format(Zustimmung, nsmall = 1), " %"
  ),
  hoverinfo = "text"
) %>%
  layout(
    title = "Gesamtübersicht (3D) 「Volksinitiativen」",
    scene = list(
      xaxis = list(title = "Anzahl Printbeiträge", tickfont = list(size = 13)),
      yaxis = list(title = "Anzahl Parlamentsreden", tickfont = list(size = 13)),
      zaxis = list(title = "Anzahl Tweets", tickfont = list(size = 13))
    ),
    font = list(family = "Liberation Serif", size = 14),
    margin = list(l = 0, r = 0, t = 40, b = 0, pad = 0, autoexpand = TRUE)
  ) %>%
  config(
    displaylogo = FALSE,
    showLink = FALSE,
    displayModeBar = FALSE
  ) %>%
  clean_hoverinfo()

# Display the plot
bubble_plotly_3D

# Publish the plot (overwrites the existing public file of the same name)
plotly_POST(bubble_plotly_3D,
            filename = "DDJ-Blogeintrag 2/Bubble 3D",
            fileopt = "overwrite",
            sharing = "public")


# Generate the second 3D bubble plot with plotly
#
# Only rows with Vorlagentyp == "I" are plotted (presumably the popular
# initiatives -- TODO confirm against the data preparation code). The bare
# names passed as x/y/z/size/color/text are presumably resolved by plotly as
# columns of that subset rather than as global variables -- verify against the
# installed plotly version.
bubble_plotly_3D_2 <- plot_ly(data = subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
                              # Axes: tonality of print articles (x), tonality of
                              # parliamentary speeches (y), approval value (z)
                              x = Printbeiträge_Tonalität,
                              y = Parlamentsreden_Kerngeschäft_Tonalität,
                              z = Zustimmung,
                              type = "scatter3d",
                              mode = "markers",
                              # Marker size encodes the number of print articles,
                              # marker color the number of parliamentary speeches
                              sizemode = "diameter",
                              size = Printbeiträge_Anzahl,
                              color = Parlamentsreden_Kerngeschäft_Anzahl,
                              marker = list(colorbar = list(title = "Anzahl Parlamentsreden",
                                                            titlefont = list(size = 16),
                                                            outlinecolor = "#999999",
                                                            len = 0.8)),
                              # Disabled alternative: explicit reversed RdYlGn color ramp with 21 steps
                              #color = colorRampPalette(rev(brewer.pal(11, "RdYlGn")))(21)),
                              # Hover label: proposal name followed by the plotted values;
                              # positive tonalities get an explicit "+" prefix.
                              # NOTE(review): the speech tonality is rounded here, the print
                              # tonality is not -- confirm this asymmetry is intended.
                              text = paste0(Abstimmung,
                                            "<br><br>⌀ Tonalität der Printbeiträge: ", ifelse(Printbeiträge_Tonalität > 0, "+", ""), Printbeiträge_Tonalität,
                                            "<br>⌀ Tonalität der Parlamentsreden: ", ifelse(Parlamentsreden_Kerngeschäft_Tonalität > 0, "+", ""), round(Parlamentsreden_Kerngeschäft_Tonalität),
                                            "<br>Ja-Stimmen-Anteil ", format(Zustimmung, nsmall = 1), " %",
                                            "<br>Anzahl Printbeiträge (Kreisgrösse): ", Printbeiträge_Anzahl,
                                            "<br>Anzahl Parlamentsreden (Kreisfarbe): ", Parlamentsreden_Kerngeschäft_Anzahl),
                              # Show only the custom text on hover
                              hoverinfo = "text")

# Finish the second 3D bubble chart in one chain: axis titles, fonts and
# margins (no title, hence no top margin), hidden plotly UI chrome, then the
# project's clean_hoverinfo() helper.
bubble_plotly_3D_2 <- bubble_plotly_3D_2 %>%
  layout(
    scene = list(
      xaxis = list(
        title = "⌀ Tonalität der Printbeiträge",
        tickfont = list(size = 13)
      ),
      # The y axis is drawn in reversed direction.
      yaxis = list(
        title = "⌀ Tonalität der Parlamentsreden",
        tickfont = list(size = 13),
        autorange = "reversed"
      ),
      zaxis = list(
        title = "Ja-Stimmenanteil",
        tickfont = list(size = 13)
      )
    ),
    font = list(family = "Liberation Serif", size = 14),
    margin = list(l = 0, r = 0, t = 0, b = 0, pad = 0, autoexpand = TRUE)
  ) %>%
  config(displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE) %>%
  clean_hoverinfo()

# Auto-print the chart when the script is run interactively.
bubble_plotly_3D_2

# Upload the chart to plotly as a public file, overwriting an existing file
# of the same name.
plotly_POST(
  bubble_plotly_3D_2,
  filename = "DDJ-Blogeintrag 2/Bubble 3D 2",
  fileopt = "overwrite",
  sharing = "public"
)


# Generate the line chart with plotly: three traces over the same x values
# (the proposals with Vorlagentyp == "I"), one each for the number of print
# articles, tweets and parliamentary speeches. The traces differ only in
# their y column, legend name and color.
line_plotly <- plot_ly(
  data = subset(Abstimmungsvorlagen, Vorlagentyp == "I"),
  x = Abstimmung,
  y = Printbeiträge_Anzahl,
  name = "Printbeiträge",
  opacity = 0.7,
  marker = list(color = "#f2521c", symbol = "circle-dot", size = 10),
  line = list(color = "#f2521c", width = 4),
  type = "scatter",
  mode = "markers+lines",
  showlegend = TRUE
) %>%
  add_trace(
    x = Abstimmung,
    y = Tweets_Treffer_Anzahl,
    name = "Tweets",
    opacity = 0.7,
    marker = list(color = "#1da1f2", symbol = "circle-dot", size = 10),
    line = list(color = "#1da1f2", width = 4),
    type = "scatter",
    mode = "markers+lines",
    showlegend = TRUE,
    visible = TRUE
  ) %>%
  add_trace(
    x = Abstimmung,
    y = Parlamentsreden_Anzahl,
    name = "Parlamentsreden",
    opacity = 0.7,
    marker = list(color = "#52f21c", symbol = "circle-dot", size = 10),
    line = list(color = "#52f21c", width = 4),
    type = "scatter",
    mode = "markers+lines",
    showlegend = TRUE,
    visible = TRUE
  )

# Finish the line chart: title, empty axis titles, fonts, a fixed height and
# a large bottom margin (272), then hide the plotly UI chrome.
line_plotly <- line_plotly %>%
  layout(
    title = "Anzahl Printbeiträge, Tweets und Parlamentsreden 「Volksinitiativen」",
    xaxis = list(title = ""),
    yaxis = list(title = ""),
    font = list(family = "Liberation Serif", size = 14),
    margin = list(l = 30, r = 0, t = 40, b = 272, pad = 0, autoexpand = TRUE),
    height = 800
  ) %>%
  config(displaylogo = FALSE, showLink = FALSE, displayModeBar = FALSE)

# Auto-print the chart when the script is run interactively.
line_plotly

# Upload the chart to plotly as a public file, overwriting an existing file
# of the same name.
plotly_POST(
  line_plotly,
  filename = "DDJ-Blogeintrag 2/Line Plot",
  fileopt = "overwrite",
  sharing = "public"
)


### Assorted helper snippets
# Show the tweets recorded as hits for one ballot proposal
Abstimmung <- "Steuerfreie Kinder- und Ausbildungszulagen"

# The hit indices of a proposal are stored as one comma-separated string;
# split it and convert the pieces to numeric row indices into `tweets`.
hits <- Abstimmungsvorlagen$Tweets_Treffer_Indizes[Abstimmungsvorlagen$Abstimmung == Abstimmung] %>%
  strsplit(split = ",") %>%
  unlist() %>%
  as.numeric()

# Print each hit tweet on its own.
for (hit in hits) {
  print(tweets$text[hit])
}

# Search the tweets of the observation period for matches of the first
# regular expression and show the matching tweet texts (case-sensitive, PCRE)
tweets$text[index_observation_period_start:index_observation_period_end] %>%
  grep(pattern = regexs[1], x = ., ignore.case = FALSE, perl = TRUE, value = TRUE)

# grep test: the pattern combines two lookaheads, a case-insensitive one for
# "geistig" and a case-sensitive one for "Hektik", so only strings containing
# both (in any order) match. Expected result for the three samples: 1 2.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
grep(
  pattern = "(?i:(?=.*geistig))(?=.*Hektik)",
  x = c("Operative Hektik ersetzt geistige Windstille.", "GEISTIG ohne Hektik?", "hektik geistig"),
  ignore.case = FALSE,
  perl = TRUE
)

# Explore the parliamentary speeches by title. The pattern is a template: as
# written it consists only of the case-insensitivity flag, so every
# non-missing title matches; append a search term after "(?i)" to narrow it.
bla <- parliament_speeches_bruno[grepl(pattern = "(?i)", x = parliament_speeches_bruno$titel, perl = TRUE), ]
View(bla)

# Number of selected speeches per Curia Vista ID
table(bla$curiaVista)

# Distinct titles recorded for one affair
unique(parliament_speeches_bruno$titel[parliament_speeches_bruno$curiaVista == "13.3374"])

# Inspect the selected speeches of one affair
View(subset(bla, curiaVista == "12.4230"))

# Find all proposals with more than one final vote
#
# For every ballot proposal the first Curia Vista ID ("yy.nnnn") is converted
# into the ID used in the web service URL (century + yy + zero-padded number),
# the votes of that affair are fetched via get_votes(), and the proposal name
# is printed when more than one vote text mentions "Schlussabstimmung" or
# "vote final".
for (Abstimmung in Abstimmungsvorlagen$Abstimmung) {
  curia_vista_IDs <- unlist(strsplit(Abstimmungsvorlagen$Curia_Vista_IDs[Abstimmungsvorlagen$Abstimmung == Abstimmung], split = ","))

  # Without a Curia Vista ID no valid URL can be built; skip instead of
  # sending a request for a bogus affair ID.
  if (length(curia_vista_IDs) == 0 || is.na(curia_vista_IDs[1])) {
    message("Keine Curia-Vista-ID vorhanden, übersprungen: ", Abstimmung)
    next
  }

  affair_ID <- unlist(strsplit(curia_vista_IDs[1], split = "\\."))

  # Two-digit years below 80 belong to the 2000s, all others to the 1900s.
  # The condition is a scalar, so plain if/else is used instead of ifelse().
  affair_ID[1] <- paste0(if (as.numeric(affair_ID[1]) < 80) "20" else "19", affair_ID[1])

  # Left-pad the sequence number with zeros to four digits (e.g. "075" -> "0075").
  affair_ID[2] <- sprintf("%04d", as.integer(affair_ID[2]))

  affair_ID <- paste0(affair_ID, collapse = "")
  votes_overview <- get_votes(url = paste0("http://ws-old.parlament.ch/votes/affairs/", affair_ID))

  # perl = TRUE makes the inline (?i) flag a documented PCRE feature, in line
  # with the other pattern searches in this file.
  final_votes_count <- length(grep(pattern = "(?i)(schlussabstimmung|vote final)",
                                   x = votes_overview$divisionText,
                                   perl = TRUE))
  if (final_votes_count > 1) {
    print(Abstimmung)
  }
}

# Print the parliamentarians recorded for one proposal, one line per stored
# ID, in the form "<number> : <first name> <last name>"
ps <- Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft[Abstimmungsvorlagen$Abstimmung == "Verbot Nahrungsmittelspekulation"] %>%
  strsplit(split = ",") %>%
  unlist()

for (i in ps) {
  # Rows of `parliamentarians` whose id equals the current ID
  is_current <- parliamentarians$id == i
  print(paste0(
    parliamentarians$number[is_current],
    " : ",
    parliamentarians$firstName[is_current],
    " ",
    parliamentarians$lastName[is_current]
  ))
}

# Rough share of unambiguous votes in the final votes: the number of IDs
# listed as pro or contra divided by the number of all listed IDs, summed
# over all proposals
local({
  # Total number of comma-separated IDs across all elements of a column
  count_ids <- function(id_lists) {
    sum(lengths(strsplit(id_lists, split = ",")))
  }

  decided <- count_ids(Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft_Pro) +
    count_ids(Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft_Contra)

  decided / count_ids(Abstimmungsvorlagen$Parlamentarier_IDs_Kerngeschäft)
})