Skip to content

Instantly share code, notes, and snippets.

@ablaette
ablaette / gist:bde61153ff3844677e61601787bb8676
Last active November 20, 2024 15:04
subset of speeches as dfm
library(dplyr)
library(polmineR)
library(data.table)
# library(quanteda)
# Search terms of interest. The trailing ".*" in "Asyl.*" suggests the entries
# are meant as regular expressions -- NOTE(review): `dict` is not used in the
# visible part of this script; confirm how it is consumed further down.
dict <- c("Migration", "Flucht", "Asyl.*")
# Open the GERMAPARL2 corpus, keep only the parts whose protocol_year lies in
# 2013-2021, and pass the result to as.speeches(), which is told to take the
# date from the "protocol_date" attribute and the speaker name from
# "speaker_who".
speeches <- corpus("GERMAPARL2") %>%
subset(protocol_year %in% 2013:2021) %>%
as.speeches(s_attribute_date = "protocol_date", s_attribute_name = "speaker_who")
@ablaette
ablaette / pdf2count.R
Created September 3, 2024 19:44
Extract text from pdf and count tokens
library(pdftools)
library(dplyr)
library(tibble)
# URL of the PDF document to be processed.
fname <- "https://www.gruene-bw.de/wp-content/uploads/2021/01/GrueneBW-Landtagswahlprogramm-2021-Wachsen-wir-ueber-uns-hinaus.pdf"
# Download target: a temporary file with a ".pdf" extension.
tmp <- tempfile(fileext = ".pdf")
# mode = "wb" forces a binary transfer. A PDF is a binary file, and on Windows
# the default text mode rewrites line endings, which corrupts the download
# (".pdf" is not among the extensions for which R switches to "wb" by itself).
download.file(url = fname, destfile = tmp, mode = "wb")
doc <- pdftools::pdf_text(tmp) %>%
gsub("-\\n\\s*", "", .) %>%
@ablaette
ablaette / timeseries_of_shares.R
Last active July 8, 2024 13:35
Time series of shares
# Plot time series for multiple queries
library(polmineR)
library(dplyr)
library(tidyr)
library(xts)
library(lubridate) # we need lubridate::floor_date()
# Identifier of the corpus to work with.
corpus_id <- "GERMAPARL2"
# Query string: a single-quoted pattern that accepts an upper- or lower-case
# initial "i" followed by "ndustriepolit" and any remaining characters.
# NOTE(review): presumably evaluated as a CQP query by polmineR -- confirm
# against the code that consumes `look_up`.
look_up <- "'[Ii]ndustriepolit.*'"
@ablaette
ablaette / illegal_migration.R
Created November 27, 2023 21:42
illegal migration in Bundestag debates
# snippet for 'Methodenanwendung in Praxisfeldern':
# Framing immigration as "illegal"
library(polmineR)
library(dplyr)
library(xts)
library(lubridate) # we need lubridate::floor_date()
# Query string made of two double-quoted patterns in sequence:
#   1. a word starting with "illegal", "falsch" or "undoku" (first letter in
#      either case),
#   2. a word starting with "Migr", "Flüchtl" or "Asyl".
# NOTE(review): presumably CQP syntax, i.e. two adjacent tokens -- confirm
# against the code that consumes `look_up`.
look_up <- '"([iI]llegal.*|[fF]alsch.*|[uU]ndoku.*)" "(Migr.*|Flüchtl.*|Asyl.*)"'
@ablaette
ablaette / gist:0ceec5cdbc7090fe850cce6da48ee22a
Created November 27, 2023 15:26
Install cwbtools and GermaParl2
# Set-up script: the steps below depend on each other, so keep their order.
# Install the 'remotes' package, which provides install_github() used next.
install.packages("remotes")
# Install cwbtools from the "dev" ref of the GitHub repository PolMine/cwbtools.
remotes::install_github("PolMine/cwbtools", ref = "dev")
library(cwbtools)
# Install the corpus identified by this DOI.
# NOTE(review): presumably the GermaParl2 corpus -- confirm the DOI target.
corpus_install(doi = "10.5281/zenodo.7949074")
@ablaette
ablaette / timeseries_by_party.R
Last active June 29, 2023 08:51
time series analysis
# last edit: 2023-06-23
library(polmineR)
library(data.table)
library(xts)
# Narrow the GERMAPARL2 corpus step by step: speaker_party equal to "AfD",
# protocol_lp equal to "19", and p_type equal to "speech". The subcorpus is
# built once and reused below.
afd <- corpus("GERMAPARL2") %>%
  subset(speaker_party == "AfD") %>%
  subset(protocol_lp == "19") %>%
  subset(p_type == "speech")

# Run count() over the "word" p-attribute of the subcorpus.
afd_count <- afd %>%
  count(p_attribute = "word")
@ablaette
ablaette / ts_multiple_queries.R
Created May 8, 2023 12:44
Plot time series for multiple queries
# Plot time series for multiple queries
library(polmineR)
library(dplyr)
library(tidyr)
library(xts)
library(lubridate) # we need lubridate::floor_date()
# Identifier of the corpus to work with.
corpus_id <- "NADIRATAZ"
# Two query strings, each a single-quoted pattern: words starting with
# "Einwanderung" and words starting with "Zuwanderung".
# NOTE(review): presumably each entry is run as a separate CQP query -- confirm
# against the code that consumes `look_up`.
look_up <- c("'Einwanderung.*'", "'Zuwanderung.*'")
@ablaette
ablaette / topic_distribution.R
Created January 9, 2023 20:23
Topic distribution over time
library(topicmodels)
library(dplyr)
library(magrittr)
library(lubridate)
library(data.table)
library(xts)
# Load a serialized R object from a hard-coded path in the user's Downloads
# folder; adjust the path to wherever the file is stored locally.
# NOTE(review): judging by the file name this is a fitted LDA topic model --
# confirm.
lda <- readRDS("~/Downloads/germaparl_lda_speeches_250.rds")
# Number of the topic to inspect.
topic_to_get <- 133 # citizenship (Staatsangehörigkeit) / integration
@ablaette
ablaette / ts.R
Last active December 12, 2022 20:27
# Plot time series for query matches using polmineR/dplyr/xts
library(polmineR)
library(dplyr)
library(xts)
library(lubridate) # we need lubridate::floor_date()
# Query string: the literal word "Arbeitslosigkeit" wrapped in double quotes
# inside the R string. NOTE(review): presumably CQP syntax for a single token
# -- confirm against the code that consumes `look_up`.
look_up <- '"Arbeitslosigkeit"'
corpus("GERMAPARL") %>%