This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
elections = tribble( | |
~year,~state_po,~party_Detailed,~candidatevotes,~totalvotes, | |
1976,"AL","DEMOCRAT",500,1000, | |
1976,"AL","REPUBLICAN",450,1000, | |
1976,"AL","x",30,1000, | |
1976,"AL","x",10,1000, | |
1976,"AL","x",7,1000, | |
1976,"AL","x",2,1000, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
install.packages("udpipe") | |
install.packages("rsyntax") | |
library(udpipe) | |
tokens = udpipe('John Doe, who is a great guy, said yesterday that all was well', 'english') |> | |
as_tokenindex() | |
plot_tree(tokens, token, lemma, upos) | |
verbs = c("tell", "show", "acknowledge", "admit", "affirm", "allege", | |
"announce", "assert", "attest", "avow", "call", "claim", "comment", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot histogram and normal curve for (simulated) data | |
library(tidyverse) | |
library(moments) | |
library(glue) | |
plot_distribution = function (x) { | |
m = mean(x) | |
sd = sd(x) | |
skewness=skewness(x) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Run an SVD for collaborative filtering and process the results to be more tidyverse-friendly | |
#' @param ratingsmatrix An item-user review matrix | |
#' @param ndimensions the number of dimensions to use, defaults to 10 | |
#' @return a list with the original u, d, and v matrices from the svd function and | |
#' item_values - a long-format tibble with the values per item per dimension | |
#' user_values - a long-format tibble with the values per user per dimension | |
#' predictions - a long-format tibble with the predictions per user per item | |
#' @note (c) 2022 Wouter van Atteveldt, license: CC-0 | |
run_svd = function(ratingsmatrix, ndimensions=10) { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cat("Arrr") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# demo: scraping | |
library(httr) | |
r = GET('https://opendata.cbs.nl/ODataApi/odata/85275NED/UntypedDataSet') | |
r$status_code | |
content(r, as="text") |> str_sub(end=500) |> cat() | |
d = content(r, as="parsed") |> as_tibble() | |
# demo: cbs data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(readtext) | |
library(quanteda) | |
library(topicmodels) | |
d = readtext("/home/wva/ccsbook/chapter*/*.tex") | |
dfm = corpus(d) |> corpus_reshape(to="paragraphs") |> tokens(split_tags=F, remove_symbols = T, remove_punct = T) |> tokens_remove("\\W", valuetype="regex") |> dfm() | |
set.seed(123) | |
m = dfm |> dfm_trim(min_termfreq = 0.8, termfreq_type = "quantile", | |
max_docfreq = 0.1, docfreq_type = "prop") |> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
elections <- dataverse::get_dataframe_by_name( | |
filename = "1976-2020-president.tab", | |
dataset = "doi:10.7910/DVN/42MVDX", | |
server = "dataverse.harvard.edu") | |
totals = elections |> filter(year == 2020) |> select(state_po, totalvotes) |> unique() | |
d = elections |> | |
filter(year %in% c(2016,2020), party_simplified == "DEMOCRAT", candidatevotes > 1000) |> | |
mutate(percentage =candidatevotes / totalvotes * 100) |> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo 1: Rtweet and word clouds | |
# install.packages("rtweet") | |
library(tidyverse) | |
library(rtweet) | |
library(quanteda) | |
library(quanteda.textplots) | |
library(RColorBrewer) | |
auth_setup_default() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#################################################### | |
# # | |
# Bonjour a tous! # | |
# # | |
# Ca va‽️ # | |
# # | |
#################################################### | |
# Embedding-based tools for (semi-)automatic dictionary |