Wouter van Atteveldt vanatteveldt

## question7.R
library(tidyverse)

elections = tribble(
  ~year,~state_po,~party_Detailed,~candidatevotes,~totalvotes,
  1976,"AL","DEMOCRAT",500,1000,
  1976,"AL","REPUBLICAN",450,1000,
  1976,"AL","x",30,1000,
  1976,"AL","x",10,1000,
  1976,"AL","x",7,1000,
  1976,"AL","x",2,1000,

## rsyntax_example.R
# Install the parsing packages only when they are missing, so re-running the
# script does not download them again every time.
for (pkg in c("udpipe", "rsyntax")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(udpipe)
# as_tokenindex() and plot_tree() come from rsyntax, so it has to be attached
# as well; attaching only udpipe leaves both functions undefined.
library(rsyntax)

# Parse the example sentence with the English udpipe model and convert the
# annotation into an rsyntax token index.
tokens = udpipe('John Doe, who is a great guy, said yesterday that all was well', 'english') |>
  as_tokenindex()

# Plot the parse tree, passing the token, lemma and upos columns as labels.
plot_tree(tokens, token, lemma, upos)

verbs = c("tell", "show", "acknowledge", "admit", "affirm", "allege",
          "announce", "assert", "attest", "avow", "call", "claim", "comment",

## sweness_kurtosis_plots.r
# Plot histogram and normal curve for (simulated) data

library(tidyverse)
library(moments)
library(glue)

plot_distribution = function (x) {
  m = mean(x)
  sd = sd(x)
  skewness=skewness(x)

## run_svd.R
#' Run an SVD for collaborative filtering and process the results to be more tidyverse-friendly
#' @param ratingsmatrix An item-user review matrix
#' @param ndimensions the number of dimensions to use, defaults to 10
#' @return a list with the original u, d, and v matrices from the svd function and
#'         item_values - a long-format tibble with the values per item per dimension
#'         user_values - a long-format tibble with the values per user per dimension
#'         predictions - a long-format tibble with the predictions per user per item
#' @note (c) 2022 Wouter van Atteveldt, license: CC-0
run_svd = function(ratingsmatrix, ndimensions=10) {

## test.r
# Write the literal string "Arrr" to standard output (cat() adds no trailing newline).
cat("Arrr")

## hc2_demo.R
# demo: scraping

# str_sub() (stringr) and as_tibble() (tibble) are used below; attach the
# tidyverse so the script also runs in a fresh session.
library(tidyverse)
library(httr)

# Request the untyped data set of table 85275NED from the CBS OData API.
r = GET('https://opendata.cbs.nl/ODataApi/odata/85275NED/UntypedDataSet')
# Show the HTTP status code of the response.
r$status_code
# Peek at the first 500 characters of the raw response body.
content(r, as="text") |> str_sub(end=500) |> cat()
# Let httr parse the body and convert the parsed result to a tibble.
d = content(r, as="parsed") |> as_tibble()

# demo: cbs data

## hoed.R
library(tidyverse)
library(readtext)
library(quanteda)
library(topicmodels)

# Read the .tex files from every chapter directory.
d = readtext("/home/wva/ccsbook/chapter*/*.tex")
# Build a document-feature matrix with the corpus reshaped to paragraphs.
# Symbols and punctuation are removed during tokenization, and any remaining
# token matching the regex \W is dropped afterwards.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
dfm = corpus(d) |>
  corpus_reshape(to = "paragraphs") |>
  tokens(split_tags = FALSE, remove_symbols = TRUE, remove_punct = TRUE) |>
  tokens_remove("\\W", valuetype = "regex") |>
  dfm()
# Fix the random seed so that later randomised steps are reproducible.
set.seed(123)
m = dfm |> dfm_trim(min_termfreq = 0.8, termfreq_type = "quantile",
           max_docfreq = 0.1, docfreq_type = "prop") |>

## auc2_plot.R
library(tidyverse)
# Fetch the file "1976-2020-president.tab" from the Harvard Dataverse.
elections = dataverse::get_dataframe_by_name(
  server = "dataverse.harvard.edu",
  dataset = "doi:10.7910/DVN/42MVDX",
  filename = "1976-2020-president.tab"
)

# Unique combinations of state and total vote count for the 2020 rows.
totals <- elections |>
  filter(year == 2020) |>
  select(state_po, totalvotes) |>
  unique()
d = elections |>
  filter(year %in% c(2016,2020), party_simplified == "DEMOCRAT", candidatevotes > 1000) |>
  mutate(percentage =candidatevotes / totalvotes * 100) |>

## rdemo1.R
# Demo 1: Rtweet and word clouds
# install.packages("rtweet")

library(tidyverse)
library(rtweet)
library(quanteda)
library(quanteda.textplots)
library(RColorBrewer)

# Set up rtweet authentication before any API call is made.
# NOTE(review): presumably this triggers an interactive login -- confirm against the rtweet docs.
auth_setup_default()

## atteveldt_icademo.r

####################################################
#                                                  #
#               Bonjour a tous!                    #
#                                                  #
#                     Ca va‽️                       #
#                                                  #
####################################################

# Embedding-based tools for (semi-)automatic dictionary
	library(tidyverse)

	elections = tribble(
	~year,~state_po,~party_Detailed,~candidatevotes,~totalvotes,
	1976,"AL","DEMOCRAT",500,1000,
	1976,"AL","REPUBLICAN",450,1000,
	1976,"AL","x",30,1000,
	1976,"AL","x",10,1000,
	1976,"AL","x",7,1000,
	1976,"AL","x",2,1000,
	# Install the parsing packages only when they are missing, so re-running the
	# script does not download them again every time.
	for (pkg in c("udpipe", "rsyntax")) {
	  if (!requireNamespace(pkg, quietly = TRUE)) {
	    install.packages(pkg)
	  }
	}
	library(udpipe)
	# as_tokenindex() and plot_tree() come from rsyntax, so it has to be attached
	# as well; attaching only udpipe leaves both functions undefined.
	library(rsyntax)

	# Parse the example sentence with the English udpipe model and convert the
	# annotation into an rsyntax token index. The pipe is the native `|>`; the
	# backslash-escaped form `\|>` is not valid R.
	tokens = udpipe('John Doe, who is a great guy, said yesterday that all was well', 'english') |>
	  as_tokenindex()

	# Plot the parse tree, passing the token, lemma and upos columns as labels.
	plot_tree(tokens, token, lemma, upos)

	verbs = c("tell", "show", "acknowledge", "admit", "affirm", "allege",
	"announce", "assert", "attest", "avow", "call", "claim", "comment",
	# Plot histogram and normal curve for (simulated) data

	library(tidyverse)
	library(moments)
	library(glue)

	plot_distribution = function (x) {
	m = mean(x)
	sd = sd(x)
	skewness=skewness(x)
	#' Run an SVD for collaborative filtering and process the results to be more tidyverse-friendly
	#' @param ratingsmatrix An item-user review matrix
	#' @param ndimensions the number of dimensions to use, defaults to 10
	#' @return a list with the original u, d, and v matrices from the svd function and
	#' item_values - a long-format tibble with the values per item per dimension
	#' user_values - a long-format tibble with the values per user per dimension
	#' predictions - a long-format tibble with the predictions per user per item
	#' @note (c) 2022 Wouter van Atteveldt, license: CC-0
	run_svd = function(ratingsmatrix, ndimensions=10) {
	# demo: scraping

	# str_sub() (stringr) and as_tibble() (tibble) are used below; attach the
	# tidyverse so the script also runs in a fresh session.
	library(tidyverse)
	library(httr)

	# Request the untyped data set of table 85275NED from the CBS OData API.
	r = GET('https://opendata.cbs.nl/ODataApi/odata/85275NED/UntypedDataSet')
	# Show the HTTP status code of the response.
	r$status_code
	# Peek at the first 500 characters of the raw response body. The pipe is the
	# native `|>`; the backslash-escaped form `\|>` is not valid R.
	content(r, as="text") |> str_sub(end=500) |> cat()
	# Let httr parse the body and convert the parsed result to a tibble.
	d = content(r, as="parsed") |> as_tibble()

	# demo: cbs data
	library(tidyverse)
	library(readtext)
	library(quanteda)
	library(topicmodels)

	# Read the .tex files from every chapter directory. The glob wildcards are
	# required: without them the path names a single file called ".tex".
	d = readtext("/home/wva/ccsbook/chapter*/*.tex")
	# Build a document-feature matrix with the corpus reshaped to paragraphs.
	# Symbols and punctuation are removed during tokenization, and any remaining
	# token matching the regex \W is dropped afterwards. The pipe is the native
	# `|>` (the escaped form `\|>` is not valid R), and TRUE/FALSE are spelled out
	# because T and F are ordinary, reassignable variables.
	dfm = corpus(d) |>
	  corpus_reshape(to = "paragraphs") |>
	  tokens(split_tags = FALSE, remove_symbols = TRUE, remove_punct = TRUE) |>
	  tokens_remove("\\W", valuetype = "regex") |>
	  dfm()
	# Fix the random seed so that later randomised steps are reproducible.
	set.seed(123)
	m = dfm \|> dfm_trim(min_termfreq = 0.8, termfreq_type = "quantile",
	max_docfreq = 0.1, docfreq_type = "prop") \|>
	library(tidyverse)
	# Fetch the file "1976-2020-president.tab" from the Harvard Dataverse.
	elections <- dataverse::get_dataframe_by_name(
	  filename = "1976-2020-president.tab",
	  dataset = "doi:10.7910/DVN/42MVDX",
	  server = "dataverse.harvard.edu")

	# Unique combinations of state and total vote count for the 2020 rows.
	# The pipe is the native `|>`; the backslash-escaped form `\|>` is not valid R.
	totals = elections |>
	  filter(year == 2020) |>
	  select(state_po, totalvotes) |>
	  unique()
	d = elections \|>
	filter(year %in% c(2016,2020), party_simplified == "DEMOCRAT", candidatevotes > 1000) \|>
	mutate(percentage =candidatevotes / totalvotes * 100) \|>
	# Demo 1: Rtweet and word clouds
	# install.packages("rtweet")

	library(tidyverse)
	library(rtweet)
	library(quanteda)
	library(quanteda.textplots)
	library(RColorBrewer)

	auth_setup_default()

	####################################################
	# #
	# Bonjour a tous! #
	# #
	# Ca va‽️ #
	# #
	####################################################

	# Embedding-based tools for (semi-)automatic dictionary