Wouter van Atteveldt vanatteveldt

## labels.R
library(tidyverse)

# Build a tibble pairing a column name with that column's "label" attribute.
#
# `var` is the name of a column in `d`. Note that `d` is not an argument:
# it is resolved from the enclosing environment when the function runs.
get_label <- function(var) {
  column <- d[[var]]
  tibble(var = var, label = attr(column, which = "label"))
}

# Apply get_label() to every column name of `d` and row-bind the results.
colnames(d) |>
  map(get_label) |>
  list_rbind()

var <- "sex_rec"

get_value_labels <- function(var) {
    value_labels <- attr(d[[var]], which="labels")
    tibble(var=var, value=value_labels, label=names(value_labels))

## concat_pdf_even.py
import subprocess
from pathlib import Path
from PyPDF2 import PdfReader, PdfWriter

# PDF writer object; it is not used in the lines visible here
# (presumably pages are added to it further on -- TODO confirm).
outPdf=PdfWriter()
# Iterate over every *.html file in the current working directory.
for inf in Path.cwd().glob("*.html"):
    # Target path: same name as the HTML file, with a .pdf extension.
    pdff = inf.with_suffix(".pdf")
    # Only convert when the PDF does not exist yet.
    if not pdff.exists():
        print(f"*1* {inf} -> {pdff}")
        # Run the external wkhtmltopdf tool; check_call raises
        # CalledProcessError if it exits with a non-zero status.
        subprocess.check_call(["wkhtmltopdf", str(inf), str(pdff)])

## diarization2.py
import csv
import sys
import numpy
from pyannote.audio.pipelines.utils.hook import ProgressHook
import collections
import whisper
from pyannote.audio import Pipeline
import torch
from pyannote.audio import Audio
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding

## tidy_svd.R
library(tidyverse)
tidy_svd = function(long_data, rows_from, columns_from, values_from, ndimensions=10) {
  # center the data
  long_data[[values_from]] = long_data[[values_from]] - mean(long_data[[values_from]], na.rm=TRUE)

  # pivot and cast to wide matrix
  m <- long_data |>
    select(all_of(c(rows_from, columns_from, values_from))) |>
    na.omit() |>
    pivot_wider(names_from=columns_from, values_from=values_from, values_fill = 0) |>

## tijdschrijven.R
library(gh)
library(tidyverse)

get_all_pages <- function(url, ...) {
    result <- list()
    for (page in 1:99999) {
        message(str_c("[", page, "] ", do.call(glue::glue, list(url, ...))))
        p <- gh(url, ..., page=page)
        if (length(p) == 0) break
        result[[as.character(page)]] <- p

## lab2.Rmd
---
title: 'Lab 2: exploring the US elections (template)'
author: "(Your name)"
output:
  pdf_document:
editor_options:
  chunk_output_type: console
---

```{r setup, include=FALSE}

## vu_demo_spss.R
library(psych)
library(tidyverse)
library(haven)

d = read_sav("~/Downloads/Project 2 GGD_February 21, 2023_03.46.sav")

cleaned = d |> filter(status != 1) |>
  rename_with(~str_replace(., "Q37", "Vertrouwen"), starts_with("Q37")) |>
  rename_with(~str_replace(., "Q38", "Privacygevoeligheid"), starts_with("Q38")) |>
  rename_with(~str_replace(., "Q40", "Perceptie_medewerkers"), starts_with("Q40")) |>

## example.jats.xml
<?xml version='1.0' encoding='utf-8'?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" "JATS-archivearticle1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
<front>
<journal-meta>
<journal-id/>
<journal-title-group>
<journal-title>Computational Communication Research</journal-title>
</journal-title-group>
<issn publication-format="electronic">2665-9085</issn>

## original.sps
* Encoding: UTF-8.

DATASET ACTIVATE DataSet1.
FREQUENCIES VARIABLES=Q14 Q37 A4.1_ A4.2_ A4.3_ Q37_1 Q37_2 Q37_3 Q37_4 Q37_5 Q37_6 Q37_7 Q37_8
    Q38_1 Q38_2 Q38_3 Q38_4 Q38_5 Q38_6 Q38_7 Q38_8 Q38_9 Q40_1 Q40_2 Q40_3 Q40_4 Q40_5 Q40_6 Q40_7
    Q40_8 Q15
  /STATISTICS=MEAN MEDIAN MODE
  /ORDER=ANALYSIS.

*** hieronder de variabele veranderd (andere naam)

## output.txt
Text: Wij zijn Wouter van Atteveldt en Nel Ruigrok.
10.000 nieuwe stamcel- en bloeddonoren na oproep PSV-perschef Thijs Slegers.
Ongeneeslijk ziek Opvallend veel mannen meldden zich aan als donor na een oproep van de ongeneeslijk zieke Slegers, Matchis kreeg 7.000 nieuwe aanmeldingen, Sanquin 3.000.


Model: pdelobelle/robbert-v2-dutch-ner

NER output:
{'entity_group': 'PER', 'score': 0.9998577, 'word': ' Wouter van Atte', 'start': 9, 'end': 24, 'full_word': 'Wouter van Atteveldt'}
{'entity_group': 'PER', 'score': 0.9999995, 'word': ' Nel Ruig', 'start': 33, 'end': 41, 'full_word': 'Nel Ruigrok'}
	library(tidyverse)

	get_label = function(var) tibble(var=var, label=d[[var]] %>% attr(which = "label"))
	map(colnames(d), get_label) \|> list_rbind()

	var = "sex_rec"

	get_value_labels <- function(var) {
	value_labels <- attr(d[[var]], which="labels")
	tibble(var=var, value=value_labels, label=names(value_labels))
	import subprocess
	from pathlib import Path
	from PyPDF2 import PdfReader, PdfWriter

	outPdf=PdfWriter()
	for inf in Path.cwd().glob("*.html"):
	pdff = inf.with_suffix(".pdf")
	if not pdff.exists():
	print(f"1 {inf} -> {pdff}")
	subprocess.check_call(["wkhtmltopdf", str(inf), str(pdff)])
	import csv
	import sys
	import numpy
	from pyannote.audio.pipelines.utils.hook import ProgressHook
	import collections
	import whisper
	from pyannote.audio import Pipeline
	import torch
	from pyannote.audio import Audio
	from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
	library(tidyverse)
	tidy_svd = function(long_data, rows_from, columns_from, values_from, ndimensions=10) {
	# center the data
	long_data[[values_from]] = long_data[[values_from]] - mean(long_data[[values_from]], na.rm=TRUE)

	# pivot and cast to wide matrix
	m <- long_data \|>
	select(all_of(c(rows_from, columns_from, values_from))) \|>
	na.omit() \|>
	pivot_wider(names_from=columns_from, values_from=values_from, values_fill = 0) \|>
	library(gh)
	library(tidyverse)

	get_all_pages <- function(url, ...) {
	result <- list()
	for (page in 1:99999) {
	message(str_c("[", page, "] ", do.call(glue::glue, list(url, ...))))
	p <- gh(url, ..., page=page)
	if (length(p) == 0) break
	result[[as.character(page)]] <- p
	---
	title: 'Lab 2: exploring the US elections (template)'
	author: "(Your name)"
	output:
	pdf_document:
	editor_options:
	chunk_output_type: console
	---

	```{r setup, include=FALSE}
	library(psych)
	library(tidyverse)
	library(haven)

	d = read_sav("~/Downloads/Project 2 GGD_February 21, 2023_03.46.sav")

	cleaned = d \|> filter(status != 1) \|>
	rename_with(~str_replace(., "Q37", "Vertrouwen"), starts_with("Q37")) \|>
	rename_with(~str_replace(., "Q38", "Privacygevoeligheid"), starts_with("Q38")) \|>
	rename_with(~str_replace(., "Q40", "Perceptie_medewerkers"), starts_with("Q40")) \|>
	<?xml version='1.0' encoding='utf-8'?>
	<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" "JATS-archivearticle1.dtd">
	<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
	<front>
	<journal-meta>
	<journal-id/>
	<journal-title-group>
	<journal-title>Computational Communication Research</journal-title>
	</journal-title-group>
	<issn publication-format="electronic">2665-9085</issn>
	* Encoding: UTF-8.

	DATASET ACTIVATE DataSet1.
	FREQUENCIES VARIABLES=Q14 Q37 A4.1_ A4.2_ A4.3_ Q37_1 Q37_2 Q37_3 Q37_4 Q37_5 Q37_6 Q37_7 Q37_8
	Q38_1 Q38_2 Q38_3 Q38_4 Q38_5 Q38_6 Q38_7 Q38_8 Q38_9 Q40_1 Q40_2 Q40_3 Q40_4 Q40_5 Q40_6 Q40_7
	Q40_8 Q15
	/STATISTICS=MEAN MEDIAN MODE
	/ORDER=ANALYSIS.

	*** hieronder de variabele veranderd (andere naam)
	Text: Wij zijn Wouter van Atteveldt en Nel Ruigrok.
	10.000 nieuwe stamcel- en bloeddonoren na oproep PSV-perschef Thijs Slegers.
	Ongeneeslijk ziek Opvallend veel mannen meldden zich aan als donor na een oproep van de ongeneeslijk zieke Slegers, Matchis kreeg 7.000 nieuwe aanmeldingen, Sanquin 3.000.


	Model: pdelobelle/robbert-v2-dutch-ner

	NER output:
	{'entity_group': 'PER', 'score': 0.9998577, 'word': ' Wouter van Atte', 'start': 9, 'end': 24, 'full_word': 'Wouter van Atteveldt'}
	{'entity_group': 'PER', 'score': 0.9999995, 'word': ' Nel Ruig', 'start': 33, 'end': 41, 'full_word': 'Nel Ruigrok'}