Skip to content

Instantly share code, notes, and snippets.

View vanatteveldt's full-sized avatar

Wouter van Atteveldt vanatteveldt

  • VU University
  • Amsterdam
View GitHub Profile
library(tidyverse)
# Look up the variable label of column `var` in the global data frame `d`.
#
# var: name of a column in `d` (single string).
# Returns a one-row tibble with columns `var` and `label`; `label` is NA when
# the column carries no "label" attribute.
get_label <- function(var) {
  # exact = TRUE matters: attr() falls back to partial name matching by
  # default, so a column that only has a "labels" attribute (value labels)
  # would otherwise be returned here as if it were the variable label.
  label <- attr(d[[var]], which = "label", exact = TRUE)
  # Keep the `label` column present even when there is no label, so every
  # per-variable result has the same shape.
  tibble(var = var, label = if (is.null(label)) NA else label)
}
# Call get_label() on every column name of `d` and row-bind the per-column
# tibbles into a single table.
map(colnames(d), get_label) |> list_rbind()
var = "sex_rec"
get_value_labels <- function(var) {
value_labels <- attr(d[[var]], which="labels")
tibble(var=var, value=value_labels, label=names(value_labels))
import subprocess
from pathlib import Path
from PyPDF2 import PdfReader, PdfWriter
outPdf=PdfWriter()
for inf in Path.cwd().glob("*.html"):
pdff = inf.with_suffix(".pdf")
if not pdff.exists():
print(f"*1* {inf} -> {pdff}")
subprocess.check_call(["wkhtmltopdf", str(inf), str(pdff)])
import csv
import sys
import numpy
from pyannote.audio.pipelines.utils.hook import ProgressHook
import collections
import whisper
from pyannote.audio import Pipeline
import torch
from pyannote.audio import Audio
from pyannote.audio.pipelines.speaker_verification import PretrainedSpeakerEmbedding
library(tidyverse)
tidy_svd = function(long_data, rows_from, columns_from, values_from, ndimensions=10) {
# center the data
long_data[[values_from]] = long_data[[values_from]] - mean(long_data[[values_from]], na.rm=TRUE)
# pivot and cast to wide matrix
m <- long_data |>
select(all_of(c(rows_from, columns_from, values_from))) |>
na.omit() |>
pivot_wider(names_from=columns_from, values_from=values_from, values_fill = 0) |>
library(gh)
library(tidyverse)
get_all_pages <- function(url, ...) {
result <- list()
for (page in 1:99999) {
message(str_c("[", page, "] ", do.call(glue::glue, list(url, ...))))
p <- gh(url, ..., page=page)
if (length(p) == 0) break
result[[as.character(page)]] <- p
---
title: 'Lab 2: exploring the US elections (template)'
author: "(Your name)"
output:
pdf_document:
editor_options:
chunk_output_type: console
---
```{r setup, include=FALSE}
library(psych)
library(tidyverse)
library(haven)
d = read_sav("~/Downloads/Project 2 GGD_February 21, 2023_03.46.sav")
cleaned = d |> filter(status != 1) |>
rename_with(~str_replace(., "Q37", "Vertrouwen"), starts_with("Q37")) |>
rename_with(~str_replace(., "Q38", "Privacygevoeligheid"), starts_with("Q38")) |>
rename_with(~str_replace(., "Q40", "Perceptie_medewerkers"), starts_with("Q40")) |>
<?xml version='1.0' encoding='utf-8'?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD v1.2 20190208//EN" "JATS-archivearticle1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="1.2" article-type="other">
<front>
<journal-meta>
<journal-id/>
<journal-title-group>
<journal-title>Computational Communication Research</journal-title>
</journal-title-group>
<issn publication-format="electronic">2665-9085</issn>
* Encoding: UTF-8.
* Make DataSet1 the active dataset, then request frequency tables for the listed variables with mean, median and mode as statistics.
DATASET ACTIVATE DataSet1.
FREQUENCIES VARIABLES=Q14 Q37 A4.1_ A4.2_ A4.3_ Q37_1 Q37_2 Q37_3 Q37_4 Q37_5 Q37_6 Q37_7 Q37_8
Q38_1 Q38_2 Q38_3 Q38_4 Q38_5 Q38_6 Q38_7 Q38_8 Q38_9 Q40_1 Q40_2 Q40_3 Q40_4 Q40_5 Q40_6 Q40_7
Q40_8 Q15
/STATISTICS=MEAN MEDIAN MODE
/ORDER=ANALYSIS.
*** hieronder de variabele veranderd (andere naam)
Text: Wij zijn Wouter van Atteveldt en Nel Ruigrok.
10.000 nieuwe stamcel- en bloeddonoren na oproep PSV-perschef Thijs Slegers.
Ongeneeslijk ziek Opvallend veel mannen meldden zich aan als donor na een oproep van de ongeneeslijk zieke Slegers, Matchis kreeg 7.000 nieuwe aanmeldingen, Sanquin 3.000.
Model: pdelobelle/robbert-v2-dutch-ner
NER output:
{'entity_group': 'PER', 'score': 0.9998577, 'word': ' Wouter van Atte', 'start': 9, 'end': 24, 'full_word': 'Wouter van Atteveldt'}
{'entity_group': 'PER', 'score': 0.9999995, 'word': ' Nel Ruig', 'start': 33, 'end': 41, 'full_word': 'Nel Ruigrok'}