Skip to content

Instantly share code, notes, and snippets.

library(tidytext)
library(tidyverse)
library(tidygraph)
library(ggraph)
library(ggiraph)
library(udpipe)
source("conceptmap.R")
stopwords= stopwords::stopwords("nl", source = "snowball")
library(tidytext)
library(tidyverse)
#' Compute the KDE smoothing of the occurrence of 'target' in the 'tokens'
#'
#' @param tokens a character vector of words in the corpus
#' @param target a word to look for in the corpus
#' @param n the number of points to sample
#' @param bw the bandwidth of the smoothing
kde <- function(tokens, target, n=1000, bw=5000) {
library(tidytext)
library(tidyverse)
#' Compute the KDE smoothing of the occurrence of 'target' in the 'tokens'
#'
#' @param tokens a character vector of words in the corpus
#' @param target a word to look for in the corpus
#' @param n the number of points to sample
#' @param bw the bandwidth of the smoothing
kde <- function(tokens, target, n=1000, bw=5000) {
library(igraph)
library(tidyverse)
dtm = readRDS("/tmp/nieuws.rds")
g = RNewsflow::newsflow_compare(dtm, dtm, date='date',
min_similarity = 0.75, ## similarity threshold
hour_window = c(-1*24, 1*24), ## tijd window: tussen 0 en 7 dagen na publicatie persbericht
measure = 'cosine', ## cosine similarity. Je kunt ook overlap_pct gebruiken voor assymetrische vergelijking
tf_idf=T) ## weeg woorden die minder vaak voorkomen zwaarder mee
def scrape_pb(url):
url = URL_ROOT + url
print(url)
page = requests.get(url)
tree = html.fromstring(page.text)
if get_css(tree, "div.alert.alert-info"):
continue
else:
medium = get_css(tree, "h1.resultheader-publicatietype")
try:
def scrape_pb(url):
url = URL_ROOT + url
print(url)
page = requests.get(url)
tree = html.fromstring(page.text)
medium = get_css(tree, "h1.resultheader-publicatietype")
try:
headline = get_css(tree, "h1.title")
except:
pass
@nruigrok
nruigrok / kv.py
Last active January 4, 2020 17:14
import urllib.request
import json
import amcatclient
from amcatclient import AmcatAPI
URL_TEMPLATE = 'https://www.openkamer.org/api/kamervraag/?limit=10&offset={offset}'
def get_json(url):
req = urllib.request.Request(url)
@nruigrok
nruigrok / kv.py
Created January 4, 2020 13:17
kamervragen van api
import urllib.request
import json
url = 'https://www.openkamer.org/api/kamervraag/?format=json&offset=1'
req = urllib.request.Request(url)
##parsing response
r = urllib.request.urlopen(req).read()
cont = json.loads(r.decode('utf-8'))
import django
django.setup()
from amcat.models import Article,Project, CodingJob, CodingSchemaField, ArticleSet
import sys, datetime, csv
cjs = list(CodingJob.objects.filter(project__id=1))
import requests
url = 'https://www.jancisrobinson.com/tastings/search?perpage=100&page=1'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
response = requests.get(url, headers=headers)
print(response.content)