Nel Ruigrok nruigrok

## gist:52d6dffe6ad33f2924895ae909b49f74
library(tidytext)
library(tidyverse)
library(tidygraph)
library(ggraph)
library(ggiraph)
library(udpipe)

# Evaluate conceptmap.R so that whatever it defines is available in this session.
source("conceptmap.R")

# Stop word list for language code "nl", taken from the "snowball" source of
# the stopwords package. The variable shares its name with the package
# function, which is why the call is namespace-qualified.
stopwords= stopwords::stopwords("nl", source = "snowball")

## conceptmap.R
library(tidytext)
library(tidyverse)

#' Compute the KDE smoothing of the occurrence of 'target' in the 'tokens'
#'
#' @param tokens a character vector of words in the corpus
#' @param target a word to look for in the corpus
#' @param n the number of points to sample
#' @param bw the bandwidth of the smoothing
kde <- function(tokens, target, n=1000, bw=5000) {

## kde.R
library(tidytext)
library(tidyverse)

#' Compute the KDE smoothing of the occurrence of 'target' in the 'tokens'
#'
#' @param tokens a character vector of words in the corpus
#' @param target a word to look for in the corpus
#' @param n the number of points to sample
#' @param bw the bandwidth of the smoothing
kde <- function(tokens, target, n=1000, bw=5000) {

## compare.R
library(igraph)
library(tidyverse)

# Load the serialized object that is compared against itself below.
dtm = readRDS("/tmp/nieuws.rds")

# Compare every document in dtm with every other document in dtm.
# NOTE(review): the original comment on hour_window described a window of
# "0 to 7 days after publication of the press release", but the code uses
# 24 hours before to 24 hours after. The code is kept; confirm which is intended.
g = RNewsflow::newsflow_compare(dtm, dtm, date='date',
                                min_similarity = 0.75,       ## similarity threshold
                                hour_window = c(-1*24, 1*24),   ## time window in hours: from 24 hours before to 24 hours after
                                measure = 'cosine',         ## cosine similarity; overlap_pct can be used instead for an asymmetric comparison
                                tf_idf=TRUE)                ## weight less frequent words more heavily

## gist:e3bcae831d9da99d6635572dbda84abc
def scrape_pb(url):
    url = URL_ROOT + url
    print(url)
    page = requests.get(url)
    tree = html.fromstring(page.text)
    if get_css(tree, "div.alert.alert-info"):
        continue
    else:
        medium = get_css(tree, "h1.resultheader-publicatietype")
        try:

## kamerstukken
def scrape_pb(url):
    """Fetch one page below URL_ROOT and read two heading elements from it.

    url: path fragment that is appended to the module-level URL_ROOT.

    NOTE(review): as shown, the function ends after the try/except, so it
    returns None implicitly and the extracted values are not used. The
    snippet may be truncated -- confirm against the full file.
    """
    # Build the absolute address and echo it to stdout.
    url = URL_ROOT + url
    print(url)
    page = requests.get(url)
    # Parse the response text into an element tree.
    tree = html.fromstring(page.text)
    # get_css is a helper defined outside this snippet; presumably it
    # evaluates a CSS selector against the tree -- TODO confirm what it returns.
    medium = get_css(tree, "h1.resultheader-publicatietype")
    try:
        headline = get_css(tree, "h1.title")
    except:
        # Bare except: any failure is silently ignored, which leaves
        # `headline` unbound on this path.
        pass

## kv.py
import urllib.request
import json
import amcatclient
from amcatclient import AmcatAPI


URL_TEMPLATE = 'https://www.openkamer.org/api/kamervraag/?limit=10&offset={offset}'

def get_json(url):
    req = urllib.request.Request(url)

## kv.py
import urllib.request
import json

# Endpoint queried by this script (JSON format, offset 1).
url = 'https://www.openkamer.org/api/kamervraag/?format=json&offset=1'
req = urllib.request.Request(url)

# Read the response body. The context manager closes the HTTP response
# deterministically instead of leaving that to garbage collection.
with urllib.request.urlopen(req) as resp:
    r = resp.read()
# Decode the raw bytes as UTF-8 and parse the JSON payload.
cont = json.loads(r.decode('utf-8'))

## media.py
import django
django.setup()

from amcat.models import Article,Project, CodingJob, CodingSchemaField, ArticleSet
import sys, datetime, csv

# Evaluate the query eagerly: a list of the CodingJob objects whose project id is 1.
cjs = list(CodingJob.objects.filter(project__id=1))


## jancis2
import requests

# Search page to request (query string asks for page 1 with perpage=100).
url = 'https://www.jancisrobinson.com/tastings/search?perpage=100&page=1'
# Browser-like User-Agent header sent with the request.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

# requests applies no timeout by default, so without one the script can hang
# forever if the server never answers.
response = requests.get(url, headers=headers, timeout=30)
# Print the raw response body (bytes).
print(response.content)
	library(tidytext)
	library(tidyverse)
	library(tidygraph)
	library(ggraph)
	library(ggiraph)
	library(udpipe)

	source("conceptmap.R")

	stopwords= stopwords::stopwords("nl", source = "snowball")
	library(tidytext)
	library(tidyverse)

	#' Compute the KDE smoothing of the occurrence of 'target' in the 'tokens'
	#'
	#' @param tokens a character vector of words in the corpus
	#' @param target a word to look for in the corpus
	#' @param n the number of points to sample
	#' @param bw the bandwidth of the smoothing
	kde <- function(tokens, target, n=1000, bw=5000) {
	library(igraph)
	library(tidyverse)

	# Load the serialized object that is compared against itself below.
	dtm = readRDS("/tmp/nieuws.rds")

	# Compare every document in dtm with every other document in dtm.
	# The window was written as c(-124, 124) here, which is c(-1*24, 1*24) with
	# the multiplication signs lost; restored to 24 hours either side.
	# NOTE(review): the original comment described "0 to 7 days after publication
	# of the press release", which does not match the code -- confirm the intent.
	g = RNewsflow::newsflow_compare(dtm, dtm, date='date',
	min_similarity = 0.75, ## similarity threshold
	hour_window = c(-1*24, 1*24), ## time window in hours: from 24 hours before to 24 hours after
	measure = 'cosine', ## cosine similarity; overlap_pct can be used instead for an asymmetric comparison
	tf_idf=TRUE) ## weight less frequent words more heavily
	def scrape_pb(url):
	url = URL_ROOT + url
	print(url)
	page = requests.get(url)
	tree = html.fromstring(page.text)
	if get_css(tree, "div.alert.alert-info"):
	continue
	else:
	medium = get_css(tree, "h1.resultheader-publicatietype")
	try:
	import urllib.request
	import json
	import amcatclient
	from amcatclient import AmcatAPI


	URL_TEMPLATE = 'https://www.openkamer.org/api/kamervraag/?limit=10&offset={offset}'

	def get_json(url):
	req = urllib.request.Request(url)
	import urllib.request
	import json

	url = 'https://www.openkamer.org/api/kamervraag/?format=json&offset=1'
	req = urllib.request.Request(url)

	##parsing response
	r = urllib.request.urlopen(req).read()
	cont = json.loads(r.decode('utf-8'))
	import django
	django.setup()

	from amcat.models import Article,Project, CodingJob, CodingSchemaField, ArticleSet
	import sys, datetime, csv

	cjs = list(CodingJob.objects.filter(project__id=1))
	import requests

	url = 'https://www.jancisrobinson.com/tastings/search?perpage=100&page=1'
	headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

	response = requests.get(url, headers=headers)
	print(response.content)