Skip to content

Instantly share code, notes, and snippets.

@nruigrok
Last active January 4, 2020 17:14
Show Gist options
  • Save nruigrok/0ea991a56f0bcd0da2be913b82e01297 to your computer and use it in GitHub Desktop.
Save nruigrok/0ea991a56f0bcd0da2be913b82e01297 to your computer and use it in GitHub Desktop.
import urllib.request
import json
import amcatclient
from amcatclient import AmcatAPI
# Listing endpoint of the openkamer.org API for "kamervraag" records.
# The query string fixes the page size at 10 (limit=10); the {offset}
# placeholder is filled in per request to select which page to fetch.
URL_TEMPLATE = 'https://www.openkamer.org/api/kamervraag/?limit=10&offset={offset}'
def get_json(url):
    """Fetch *url* and return its body parsed as JSON.

    The response body is decoded as UTF-8 before it is parsed.

    Args:
        url: Address to request; anything ``urllib.request.Request`` accepts.

    Returns:
        The decoded JSON document, as produced by ``json.loads``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    req = urllib.request.Request(url)
    # Use the response as a context manager so the underlying connection is
    # closed deterministically, even if reading the body raises.
    with urllib.request.urlopen(req) as response:
        body = response.read()
    return json.loads(body.decode('utf-8'))
def read_doc(doc):
    """Convert one question record from the API into a flat article dict.

    Args:
        doc: Mapping with the keys read below: ``url``, ``vragen`` (a list of
            mappings with a ``text`` entry) and ``document`` (with
            ``title_full``, ``date_published``, ``submitters`` and
            ``foot_notes``).

    Returns:
        A dict with the keys ``title``, ``text``, ``date``, ``indienernaam``,
        ``indienerpartij``, ``medium``, ``voetnoten``, ``linksvoetnoten`` and
        ``url``.

        * ``text`` is all question texts concatenated without a separator.
        * ``indienernaam`` / ``indienerpartij`` describe the last submitter
          in the list; either is ``"-"`` when it is not available.
        * ``voetnoten`` / ``linksvoetnoten`` are the footnote texts and
          footnote URLs concatenated; both are ``""`` when the record has no
          footnotes.

    Raises:
        KeyError: If one of the expected keys is missing from ``doc``.
    """
    url = doc['url']
    document = doc['document']
    title = document['title_full']
    print(title)
    date = document['date_published']

    vragen2 = ''.join(str(v['text']) for v in doc['vragen'])

    # Start from the placeholder so a record without submitters no longer
    # fails with an unbound local when the result dict is built.
    name = "-"
    partij = "-"
    for s in document['submitters']:
        name = s['person']['fullname']
        try:
            partij = s['party']['name_short']
        except TypeError:
            # The party entry is not subscriptable (e.g. None). Reset the
            # value so the party of an earlier submitter is not reported
            # next to this submitter's name.
            partij = "-"

    # Keyed by footnote text, so repeated texts collapse into one entry
    # (the last URL wins) exactly as before.
    voetnoten = {}
    for f in document['foot_notes']:
        fn = f['text'] or "-"
        link = f['url'] or "-"
        voetnoten[fn] = link

    # Join once after the loop: this yields the same strings as before for
    # records that have footnotes and well-defined empty strings for records
    # that have none (previously an unbound-local error).
    v1 = "".join(voetnoten.keys())
    v2 = "".join(voetnoten.values())
    if voetnoten:
        print(f"dit is fn{v1}")
    else:
        print("not fn")

    return {"title": title,
            "text": vragen2,
            "date": date,
            "indienernaam": name,
            "indienerpartij": partij,
            "medium": "Kamervragen",
            "voetnoten": v1,
            "linksvoetnoten": v2,
            "url": url}
def get_links(start=10, stop=100, step=10):
    """Yield listing-page URLs built from ``URL_TEMPLATE``.

    One URL is produced for every offset in ``range(start, stop, step)``.
    With the defaults these are the offsets 10, 20, ..., 90.

    Args:
        start: First offset to request.
        stop: Exclusive upper bound for the offset.
        step: Distance between consecutive offsets.

    Yields:
        ``URL_TEMPLATE`` with its ``{offset}`` placeholder filled in.
    """
    # NOTE(review): the default start of 10 skips the page at offset 0 --
    # confirm that this is intended.
    for offset in range(start, stop, step):
        # Pass the placeholder explicitly rather than via **locals(), which
        # would silently feed every local name into the template.
        yield URL_TEMPLATE.format(offset=offset)
#read_doc(cont['results'][1])
from amcatclient import AmcatAPI
# Upload driver: connect to the AmCAT server on localhost, then walk every
# listing page, convert each result with read_doc and upload it on its own.
conn = AmcatAPI("http://localhost:8000")
for page_url in get_links():
    print(page_url)
    page = get_json(page_url)
    # map() is lazy, so each record is converted right before it is uploaded.
    for article in map(read_doc, page['results']):
        # presumably 2 is the project id and 42 the article set id --
        # TODO confirm against the amcatclient API.
        conn.create_articles(2, 42, [article])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment