Skip to content

Instantly share code, notes, and snippets.

View LvanWissen's full-sized avatar

Leon van Wissen LvanWissen

View GitHub Profile
@LvanWissen
LvanWissen / vele-handen@1.csv
Last active September 13, 2023 15:24
Get Metadata from SAA Beeldbank OpenSearch for VeleHanden data
We can't make this file beautiful and searchable because it's too large.
"velehanden_scan_id","memorix_id","beeldbank_id","gcps"
"7","ANWD00674000001","b8d437e8-b31b-7d7b-8629-99b635efd30f","{""id"":""marker_1"",""title"":"""",""imageLat"":""85.03914893626745"",""imageLng"":""-179.66371536254883"",""imageGeoLat"":""808.0000000005821"",""imageGeoLng"":""1958.9999999998836"",""geoLat"":""52.38581563110102"",""geoLng"":""4.882950782775879""}"
"7","ANWD00674000001","e0145944-3ee2-f6ad-73f9-20b5f8b8fd92","{""id"":""marker_3"",""title"":"""",""imageLat"":""85.0357484008423"",""imageLng"":""-179.64242935180664"",""imageGeoLat"":""1037.0000000003492"",""imageGeoLng"":""2083"",""geoLat"":""52.36034868684877"",""geoLng"":""4.904108047485351""}"
"7","ANWD00674000001","2cbe704f-fac4-6dd0-5303-e0aa847f056a","{""id"":""marker_4"",""title"":"""",""imageLat"":""85.03793154442138"",""imageLng"":""-179.62783813476562"",""imageGeoLat"":""890.0000000001164"",""imageGeoLng"":""2168"",""geoLat"":""52.37570397280555"",""geoLng"":""4.92161750793457""}"
"11","ANWG00507000001","a93dd690-6ee7-96aa-5ca9-f623
@LvanWissen
LvanWissen / sparql2rdfs.rq
Created December 10, 2021 10:22
SPARQL query to construct an RDFS vocabulary from a dataset.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?classType a rdfs:Class ;
rdfs:label ?classLabel .
?property a rdf:Property ;
rdfs:domain ?classType ;
@LvanWissen
LvanWissen / CT_2021_02_01.json
Last active February 1, 2021 22:07
Get a list of all CERL Thesaurus identifiers (cnc, cni, cnp, cnl)
["cnc00000001", "cnc00000008", "cnc00000009", "cnc00000013", "cnc00000014", "cnc00000015", "cnc00000016", "cnc00000017", "cnc00000018", "cnc00000019", "cnc00000020", "cnc00000021", "cnc00000023", "cnc00000024", "cnc00000025", "cnc00000029", "cnc00000030", "cnc00000031", "cnc00000032", "cnc00000033", "cnc00000034", "cnc00000035", "cnc00000036", "cnc00000037", "cnc00000041", "cnc00000049", "cnc00000050", "cnc00000052", "cnc00000054", "cnc00000055", "cnc00000058", "cnc00000059", "cnc00000062", "cnc00000064", "cnc00000065", "cnc00000066", "cnc00000068", "cnc00000069", "cnc00000070", "cnc00000071", "cnc00000072", "cnc00000073", "cnc00000076", "cnc00000079", "cnc00000081", "cnc00000084", "cnc00000086", "cnc00000087", "cnc00000088", "cnc00000089", "cnc00000090", "cnc00000091", "cnc00000092", "cnc00000093", "cnc00000095", "cnc00000096", "cnc00000097", "cnc00000098", "cnc00000099", "cnc00000100", "cnc00000101", "cnc00000102", "cnc00000103", "cnc00000106", "cnc00000108", "cnc00000109", "cnc00000110", "cnc00000111", "cn
@LvanWissen
LvanWissen / a2a_saa.py
Created November 19, 2020 12:17
Download alle A2A-records van 8 indexen voor Golden Agents
import os
from sickle import Sickle
from sickle.iterator import OAIResponseIterator
def main(url: str):
for (setSpec,
name) in [("08953f2f-309c-baf9-e5b1-0cefe3891b37",
"SAA-ID-001_SAA_Index_op_notarieel_archief"),
("f6e5401f-c486-5f3d-6a5c-6e277e12628e",
@LvanWissen
LvanWissen / eye_afficheproject.py
Last active November 12, 2020 17:52
Download van metadata (csv + json) van alle affiches uit het Afficheproject van het Geheugen van Nederland (Delpher)
import json
import requests
import pandas as pd
from bs4 import BeautifulSoup
URL = "https://geheugen.delpher.nl/nl/geheugen/results?query=Eye+AND+afficheproject&maxperpage=1000&coll=ngvn&page="
def main(destination, pages, URL=URL):
@LvanWissen
LvanWissen / main.py
Created June 28, 2020 10:29
Repertorium van ambtsdragers
import requests
from bs4 import BeautifulSoup
from time import sleep
import json
import datetime
import calendar
from dateutil import relativedelta
@LvanWissen
LvanWissen / alto2text.py
Last active December 3, 2019 18:14
Convert Alto to text
"""
Usage for one file:
$ python alto2text.py filename.xml > filename.txt
Usage for multiple files:
$ for f in *.xml; do python alto2text.py $f > $f.txt; done
"""
import sys
from lxml import etree
from collections import defaultdict
from itertools import count
from rdflib import Graph, URIRef, Literal, XSD, Namespace, RDFS, BNode
from rdfalchemy import rdfSubject, rdfMultiple, rdfSingle
import pandas as pd
DATA = pd.read_csv('gedichtenGGD_STCN_Steur_stripped.csv', sep=';')
@LvanWissen
LvanWissen / getAddressAmsterdam.py
Created November 7, 2019 11:41
Fetch a centroid of a 'nummeraanduidingobject' from the Amsterdam BAG API
"""
Using the Amsterdam address BAG data, fetch a 'centroid' by address (street + number).
See: https://api.data.amsterdam.nl/atlas/search/adres/
"""
import re
import requests
def get_centroid(q):
@LvanWissen
LvanWissen / OpenRotatingFiles.py
Created October 22, 2018 11:40
Use one of the file handlers of the Python logging module to automatically split an output file into multiple files, based on a maximum size.
import logging
from logging.handlers import BaseRotatingHandler, RotatingFileHandler
class OpenRotatingFiles(RotatingFileHandler):
def __init__(self, filename, mode='a', maxBytes=0, backupCount=0, encoding=None, delay=False, prefix="", suffix=""):
"""
Addition to RotatingFileHandler to use this as a file handler that
creates a new file on maxBytes. Uses an optional prefix and suffix that
is printed after and before each rotation. The backupCount setting should