DIRKMJK

## .block
height: 620
scrolling: no
license: mit

## read_sps.py
"""Read .sps file and convert to dataframe
For caveats please see:
https://dirkmjk.nl/en/2017/04/python-script-import-sps-files
"""

import re
import pandas as pd

# Regex that captures the text between a pair of double quotes; the lazy
# quantifier makes each match stop at the nearest closing quote.
PATTERN = r'\"(.*?)\"'

## process_data.py
import time
from pathlib import Path
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import geopy.distance

# Relative path to the OSM data directory
# (presumably OpenStreetMap files -- verify against the code that uses it).
OSM = Path('../data/osm')

## delpher.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                DIRKMJK
                / delpher.md
            
            
              Last active
              October 16, 2021 18:49
            
              
                Count articles on Delpher
              
          
    Counting articles on Delpher

Delpher is a huge archive containing digitized historic newspapers, journals, books and other sources. Developed by the National Library of the Netherlands, it is a valuable resource for both academic and informal research. I have used it myself to analyse Dutch words for bicycle.
As far as I know, there is no API to access Delpher data. For my bicycle terms analysis, I manually looked up the number of search results per decade. It would be rather laborious to look up results per year, especially if you wanted to do so for a number of terms.
Therefore, I wrote a Python script that will look up the number of results per year for a given query. Optionally, it will also look up metadata for the first 50 results per year (publication, date of publication, title and snippet), but this will take longer.

  
## download.py
"""Download traffic decisions from officielebekendmakingen.nl"""

from pathlib import Path
import requests
from bs4 import BeautifulSoup as bs

# Root of the search site.
BASE_URL = 'https://zoek.officielebekendmakingen.nl'
# NOTE: START_URL is assigned twice; only the second assignment takes effect.
# First query: Staatscourant publications available from 2016-01-01 whose
# `verkeersbordcode` equals "A1" (overridden by the assignment below).
START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and(dt.available%253e=%222016-01-01%22)and((w.publicatienaam==%22Staatscourant%22))%20AND%20w.verkeersbordcode==%22A1%22&zv=&pg=10&col=Staatscourant&svel=Publicatiedatum&svol=Aflopend&sf=vb|A1'
# Effective query: Staatscourant publications whose text matches "30 km" or
# "30km".
START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and((w.publicatienaam==%22Staatscourant%22))and((cql.textAndIndexes=%2230+km%22+or+cql.textAndIndexes=%2230km%22))&zv=%252230+km%2522+OR++%252230km%2522&col=Staatscourant'
# Relative path to the HTML data directory
# (presumably where downloaded pages are stored -- verify against usage).
DIR_HTML = Path('../data/html')

## get_kvk.py
"""Download open data from the Dutch Company Register, unzip and store as csv.
Not guaranteed to yield complete and accurate data. For background see:
https://dirkmjk.nl/en/187/open-company-data-in-the-netherlands
"""

from pathlib import Path
import io
import zipfile
from zipfile import BadZipFile
import xml.etree.ElementTree as ET
	"""Read .sps file and convert to dataframe
	For caveats please see:
	https://dirkmjk.nl/en/2017/04/python-script-import-sps-files
	"""

	import re
	import pandas as pd

	# Regex that captures the text between a pair of double quotes; the lazy
	# quantifier makes each match stop at the nearest closing quote.
	PATTERN = r'\"(.*?)\"'
	import time
	from pathlib import Path
	import requests
	import pandas as pd
	import geopandas as gpd
	from shapely.geometry import Point
	import numpy as np
	import geopy.distance

	# Relative path to the OSM data directory
	# (presumably OpenStreetMap files -- verify against the code that uses it).
	OSM = Path('../data/osm')
	"""Download traffic decisions from officielebekendmakingen.nl"""

	from pathlib import Path
	import requests
	from bs4 import BeautifulSoup as bs

	# Root of the search site.
	BASE_URL = 'https://zoek.officielebekendmakingen.nl'
	# NOTE: START_URL is assigned twice; only the second assignment takes effect.
	# First query: Staatscourant publications available from 2016-01-01 whose
	# `verkeersbordcode` equals "A1" (overridden by the assignment below).
	# The facet separator is a plain `|`; a backslash before it would be an
	# invalid string escape and would leak a literal backslash into the URL.
	START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and(dt.available%253e=%222016-01-01%22)and((w.publicatienaam==%22Staatscourant%22))%20AND%20w.verkeersbordcode==%22A1%22&zv=&pg=10&col=Staatscourant&svel=Publicatiedatum&svol=Aflopend&sf=vb|A1'
	# Effective query: Staatscourant publications whose text matches "30 km" or
	# "30km".
	START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and((w.publicatienaam==%22Staatscourant%22))and((cql.textAndIndexes=%2230+km%22+or+cql.textAndIndexes=%2230km%22))&zv=%252230+km%2522+OR++%252230km%2522&col=Staatscourant'
	# Relative path to the HTML data directory
	# (presumably where downloaded pages are stored -- verify against usage).
	DIR_HTML = Path('../data/html')
	"""Download open data from the Dutch Company Register, unzip and store as csv.
	Not guaranteed to yield complete and accurate data. For background see:
	https://dirkmjk.nl/en/187/open-company-data-in-the-netherlands
	"""

	from pathlib import Path
	import io
	import zipfile
	from zipfile import BadZipFile
	import xml.etree.ElementTree as ET