DIRKMJK

## get_kvk.py
"""Download open data from the Dutch Company Register, unzip and store as csv.
Not guaranteed to yield complete and accurate data. For background see:
https://dirkmjk.nl/en/187/open-company-data-in-the-netherlands
"""

from pathlib import Path
import io
import zipfile
from zipfile import BadZipFile
import xml.etree.ElementTree as ET

## download.py
"""Download traffic decisions from officielebekendmakingen.nl"""

from pathlib import Path
import requests
from bs4 import BeautifulSoup as bs

# Scheme and host of the search site; both START_URL values below live on this host.
BASE_URL = 'https://zoek.officielebekendmakingen.nl'
# Search-results URL whose query filters Staatscourant publications on
# w.verkeersbordcode == "A1" and dt.available >= "2016-01-01".
# NOTE: this value is overwritten by the assignment on the next line, so it
# never takes effect as written; it appears to be kept as an alternative query.
START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and(dt.available%253e=%222016-01-01%22)and((w.publicatienaam==%22Staatscourant%22))%20AND%20w.verkeersbordcode==%22A1%22&zv=&pg=10&col=Staatscourant&svel=Publicatiedatum&svol=Aflopend&sf=vb|A1'
# Search-results URL whose query matches the text "30 km" or "30km" in
# Staatscourant publications. This is the value START_URL has after these
# two assignments.
START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and((w.publicatienaam==%22Staatscourant%22))and((cql.textAndIndexes=%2230+km%22+or+cql.textAndIndexes=%2230km%22))&zv=%252230+km%2522+OR++%252230km%2522&col=Staatscourant'
# Relative path (resolved against the current working directory);
# presumably where downloaded HTML pages are stored -- confirm against the rest of the script.
DIR_HTML = Path('../data/html')

## delpher.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                DIRKMJK
                / delpher.md
            
            
              Last active
              October 16, 2021 18:49
            
              
                Count articles on Delpher
              
          
    Counting articles on Delpher

Delpher is a huge archive containing digitized historic newspapers, journals, books and other sources. Developed by the National Library of the Netherlands, it is a valuable resource for both academic and informal research. I have myself used it to analyse Dutch words for bicycle.
As far as I know, there is no API to access Delpher data. For my analysis of bicycle terms, I manually looked up the number of search results per decade. It would be rather laborious to look up results per year, especially if you wanted to do so for a number of terms.
Therefore, I wrote a Python script that will look up the number of results per year for a given query. Optionally, it will also look up metadata for the first 50 results per year (publication, date of publication, title and snippet), but this will take longer.

  
## process_data.py
import time
from pathlib import Path
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import geopy.distance

# Relative path to the '../data/osm' directory (resolved against the current
# working directory); presumably holds OpenStreetMap data -- confirm against usage.
OSM = Path('../data/osm')

## read_sps.py
"""Read .sps file and convert to dataframe
For caveats please see:
https://dirkmjk.nl/en/2017/04/python-script-import-sps-files
"""

import re
import pandas as pd

# Regex that matches a double-quoted span and captures, non-greedily, the text
# between the two quotes (group 1). The backslashes before the quotes are
# redundant in a regex but harmless: '\"' matches a literal '"'.
PATTERN = r'\"(.*?)\"'

## .block
height: 620
scrolling: no
license: mit
	"""Download open data from the Dutch Company Register, unzip and store as csv.
	Not guaranteed to yield complete and accurate data. For background see:
	https://dirkmjk.nl/en/187/open-company-data-in-the-netherlands
	"""

	from pathlib import Path
	import io
	import zipfile
	from zipfile import BadZipFile
	import xml.etree.ElementTree as ET
	"""Download traffic decisions from officielebekendmakingen.nl"""

	from pathlib import Path
	import requests
	from bs4 import BeautifulSoup as bs

	BASE_URL = 'https://zoek.officielebekendmakingen.nl'
	START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and(dt.available%253e=%222016-01-01%22)and((w.publicatienaam==%22Staatscourant%22))%20AND%20w.verkeersbordcode==%22A1%22&zv=&pg=10&col=Staatscourant&svel=Publicatiedatum&svol=Aflopend&sf=vb|A1'
	START_URL = 'https://zoek.officielebekendmakingen.nl/resultaten?q=(c.product-area==%22officielepublicaties%22)and((w.publicatienaam==%22Staatscourant%22))and((cql.textAndIndexes=%2230+km%22+or+cql.textAndIndexes=%2230km%22))&zv=%252230+km%2522+OR++%252230km%2522&col=Staatscourant'
	DIR_HTML = Path('../data/html')
	import time
	from pathlib import Path
	import requests
	import pandas as pd
	import geopandas as gpd
	from shapely.geometry import Point
	import numpy as np
	import geopy.distance

	OSM = Path('../data/osm')
	"""Read .sps file and convert to dataframe
	For caveats please see:
	https://dirkmjk.nl/en/2017/04/python-script-import-sps-files
	"""

	import re
	import pandas as pd

	PATTERN = r'\"(.*?)\"'