# thlor/ckan_crawler.py

## ckan_crawler.py
# First install CKANapi module from the command line:
# pip3 install ckanapi

from  ckanapi import RemoteCKAN
import json

# Crawl the CKAN catalogue at data.gv.at page by page using the
# package_search action and print a short preview of every dataset record.
with RemoteCKAN("https://www.data.gv.at/katalog/", get_only=True) as ckan:
    page = 0
    rows = 100
    # Maximum number of pages to crawl (useful while debugging).
    # Any negative value (e.g. -1) crawls without a page limit.
    limit_pages = 10

    # The limit is tested *before* a page is requested and the counter is
    # advanced *after* the page has been processed, so exactly `limit_pages`
    # pages are fetched and every fetched page is actually handled.
    while limit_pages < 0 or page < limit_pages:
        metadatas = ckan.action.package_search(rows=rows, start=page * rows)
        results = metadatas["results"]

        # An empty result page means the offset ran past the last dataset.
        if not results:
            break

        for metadata in results:

            # place logic working with the "metadata" variable here:
            print(json.dumps(metadata)[0:100] + " ...")

        page += 1
	# First install CKANapi module from the command line:
	# pip3 install ckanapi

from ckanapi import RemoteCKAN
import json

# Crawl the CKAN catalogue at data.gv.at page by page using the
# package_search action and print a short preview of every dataset record.
with RemoteCKAN("https://www.data.gv.at/katalog/", get_only=True) as ckan:
    page = 0
    rows = 100
    # Maximum number of pages to crawl (useful while debugging).
    # Any negative value (e.g. -1) crawls without a page limit.
    limit_pages = 10

    # The limit is tested *before* a page is requested and the counter is
    # advanced *after* the page has been processed, so exactly `limit_pages`
    # pages are fetched and every fetched page is actually handled.
    while limit_pages < 0 or page < limit_pages:
        metadatas = ckan.action.package_search(rows=rows, start=page * rows)
        results = metadatas["results"]

        # An empty result page means the offset ran past the last dataset.
        if not results:
            break

        for metadata in results:

            # place logic working with the "metadata" variable here:
            print(json.dumps(metadata)[0:100] + " ...")

        page += 1