Created
November 14, 2018 15:54
-
-
Save thlor/d32a3af0fdbd02cb5ecee95734796ef1 to your computer and use it in GitHub Desktop.
CKAN crawler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CKAN crawler: pages through the package search of a CKAN catalogue and
# prints a short preview of every dataset's metadata.
#
# First install the ckanapi module from the command line:
#     pip3 install ckanapi
import json

from ckanapi import RemoteCKAN

with RemoteCKAN("https://www.data.gv.at/katalog/", get_only=True) as ckan:
    page = 0  # zero-based index of the next page to request
    rows = 100  # number of datasets requested per page
    # Limit number of pages to be crawled (for debugging). Set this to -1
    # (any negative value) to crawl unlimited pages; 0 crawls nothing.
    limit_pages = 10

    # The limit is checked *before* fetching, so exactly `limit_pages` pages
    # are processed and no page is downloaded only to be thrown away.
    while limit_pages < 0 or page < limit_pages:
        metadatas = ckan.action.package_search(rows=rows, start=page * rows)
        results = metadatas["results"]
        if not results:
            # An empty result page means we have walked past the last dataset.
            break
        for metadata in results:
            # place logic working with the "metadata" variable here:
            print(json.dumps(metadata)[0:100] + " ...")
        # Advance only after the page has actually been processed.
        page += 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment