Skip to content

Instantly share code, notes, and snippets.

@tomkralidis
Last active December 31, 2015 19:29
Show Gist options
  • Save tomkralidis/8033897 to your computer and use it in GitHub Desktop.
Save tomkralidis/8033897 to your computer and use it in GitHub Desktop.
Test to exemplify CKAN API issue of false positive harvested objects
# test to exemplify CKAN API issue of false positive harvested objects
# Example invocation:
# python test_ckan_harvest_objects.py https://data.noaa.gov 1c7cb5dd-f2ca-4aad-b971-af251c1acf4d
import sys
from urllib2 import urlopen
import requests
# Both the CKAN base URL and the dataset id must be given on the command line.
if len(sys.argv) < 3:
    print('Usage: %s <ckan_url> <dataset_id>' % sys.argv[0])
    sys.exit(1)

ckan_url = sys.argv[1]
dataset_id = sys.argv[2]

# Query the search API for datasets whose harvest_object_id is non-empty.
# Only a single page is requested ("limit":1000, "start":0); no paging is done.
url = '%s/api/search/dataset?qjson={"fl":"id,metadata_modified,extras_harvest_object_id,extras_metadata_source", "q":"harvest_object_id:[\\"\\" TO *]", "limit":1000, "start":0}' % ckan_url
response = requests.get(url)
# Surface an HTTP failure directly rather than as a JSON decode error or a
# KeyError on 'results' further down.
response.raise_for_status()
listing = response.json()

# NOTE: print is called with one parenthesised argument so the script runs
# unchanged under both Python 2 (print statement) and Python 3 (print function).
print('Testing for dataset id: %s' % dataset_id)
for dataset in listing['results']:
    if dataset['id'] != dataset_id:
        continue

    print('Found dataset id: %s' % dataset_id)

    # Build the three URLs through which the same dataset can be reached.
    api_url = '%s/api/search/dataset?qjson={"id":"%s"}' % (ckan_url, dataset_id)
    ui_url = '%s/dataset/%s' % (ckan_url, dataset_id)
    # Assumes each search result carries the harvest object id under 'extras',
    # as the original script did -- TODO confirm against the target CKAN version.
    harvest_object_id = dataset['extras']['harvest_object_id']
    harvest_url = '%s/harvest/object/%s' % (ckan_url, harvest_object_id)

    # Report the HTTP status of each URL; differing statuses are what this
    # script is meant to demonstrate.
    print('dataset via API: %s (HTTP status code: %s)' % (api_url, requests.get(api_url).status_code))
    print('dataset via UI: %s (HTTP status code: %s)' % (ui_url, requests.get(ui_url).status_code))
    print("dataset's harvest_object_id: %s" % harvest_object_id)
    print('harvest_object_id via API: %s (HTTP status code: %s)' % (harvest_url, requests.get(harvest_url).status_code))

    # The requested dataset has been reported; no need to scan the rest.
    break
else:
    # The loop finished without a match: say so instead of exiting silently.
    print('Dataset id %s was not found in the harvested dataset listing' % dataset_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment