Skip to content

Instantly share code, notes, and snippets.

@amercader
Created November 4, 2013 10:29
Show Gist options
  • Save amercader/7300751 to your computer and use it in GitHub Desktop.
Save amercader/7300751 to your computer and use it in GitHub Desktop.
Recreate remote harvest sources using the dataset search (while https://github.com/okfn/ckanext-harvest/issues/73 is not fixed)
import json
import requests
# package_search query that lists the remote harvest sources: datasets of
# type "harvest" belonging to the "usgs-gov" organization.
REMOTE_SOURCES_URL = 'http://catalog.data.gov/api/3/action/package_search?q=type:harvest%20AND%20organization:usgs-gov'

# Host (and port) of the instance on which the sources are recreated.
HOST = 'localhost:5000'

# Placeholder -- replace with a real API key before running. The value is
# sent as the Authorization header of the create requests. It has to be a
# string: a bare name here raises NameError as soon as the module loads.
API_KEY = 'YOUR_API_KEY_HERE'

# Endpoint of the harvest_source_create action on the target host.
ACTION_URL_CREATE_SOURCE = 'http://{host}/api/action/harvest_source_create'.format(host=HOST)
def do():
    """Recreate every remote harvest source on the target instance.

    Requests the datasets returned by ``REMOTE_SOURCES_URL`` and, for each
    one, posts a JSON payload to ``ACTION_URL_CREATE_SOURCE`` built from the
    dataset's ``name``, ``title`` and ``url`` fields plus its
    ``source_type``, ``frequency`` and ``config`` extras.

    A response whose status code is not 200 is reported (status code and
    body are printed) and processing moves on to the next dataset; a
    successful creation prints a confirmation line. Nothing is returned.
    """

    def get_extra(key, dataset_dict, default=None):
        """Return the value of the extra named *key*, or *default* if absent."""
        for extra in dataset_dict.get('extras', []):
            if extra['key'] == key:
                return extra['value']
        return default

    headers = {'Content-type': 'application/json', 'Authorization': API_KEY}

    # Get remote sources (datasets)
    # NOTE(review): only the results of this single search response are
    # processed; if the remote API pages its results, some sources may be
    # missed -- confirm against the search API's paging parameters.
    search_response = requests.post(REMOTE_SOURCES_URL, data={})
    source_datasets = json.loads(search_response.content)['result']['results']

    for source_dataset in source_datasets:
        source_dict = {
            'name': source_dataset.get('name'),
            'title': source_dataset.get('title'),
            'url': source_dataset.get('url'),
            'source_type': get_extra('source_type', source_dataset),
            'frequency': get_extra('frequency', source_dataset),
            'config': get_extra('config', source_dataset),
        }

        create_response = requests.post(
            ACTION_URL_CREATE_SOURCE,
            data=json.dumps(source_dict),
            headers=headers,
        )

        if create_response.status_code != 200:
            # Report the failure and keep going with the remaining sources.
            print(create_response.status_code)
            print(create_response.content)
            continue

        # No ``break`` here: stopping after the first success would leave
        # all the remaining sources uncreated.
        print('Source created {0}'.format(source_dataset.get('name')))
if __name__ == '__main__':
    # Script entry point: run the recreation, then report completion.
    do()
    print('Done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment