Skip to content

Instantly share code, notes, and snippets.

@rossjones
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rossjones/fed1cb99b55c2b19a13e to your computer and use it in GitHub Desktop.
Save rossjones/fed1cb99b55c2b19a13e to your computer and use it in GitHub Desktop.
Transfer datasets from an old CKAN instance to a single organisation on datahub.io
#!/usr/bin/env python
"""Copy every dataset from a source CKAN site into one datahub.io organisation.

Setup:
    mkvirtualenv transfer
    pip install ckanapi

For each dataset listed by the source site, the script skips it if a dataset
with the same name already exists on datahub.io; otherwise it re-creates the
dataset there under TARGET_ORGANISATION and tags it with SOURCE_TAG so the
imported datasets can be found again later.
"""
import ckanapi
from ckanapi.errors import NotFound, ValidationError

SOURCE = "http://br.ckan.net"
TARGET_ORGANISATION = "open-knowledge-brasil"
SOURCE_TAG = "meta.imported-from-br-ckan-net"

# NOTE: the apikey value below is a placeholder; substitute a real API key
# for an account allowed to create datasets in TARGET_ORGANISATION.
datahub = ckanapi.RemoteCKAN("http://datahub.io/", apikey="Ross' API KEY")
source = ckanapi.RemoteCKAN(SOURCE)

# Get the Organisation, we only need the name
org = datahub.action.organization_show(id=TARGET_ORGANISATION)

# Get a list of all the packages on the source site
pkg_names = source.action.package_list()
print("Found {0} datasets on source".format(len(pkg_names)))

for name in pkg_names:
    # Get the dataset off the source site
    dataset = source.action.package_show(id=name)

    # Drop the source site's id before re-creating the dataset on the
    # target; pop() tolerates a response that carries no id at all.
    dataset.pop('id', None)

    try:
        existing = datahub.action.package_show(id=dataset['name'])
    except NotFound:
        # Great, let's carry on
        pass
    else:
        print("{0} already exists".format(existing['name']))
        continue

    # Set the new owning organisation
    dataset['owner_org'] = org['name']

    # Some datasets have no resources, and we don't like that any more ...
    dataset.setdefault('resources', [])

    # Add a nice tag so we can find them all again. setdefault() guards
    # against datasets that come back without a 'tags' key.
    dataset.setdefault('tags', []).append({'name': SOURCE_TAG})

    try:
        new_dataset = datahub.action.package_create(**dataset)
    except ValidationError as err:
        # Report why the target rejected the dataset, then keep going with
        # the remaining datasets rather than aborting the whole transfer.
        print("Failed to upload {0}: {1}".format(dataset['name'], err))
    else:
        print("Created {}".format(new_dataset['name']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment