Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rufuspollock/bd8ae3575950d180cce33da59c021299 to your computer and use it in GitHub Desktop.
Save rufuspollock/bd8ae3575950d180cce33da59c021299 to your computer and use it in GitHub Desktop.
Convert Data Package to CKAN Package
# python 3+
def convert_data_package_to_ckan_package(data_package):
    '''Convert a Data Package descriptor (dict) into a CKAN package dict.

    Documentation of CKAN metadata structure ...
    https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.package_create
    https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.resource_create

    The input is shallow-copied, so its top-level keys are left untouched.
    The result always carries an ``extras`` list and a ``resources`` list;
    each resource is passed through
    ``convert_data_resource_to_ckan_resource``.

    :param data_package: Data Package descriptor as a dict.
    :return: new dict shaped for CKAN's ``package_create``.
    '''
    out = dict(data_package)
    out['extras'] = []
    # Special case for now: only `tableschema` is moved into extras (as a
    # JSON string). The original key is kept on the package as well.
    # Future: loop through all fields that are not in CKAN's special list.
    if 'tableschema' in out:
        out['extras'].append({
            'key': 'tableschema',
            'value': json.dumps(out['tableschema']),
        })
    # A metadata-only descriptor may have no (or a null) `resources` entry;
    # treat that as an empty list rather than raising KeyError/TypeError.
    out['resources'] = [
        convert_data_resource_to_ckan_resource(res)
        for res in (out.get('resources') or [])
    ]
    return out
def convert_data_resource_to_ckan_resource(resource):
    '''Convert a Data Resource descriptor (dict) into a CKAN resource dict.

    Works on a shallow copy of ``resource``:

    * ``path`` is renamed to ``url`` (skipped when the resource has no
      ``path``, e.g. a resource that carries inline ``data``);
    * ``bytes`` is duplicated as ``size`` (the ``bytes`` key is kept);
    * every list or dict value is flattened to a JSON string.

    :param resource: Data Resource descriptor as a dict.
    :return: new flat dict for the CKAN resource.
    '''
    out = dict(resource)
    if 'path' in out:
        out['url'] = out.pop('path')
    if 'bytes' in out:
        out['size'] = out['bytes']
    # Flatten all nested data as JSON strings. Only existing keys are
    # reassigned, so the dict does not change size while iterating.
    for key, value in out.items():
        if isinstance(value, (list, dict)):
            out[key] = json.dumps(value)
    return out
import collections
import collections.abc
import json
def dict_merge(dct, merge_dct):
    '''Recursive dict merge.

    Merges ``merge_dct`` into ``dct`` in place. When a key exists in both,
    ``dct``'s value is a dict and the incoming value is a mapping, the two
    are merged recursively; in every other case the incoming value
    replaces (or adds) the entry in ``dct``.

    :param dct: dict that is updated in place.
    :param merge_dct: mapping whose entries are merged into ``dct``.
    :return: ``dct`` itself (the same object that was passed in).
    '''
    for k, v in merge_dct.items():
        # `collections.Mapping` was removed in Python 3.10; the ABC lives
        # in `collections.abc`.
        if (k in dct and isinstance(dct[k], dict)
                and isinstance(v, collections.abc.Mapping)):
            dict_merge(dct[k], v)
        else:
            dct[k] = v
    return dct
def test_convert_data_package_to_ckan_package():
    '''Check the conversion of a one-resource Data Package.

    Verifies that ``path`` becomes ``url``, ``bytes`` is duplicated as
    ``size``, the nested ``alternates`` list is flattened to a JSON string
    and the package carries an (empty) ``extras`` list.
    '''
    dp = {
        'name': 'test-ckan-sync',
        'resources': [
            {
                'name': 'datafile1',
                'path': 'https://storage.googleapis.com/datopian-nhs/csv/DPI_DETAIL_PRESCRIBING_201401.csv',
                'bytes': 585,
                "alternates": [  # subresources ...
                    {
                        "name": 'DPI_DETAIL_PRESCRIBING_201401.zip',
                        "format": "zip",
                        "bytes": 589888562
                    }
                ],
            }
        ]
    }
    out = convert_data_package_to_ckan_package(dp)
    exp = {
        'name': 'test-ckan-sync',
        # The converter always sets `extras`; it stays empty here because
        # the package has no `tableschema`.
        'extras': [],
        'resources': [
            {
                'name': 'datafile1',
                'url': 'https://storage.googleapis.com/datopian-nhs/csv/DPI_DETAIL_PRESCRIBING_201401.csv',
                'bytes': 585,
                'size': 585,
                'alternates': json.dumps(dp['resources'][0]['alternates'])
            }
        ]
    }
    assert out == exp
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment