Created
April 30, 2020 19:05
-
-
Save rufuspollock/bd8ae3575950d180cce33da59c021299 to your computer and use it in GitHub Desktop.
Convert Data Package to CKAN Package
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# python 3+ | |
def convert_data_package_to_ckan_package(data_package):
    '''Build a CKAN package dict from a Data Package descriptor.

    The descriptor is shallow-copied, so ``data_package`` itself is left
    untouched. On the copy:

    * ``extras`` is always (re)set to a list. It holds a single
      ``{'key': 'tableschema', 'value': <json string>}`` entry when the
      descriptor has a ``tableschema`` key, and is empty otherwise. The
      top-level ``tableschema`` key is kept as well.
    * every entry of ``resources`` is passed through
      ``convert_data_resource_to_ckan_resource``.

    Documentation of CKAN metadata structure ...

    https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.package_create
    https://docs.ckan.org/en/2.8/api/index.html#ckan.logic.action.create.resource_create

    :param data_package: mapping with at least a ``resources`` iterable.
    :returns: a new ``dict`` in CKAN package layout.
    :raises KeyError: if ``data_package`` has no ``resources`` key.
    '''
    ckan_package = dict(data_package)

    # special case atm
    # future look through all fields not in ckan special list
    extras = []
    if 'tableschema' in ckan_package:
        schema_as_json = json.dumps(ckan_package['tableschema'])
        extras.append({'key': 'tableschema', 'value': schema_as_json})
    ckan_package['extras'] = extras

    converted_resources = []
    for data_resource in ckan_package['resources']:
        converted_resources.append(
            convert_data_resource_to_ckan_resource(data_resource))
    ckan_package['resources'] = converted_resources

    return ckan_package
def convert_data_resource_to_ckan_resource(resource):
    '''Convert a Data Resource descriptor into a CKAN resource dict.

    The conversion works on a shallow copy, so ``resource`` itself is not
    modified.

    * ``path`` is renamed to ``url``. A resource without a ``path`` (for
      example one that carries inline ``data``) is converted without a
      ``url`` instead of raising ``KeyError``.
    * ``bytes`` is copied to ``size``; the original ``bytes`` key is kept.
    * Every ``list`` or ``dict`` value is replaced by its JSON string, so
      the result has no nested containers. This also applies to a ``path``
      given as a list: ``url`` then holds the JSON-encoded list.

    :param resource: mapping describing a single Data Resource.
    :returns: a new ``dict`` with the changes described above.
    '''
    out = dict(resource)
    if 'path' in out:
        out['url'] = out.pop('path')
    if 'bytes' in out:
        out['size'] = out['bytes']
    # flatten as json strings all nested data
    return {
        key: json.dumps(value) if isinstance(value, (list, dict)) else value
        for key, value in out.items()
    }
import collections
import collections.abc
import json
def dict_merge(dct, merge_dct):
    '''Recursively merge ``merge_dct`` into ``dct`` in place.

    For each key of ``merge_dct``: when ``dct`` already holds a ``dict``
    under that key and the incoming value is a mapping, the two are merged
    recursively; in every other case the incoming value replaces (or adds)
    the entry in ``dct``. Incoming values are assigned as-is, not copied.

    :param dct: dict that is updated in place.
    :param merge_dct: mapping whose entries are merged into ``dct``.
    :returns: ``dct``, the same object that was passed in.
    '''
    for key, value in merge_dct.items():
        # ``collections.Mapping`` was removed in Python 3.10; the ABC lives
        # in ``collections.abc``.
        if (key in dct and isinstance(dct[key], dict)
                and isinstance(value, collections.abc.Mapping)):
            dict_merge(dct[key], value)
        else:
            dct[key] = value
    return dct
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def test_convert_data_package_to_ckan_package():
    '''Check the Data Package -> CKAN package conversion on one resource.

    The resource carries a ``path``, a ``bytes`` count and a nested
    ``alternates`` list, so the test covers the ``path`` -> ``url`` rename,
    the ``bytes`` -> ``size`` copy and the JSON flattening of nested values.
    '''
    dp = {
        'name': 'test-ckan-sync',
        'resources': [
            {
                'name': 'datafile1',
                'path': 'https://storage.googleapis.com/datopian-nhs/csv/DPI_DETAIL_PRESCRIBING_201401.csv',
                'bytes': 585,
                "alternates": [  # subresources ...
                    {
                        "name": 'DPI_DETAIL_PRESCRIBING_201401.zip',
                        "format": "zip",
                        "bytes": 589888562
                    }
                ],
            }
        ]
    }
    out = convert_data_package_to_ckan_package(dp)
    exp = {
        'name': 'test-ckan-sync',
        # The converter always sets 'extras'; it is empty here because the
        # package has no 'tableschema' key.
        'extras': [],
        'resources': [
            {
                'name': 'datafile1',
                'url': 'https://storage.googleapis.com/datopian-nhs/csv/DPI_DETAIL_PRESCRIBING_201401.csv',
                'bytes': 585,
                'size': 585,
                'alternates': json.dumps(dp['resources'][0]['alternates'])
            }
        ]
    }
    assert out == exp
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment