Skip to content

Instantly share code, notes, and snippets.

@happyrainb
Created September 25, 2016 20:27
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save happyrainb/d7e95fd8a730a9033d5ca658925b6c56 to your computer and use it in GitHub Desktop.
This script updates CKAN records using a .csv file as the input data. It was written in 05/2016 and tested on a testing site, but never used in production. It uses the provided connection script to make the API calls.
import ckanapi
import csv
from copy import deepcopy
def update_dataset_api_call(connection, dataset_data):
    """Send ``dataset_data`` to the ``package_update`` action.

    :param connection: object exposing ``call_action(action, data)``; the
        callers in this file pass a ``ckanapi.RemoteCKAN`` instance
    :param dataset_data: dict describing the dataset, forwarded unchanged
        as the action payload
    :return: None -- whatever the API call returns is discarded
    """
    action_name = 'package_update'
    connection.call_action(action_name, dataset_data)
def get_dataset_api_call(connection, dataset_data):
    """Look a dataset up through the ``package_show`` action.

    Only the ``'id'`` entry of ``dataset_data`` is used for the lookup; any
    other keys are ignored (a missing ``'id'`` is sent as ``None``).

    :param connection: object exposing ``call_action(action, data)``
    :param dataset_data: mapping that carries the dataset identifier under
        the ``'id'`` key
    :return: whatever ``call_action`` returns for ``package_show``
    """
    lookup = dict(id=dataset_data.get('id'))
    response = connection.call_action('package_show', lookup)
    return response
def get_package_list(connection):
    """Return the result of the ``package_list`` action.

    :param connection: object exposing ``call_action(action)``
    :return: whatever ``call_action`` returns; the caller in this file
        iterates it and takes its ``len()``
    """
    listing = connection.call_action('package_list')
    return listing
def rename_extra_field_for_all_datasets(connection, field_name, new_field_name):
    """Rename one "extras" key on every dataset returned by the package list.

    Each dataset is fetched, its ``extras`` list is scanned for the first
    entry whose key equals ``field_name`` (compared case-insensitively and
    ignoring surrounding whitespace), and that entry's key is replaced by
    ``new_field_name``. Only datasets that actually changed are sent back
    through the update call. Progress is printed after every dataset and a
    summary is printed at the end.

    :param connection: connection object passed through to the API helpers
    :param field_name: current name of the extra field to look for
    :param new_field_name: name the matching extra field is renamed to
    :return: None
    """
    # Normalise the target once instead of on every comparison.
    wanted_key = field_name.lower().strip()

    packages = get_package_list(connection)
    total = len(packages)
    modified_count = 0

    for count, dataset_name in enumerate(packages, 1):
        current_data = get_dataset_api_call(connection, {'id': dataset_name})

        # `or []` also covers a dataset whose 'extras' value is None.
        for extra in current_data.get('extras') or []:
            key = extra.get('key')
            if key is None:
                # An extra without a key cannot match; calling .lower() on
                # None would abort the whole run with AttributeError.
                continue
            if key.lower().strip() != wanted_key:
                continue

            # Only the first matching extra is renamed, then the dataset is
            # pushed back to the server.
            extra['key'] = new_field_name
            update_dataset_api_call(connection, current_data)
            print('Renamed "' + field_name + '" in package with id: '
                  + str(current_data['id']) + ' to "' + new_field_name
                  + '" sucessfully')
            modified_count += 1
            break

        print("Completed: {0} of {1}".format(count, total))

    print("Renamed a total of {0} from {1} datasets.".format(modified_count, total))
def _open_csv_for_reading(csv_filename):
    """Open ``csv_filename`` in the mode the running interpreter's csv module needs."""
    import sys  # local import keeps this block self-contained

    if sys.version_info[0] >= 3:
        # Python 3's csv module needs a text-mode file opened with
        # newline=''. 'utf-8-sig' reads plain UTF-8 and also drops a leading
        # BOM, which would otherwise end up inside the first column name.
        return open(csv_filename, newline='', encoding='utf-8-sig')
    # Python 2's csv module needs a binary-mode file.
    return open(csv_filename, 'rb')


def _apply_row_to_dataset(current_data, update_data):
    """Merge one CSV row into the dataset dict ``current_data`` in place."""
    for column, value in update_data.items():
        if column is None:
            # DictReader files surplus cells under the key None; they have
            # no column name, so there is nothing to map them to.
            continue

        if column in current_data:
            # Column names an existing top-level field: overwrite it.
            current_data[column] = value
            continue

        # Anything else is stored as an "extra". setdefault covers a dataset
        # that came back without an 'extras' list.
        extras = current_data.setdefault('extras', [])
        extra_was_found = False
        for extra in extras:
            if extra.get('key') == column:
                extra['value'] = value
                extra_was_found = True
        if not extra_was_found:
            extras.append({'key': column, 'value': value})


def do_csv_update(connection, csv_filename, nested_columns):
    """Update datasets from the rows of a CSV file.

    Every row is used to fetch the current dataset (by the row's ``id``
    column), the row's columns are merged into it, and the result is sent
    back through the update call. A column whose name is already a
    top-level key of the dataset overwrites that key; any other column is
    written to the dataset's ``extras`` list, updating an entry with the
    same key or appending a new one.

    A ``ckanapi.errors.NotFound`` raised while fetching or updating a
    dataset is reported and the remaining rows are still processed.

    :param connection: connection object passed through to the API helpers
    :param csv_filename: path of the CSV file; its header row supplies the
        column names
    :param nested_columns: currently unused; kept so existing callers keep
        working
    :return: None
    """
    with _open_csv_for_reading(csv_filename) as csvfile:
        for row in csv.DictReader(csvfile):
            # Each row is a fresh dict that is only read here, so no copy
            # is needed before merging it into the fetched dataset.
            dataset_id = row.get('id')
            try:
                # The fetch is inside the try as well: a missing dataset
                # raises NotFound here, before the update is ever reached.
                current_data = get_dataset_api_call(connection, row)
                _apply_row_to_dataset(current_data, row)
                update_dataset_api_call(connection, current_data)
            except ckanapi.errors.NotFound:
                # Report the id from the CSV row; current_data may not exist
                # if the fetch itself failed.
                print('Package with id: ' + str(dataset_id) + ' not found for update.')
            else:
                print('Updated package with id: ' + str(current_data['id']) + ' sucessfully')
# This main is no longer used, but it is kept here for reference.
def main_manual_update():
    """Push one hand-written dataset payload to the CKAN site.

    Builds a fixed payload (id, title, author and two extras), opens a
    ``ckanapi.RemoteCKAN`` connection with placeholder URL and API key, and
    sends the payload through ``update_dataset_api_call``.

    :return: None
    """
    # Manually assembled data. The update helper is generic, so any dataset
    # id and any field values can be supplied this way.
    payload = {
        'id': '7285f5c2-412a-4264-95fe-39c658d8be3z2',
        'title': 'Updated Title UPDATED AGAIN',
        'author': 'Jhon Doe',
        'extras': [
            {'key': 'Restricitons', 'value': 'Does not matter.'},
            {'key': 'Creator', 'value': 'This is joe'},
        ],
    }

    site = ckanapi.RemoteCKAN(
        'http://xxx',
        apikey='xxx',
        user_agent='ckanapiexample/1.0 (+http://example.com/my/website)',
    )

    # Send the payload over the connection created above.
    update_dataset_api_call(site, payload)
    print("done manual.")
# Entry point that updates datasets using data from a CSV file.
def update_from_csv(connection):
    """Run the CSV-driven update against ``data/update_datasets.csv``.

    :param connection: connection object handed on to ``do_csv_update``
    :return: None
    """
    csv_path = 'data/update_datasets.csv'
    # Column names forwarded as the ``nested_columns`` argument.
    nested = ['restrictions', 'another_column']
    do_csv_update(connection, csv_path, nested)
    print("done processing: {0}".format(csv_path))
def main():
    """Connect to the CKAN site and rename the 'frequency' extra field.

    The URL and API key are placeholders. The CSV-driven update is left
    commented out below.

    :return: None
    """
    site = ckanapi.RemoteCKAN('http://xxx', apikey='xxxx')

    # Execute updates from data/update_datasets.csv
    # update_from_csv(site)

    # Rename an extra field on every dataset.
    rename_extra_field_for_all_datasets(
        site,
        'frequency',
        'Update Frequency',
    )
if __name__ == '__main__':
    # Script entry point. The manual-update variant is kept for reference:
    # main_manual_update()
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment