Skip to content

Instantly share code, notes, and snippets.

@nightsh
Created July 23, 2020 14:00
Show Gist options
  • Save nightsh/47cc0d61f1c8f87e97cd89c305342cff to your computer and use it in GitHub Desktop.
Save nightsh/47cc0d61f1c8f87e97cd89c305342cff to your computer and use it in GitHub Desktop.
hide_publisher_datasets.py
from __future__ import print_function
import os
import sys
from ckanapi import RemoteCKAN
# Connection settings are read from the environment once, at import time.
# Each name is None when its variable is not set.
url = os.environ.get('ED_CKAN_URL')
apiKey = os.environ.get('ED_CKAN_KEY')
publisher_id = os.environ.get('ED_CKAN_ORG_ID')
def package_search(data_dict):
    """Run the ``package_search`` action through the module-level client.

    ``data_dict`` is forwarded untouched as the action's parameters, and the
    client's response is handed back as-is.  The global ``remote`` must have
    been assigned before this is called.
    """
    search_result = remote.call_action('package_search', data_dict=data_dict)
    return search_result
def get_publisher_packages(publisher_id):
    """Return every dataset found for the given organization.

    Searches with the query ``organization:<publisher_id>`` (private
    datasets included) and follows the pagination until all matches have
    been collected.

    The next page always starts at the number of datasets gathered so far
    rather than at a multiple of the requested page size, so no dataset is
    skipped when the server hands back fewer rows per page than were asked
    for (NOTE(review): CKAN can cap ``rows`` through site configuration --
    confirm against the target instance).

    Returns a list of the package dicts found under the ``results`` key of
    each response; the list is empty when the search yields nothing.
    """
    rows = 1000
    packages = []
    while True:
        result = package_search(data_dict={
            'q': 'organization:' + str(publisher_id),
            'rows': rows,
            'start': len(packages),
            'type': 'dataset',
            'include_private': True,
        })
        # An empty/None response ends the walk on any page, not just the first.
        if not result:
            break
        page = result.get('results') or []
        packages.extend(page)
        # A missing count is treated as "nothing more to fetch".
        total = result.get('count') or 0
        # Stop on an empty page as well, so a stale or wrong count can never
        # turn this into an endless loop.
        if not page or len(packages) >= total:
            break
    return packages
def get_package(id):
    """Fetch a single package with the ``package_show`` action.

    Returns whatever the client returns for the given ``id``.  Any
    ``Exception`` raised by the lookup is swallowed and ``None`` is returned
    instead, so callers must check for ``None``.
    """
    try:
        return remote.action.package_show(id=id)
    except Exception:
        # Best-effort lookup: the failure itself is deliberately not reported.
        return None
def update_package(package):
    """Send ``package`` to the ``package_update`` action.

    The dict is passed through unchanged as the action's parameters and the
    client's response is returned as-is.  Exceptions raised by the client
    propagate to the caller.
    """
    update_result = remote.call_action('package_update', data_dict=package)
    return update_result
def hide_one_dataset(dataset_id):
    """Set a single dataset to private and print what was done.

    Does nothing when the dataset cannot be fetched.  Otherwise the fetched
    record gets ``private = True``, is sent back through ``update_package``,
    and the update response is printed.
    """
    dataset = get_package(dataset_id)
    if dataset is not None:
        print('Package to update: ', dataset.get('id'))
        print('name: ', dataset.get('name'))
        dataset['private'] = True
        print('Updated package: ', update_package(dataset))
def hide_publisher_datasets(publisher_id):
    """Set every dataset of an organization to private, reporting progress.

    Each dataset returned by ``get_publisher_packages`` is fetched in full,
    flagged ``private = True`` and sent back through ``update_package``.
    One progress line is printed per dataset, ending in ``done!`` or
    ``failed!``.  A dataset that cannot be fetched or updated is recorded,
    and the names of all failures are listed once the loop has finished.

    Failures never abort the run, but ``KeyboardInterrupt`` and
    ``SystemExit`` are allowed to propagate so the script can be stopped.
    """
    packages = get_publisher_packages(publisher_id)
    total_packages = len(packages)
    errors = []
    print('Number of packages to update: ', total_packages)
    # enumerate() keeps the [n/total] counter in step with the loop on every
    # path, including the early ``continue`` below.
    for current, found in enumerate(packages, 1):
        package = get_package(found.get('id'))
        # Fall back to the search hit's name when the full record is missing.
        label = found.get('name') if package is None else package.get('name')
        print('[{}/{}] Updating package {}... '.format(current,
                                                        total_packages,
                                                        label), end='')
        if package is None:
            # Count an unfetchable dataset as a failure instead of dropping
            # it silently; otherwise the summary would under-report.
            errors.append(found)
            print('failed!')
            continue
        package['private'] = True
        try:
            update_package(package)
        except Exception:
            # Not a bare ``except``: Ctrl-C must stop the run rather than be
            # logged as one more failed package.
            errors.append(package)
            print('failed!')
        else:
            print('done!')
    if errors:
        print('=================================')
        print('Failed updating {} package{}:'.format(len(errors), ('s' if len(errors) > 1 else '')))
        for failed in errors:
            print(failed.get('name'))
if __name__ == '__main__':
    # To hide a single dataset instead, call hide_one_dataset(<dataset id>)
    # once ``remote`` has been created below.

    # Every required setting, paired with the message shown when it is unset.
    required_settings = (
        (url, 'ED_CKAN_URL environment variable is needed.'),
        (apiKey, 'ED_CKAN_KEY environment variable is needed.'),
        (publisher_id, 'ED_CKAN_ORG_ID environment variable is needed.'),
    )
    errors = [message for value, message in required_settings if not value]
    if errors:
        # Report all missing settings at once, then stop with a failure code.
        for message in errors:
            print(message)
        sys.exit(1)

    # ``remote`` is the module-level client that the helper functions use.
    remote = RemoteCKAN(url, apiKey)
    print('CKAN URL: {}'.format(url))
    print('Publisher: {}'.format(publisher_id))
    hide_publisher_datasets(publisher_id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment