Last active
January 10, 2019 09:56
-
-
Save knudmoeller/51762ebd2199f9e9f085f05fec3cc9c5 to your computer and use it in GitHub Desktop.
Ruby script to generate a JSON dump of all CKAN datasets, using the CKAN API via HTTP.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
require 'json' | |
require 'uri' | |
require 'net/https' | |
require 'optparse' | |
require 'logger' | |
# Send an HTTP GET request, interpret the response body as JSON
# and return the value stored under its "result" key as a Ruby object.
#
# +uri+:: Where to send the request (String or URI). TLS is switched on
#         when the scheme is "https".
#
# Raises RuntimeError when the server answers with a non-2xx status, and
# JSON::ParserError when a successful response body is not valid JSON.
def get_data(uri)
  uri = URI(uri)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = (uri.scheme == "https")

  # request_uri is the path plus the query string, so "?id=..." is forwarded.
  response = http.get(uri.request_uri)

  # Net::HTTP#get does not raise on 4xx/5xx responses. Without this check an
  # HTML error page would only surface later as a confusing JSON parse error.
  unless response.is_a?(Net::HTTPSuccess)
    raise "GET #{uri} failed: #{response.code} #{response.message}"
  end

  JSON.parse(response.body)['result']
end
# Command-line entry point: parse --source/--target, fetch every dataset
# listed by the CKAN API at the source, and write them as one JSON array.
options = {}
logger = Logger.new(STDOUT)
usage = "Usage: ruby export_ckan.rb [options]"

OptionParser.new do |opts|
  opts.banner = usage
  opts.separator ""
  opts.separator "Options:"
  opts.on("-s", "--source STRING", String, "Required: The source CKAN URI (e.g., https://datenregister.berlin.de).") do |source|
    options[:source] = source
  end
  opts.on("-t", "--target STRING", String, "Required: The target JSON file.") do |target|
    options[:target] = target
  end
end.parse!

# Both options are required. abort prints to stderr and exits with a
# non-zero status so that calling scripts can detect the failure.
unless options.key?(:source) && options.key?(:target)
  abort "You need to specify --source and --target options."
end

# package_list yields the dataset identifiers; each one is then fetched in
# full via package_show.
datasets = get_data(File.join(options[:source], "api/3/action/package_list"))
total = datasets.count

# Iterate over ALL datasets (not just a leading slice), so the dump matches
# the "N of total" progress that is logged.
dataset_dump = datasets.map.with_index(1) do |dataset, index|
  # Encode the identifier so unusual characters cannot corrupt the query string.
  query = URI.encode_www_form(id: dataset)
  full_path = File.join(options[:source], "api/3/action/package_show?#{query}")
  logger.info "#{index.to_s.rjust(4, '0')} of #{total.to_s.rjust(4, '0')}: #{full_path}..."
  get_data(full_path)
end

# Binary mode writes the generated JSON bytes unchanged on every platform.
File.binwrite(options[:target], JSON.pretty_generate(dataset_dump))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment