Skip to content

Instantly share code, notes, and snippets.

@pebbie
Created August 13, 2014 21:00
Show Gist options
  • Save pebbie/439eca9f0f05c1845aee to your computer and use it in GitHub Desktop.
Save pebbie/439eca9f0f05c1845aee to your computer and use it in GitHub Desktop.
Dump all data from a CKAN website
"""
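Dump all datasets and their resources from a CKAN website (requires curl).

usage : dump_ckan.py <ckan_website>
ckan_website : base URL of the CKAN site, e.g. http://data.ukp.go.id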
author : Peb Ruswono Aryan
"""
from __future__ import print_function
import json
import os
import sys
from urlparse import urlparse
def _download(src_url, dest_path):
    """Fetch *src_url* into *dest_path* with curl, following redirects.

    curl is started from an argument list (no shell), so characters such as
    ``&``, ``;``, ``?`` or spaces in a URL taken from remote metadata cannot
    break or inject into the command line.

    Returns True when curl exits with status 0.  On failure the partial file
    is removed so that the caller's ``os.path.exists`` cache check retries
    the download on the next run instead of trusting a truncated file.
    """
    import subprocess  # local import: only this helper spawns a process

    with open(dest_path, "wb") as out:
        # "--" ends option parsing so a URL starting with "-" stays a URL.
        status = subprocess.call(["curl", "-L", "--", src_url], stdout=out)
    if status != 0:
        os.remove(dest_path)
        return False
    return True


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.exit("usage: dump_ckan.py <ckan_website>")

    # Drop trailing slashes so joining with the API path does not yield "//".
    url = sys.argv[1].rstrip("/")
    domain = urlparse(url)[1]
    if not domain:
        # Without a scheme urlparse reports no host, and mkdir("") would fail.
        sys.exit("cannot determine host from %r; include the scheme, "
                 "e.g. http://data.ukp.go.id" % url)
    if not os.path.exists(domain):
        os.mkdir(domain)

    # The package list is cached next to the output directory as <host>.json.
    cname = domain + ".json"
    if not os.path.exists(cname):
        if not _download(url + "/api/3/action/package_list", cname):
            sys.exit("failed to download package list from %s" % url)
    with open(cname) as fp:
        packages = json.load(fp)

    for pname in packages["result"]:
        print(pname)
        # Per-package metadata is cached as <host>/<package>.json.
        fname = os.path.join(domain, pname + ".json")
        if not os.path.exists(fname):
            show_url = url + "/api/3/action/package_show?id=" + pname
            if not _download(show_url, fname):
                print("skipping %s: metadata download failed" % pname,
                      file=sys.stderr)
                continue
        with open(fname) as fp:
            dataset = json.load(fp)

        for resource in dataset["result"]["resources"]:
            res_url = resource["url"]
            # Take the extension from the URL path only, so a query string
            # or fragment does not end up in the local file name.
            res_ext = os.path.splitext(urlparse(res_url).path)[1]
            rname = os.path.join(
                domain, pname + "_" + resource["id"] + res_ext)
            if not os.path.exists(rname):
                if not _download(res_url, rname):
                    print("failed to download %s" % res_url,
                          file=sys.stderr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment