Skip to content

Instantly share code, notes, and snippets.

@seanh
Forked from rufuspollock/data.json
Created November 19, 2012 12:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save seanh/4110454 to your computer and use it in GitHub Desktop.
Save seanh/4110454 to your computer and use it in GitHub Desktop.
Scripts that pull some demo data (gold prices, Malawi aid projects, etc.) from datahub.io and push it to your CKAN instance.

Installation

virtualenv load_demo_data
. load_demo_data/bin/activate
mkdir load_demo_data/src
cd load_demo_data/src
git clone git://gist.github.com/4110454.git load_demo_data
pip install -r load_demo_data/pip-requirements.txt

Usage

See `./load.py -h` and `./getdata.py -h`.

{
"datasets": {
"adur_district_spending": {
"author": "Lucy Chambers",
"author_email": "",
"extras": {
"spatial-text": "Adur, West Sussex, South East England, England, United Kingdom",
"spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [-0.3715, 50.8168],[-0.3715, 50.8747], [-0.2155, 50.8747], [-0.2155, 50.8168], [-0.3715, 50.8168] ] ] }"
},
"license": "License Not Specified",
"license_id": "notspecified",
"license_title": "License Not Specified",
"maintainer": "",
"maintainer_email": "",
"name": "adur_district_spending",
"notes": "From Spikes Cavell, Spotlight on Spend. \r\n\r\nFor Ardur, records from April 2009-March 2010 are currently available (2011-008-04) ",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "Adur District Council April 2009",
"format": "CSV",
"hash": "",
"id": "281dffa6-ea9b-4446-be41-05dced06591f",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "",
"resource_type": "file",
"size": null,
"url": "http://ckan.net/storage/f/file/3ffdcd42-5c63-4089-84dd-c23876259973"
},
{
"cache_last_updated": null,
"created": null,
"description": "Mapping Metadata for Adur",
"format": "JSON",
"hash": "",
"id": "6cce3936-b169-4d12-82ba-65fcb79734a0",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "",
"openspending_hint": "model",
"resource_type": "file",
"size": null,
"url": "http://ckan.net/storage/f/file/c8ce520c-c2e6-463a-99a3-ad24b023ccb4"
},
{
"cache_last_updated": null,
"created": null,
"description": "Revised CSV for import",
"format": "CSV",
"hash": "",
"id": "04127ad5-77e5-4a08-9f40-12d3c383e460",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "",
"openspending_hint": "data",
"resource_type": "file",
"size": null,
"url": "http://mk.ucant.org/info/data/adur.csv"
}
],
"state": "active",
"tags": [
"country-uk",
"date-2009",
"openspending",
"regional"
],
"title": "UK: Adur District Council Spending Data",
"url": "http://www.spotlightonspend.org.uk/Downloads/1038",
"version": ""
},
"afghanistan-election-data": {
"author": "",
"author_email": "",
"extras": {
"spatial-text": "Afghanistan",
"spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [74.898827, 29.394159],[74.898827, 38.453041], [60.50526, 38.453041], [60.50526, 29.394159], [74.898827, 29.394159] ] ] }"
},
"license": "License Not Specified",
"license_id": "notspecified",
"license_title": "License Not Specified",
"maintainer": "",
"maintainer_email": "",
"name": "afghanistan-election-data",
"notes": "### About\r\n\r\nFrom website:\r\n\r\n> Welcome to AfghanistanElectionData.org. This website, created by the National Democratic Institute (NDI) in partnership with Development Seed, a Washington, D.C.-based online communications consultancy, is designed to make data from the August 20, 2009, Afghanistan presidential election accessible and transparent. The reports below provide examples of the analysis that this website facilitates.\r\n\r\nWe have provided one sample csv file as there is no download for the whole dataset. There are many more files on the website.",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "District Names",
"format": "csv",
"hash": "",
"id": "f6331f99-51f6-44d9-95b9-b20f3b74f360",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "",
"resource_type": "",
"size": null,
"url": "http://afghanistanelectiondata.org/sites/default/files/district_centerpoints.csv"
}
],
"state": "active",
"tags": [
"country-afghanistan",
"election",
"politics",
"poll",
"transparency"
],
"title": "Afghanistan Election Districts",
"url": "http://afghanistanelectiondata.org/",
"version": ""
},
"afterfibre": {
"author": "Steve Song",
"author_email": "stephen.song@gmail.com",
"extras": {
"spatial-text": "Africa",
"spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [50.8, -34.2],[50.8, 36.7], [-19.9, 36.7], [-19.9, -34.2], [50.8, -34.2] ] ] }"
},
"license": null,
"license_id": "",
"license_title": "",
"maintainer": "",
"maintainer_email": "",
"name": "afterfibre",
"notes": "Geodata showing African terrestrial fibre optic cable projects. Data is available as CSV or JSON (+ GeoJSON) via the DataStore API.\r\n\r\n### Data\r\n\r\nPrimary data file is the CSV resource: <http://thedatahub.org/dataset/afterfibre/resource/f5d81da5-2e55-4302-8ed2-58401d2c139e>\r\n\r\nHowever, this data needed to be cleaned up and converted to geojson for storing in the DataHub DataStore and visualization. Details of this below.\r\n\r\n#### Cleaning Process\r\n\r\nUse data package manager (dpm) to clone the DataHub dataset.\r\n\r\n # say yes to downloading of data files\r\n dpm clone http://thedatahub.org/dataset/afterfibre .\r\n\r\nYou will now have an afterfibre directory containing the dataset.\r\n\r\n cd afterfibre\r\n\r\nGrab scripts from the code repository\r\n\r\n git clone https://github.com/rgrp/dp-afterfibre .\r\n\r\nNow run cleanup - note you will need to install geojson (pip install geojson)\r\n\r\n python convert.py\r\n\r\nNow you have json version of data in `data/AfTerFibre_21nov2011.json`. We will upload this to the DataHub DataStore for the original CSV.\r\n\r\nNote: you will need the DataStore client from https://gist.github.com/1950581\r\n\r\n ckan-datastore.py upload http://thedatahub.org/api/data/f5d81da5-2e55-4302-8ed2-58401d2c139e data/AfTerFibre_21nov2011.json \r\n\r\nLet's check the result, visit: <http://thedatahub.org/api/data/f5d81da5-2e55-4302-8ed2-58401d2c139e/_search?size=5&pretty=true>\r\n\r\nNow the data's ready for easy visualization in javascript! (Check ou the visualization resource).\r\n\r\n",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "Data on terrestrial fibre optic cable projects in Africa",
"format": "text/csv",
"hash": "md5:6939ae135bb3f274e9d5d346bffeb309",
"id": "f5d81da5-2e55-4302-8ed2-58401d2c139e",
"last_modified": "2011-11-24T11:20:43",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "african-terrestial-fibre.geojson.csv",
"resource_type": "file.upload",
"size": 637379,
"url": "https://raw.github.com/rgrp/dp-afterfibre/master/data/african-terrestial-fibre.csv"
}
],
"state": "active",
"tags": [
"africa",
"bandwidth",
"broadband",
"cables",
"fibre",
"optic",
"terrestrial"
],
"title": "African Terrestrial Fibre Optic Cables",
"url": "http://manypossibilities.net/afterfibre/",
"version": ""
},
"gold-prices": {
"author": "Bundesbank",
"author_email": "",
"extras": {
"created": "2008-10-07",
"source": ""
},
"license": "Open Data Commons Public Domain Dedication and Licence (PDDL)",
"license_id": "odc-pddl",
"license_title": "Open Data Commons Public Domain Dedication and Licence (PDDL)",
"license_url": "http://www.opendefinition.org/licenses/odc-pddl",
"maintainer": "Rufus Pollock",
"maintainer_email": "",
"name": "gold-prices",
"notes": "Monthly gold prices (USD) in London from Bundesbank.\r\n\r\nGeneral: 1 ounce of fine gold = 31.1034768g. Method of calculation:\r\n\r\n* Since 1 April 1968, calculated from the daily morning fixing;\r\n* From January 1950 to 21 March 1954, calculated using the Bank of England's gold purchasing price (1 ounce of fine = pound 12.40) in connection with the average exchange rate for the pound in New York (up to the end of 1952; source: Federal Reserve Bulletin) and, from January 1953, midpoint exchange rates for the US dollar in London (source: Financial Times (FT)).\r\n* From 22 March 1954 to December 1959, calculated using the fixing price for gold bars of approx. 12 1/2 kg and 995/1000 fineness and over (so-called standard bars) according to data from Metallgesellschaft AG, Frankfurt am Main, in connection with the average midpoint exchange rates for the US dollar in London (source: FT).\r\n* From January 1960 to 14 March 1968, average fixing price for standard bars as specified in the Bank of England's Quarterly Bulletin.\r\n* On 15 March 1968, fixing price suspended. Gold market split into an official (reserved for central banks) and a free market as a result of the Washington Communique of 17 March 1968. Gold trading suspended from 18 to 29 March 1968.\r\n* Sources for daily prices: April 1968 - March 1974: FT; April 1974 - December 1980: Samuel Montagu & Co. Ltd.; January 1981 - December 2005: FT; January 2006 - present: Reuters.\r\n* Comment on 1968-03: Average from 1 to 14 March 1968.\r\n\r\nLicense: PDDL (Source indicates no restrictions on data).",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "CSV file extracted and cleaned from source excel.",
"format": "csv",
"hash": "9d599bcf3b8db2b5c6aea528bc37d728c856b09c",
"id": "b9aae52b-b082-4159-b46f-7bb9c158d013",
"last_modified": "2012-05-04T12:40:59.181686",
"mimetype": "text/plain",
"mimetype_inner": "",
"name": "CSV ",
"resource_type": "file",
"size": "14502",
"url": "https://raw.github.com/datasets/gold-prices/master/data/data.csv"
}
],
"state": "active",
"tags": [
"economics",
"gold",
"price",
"time-series"
],
"title": "Gold Prices in London 1950-2008 (Monthly)",
"url": "http://www.bundesbank.de/statistik/statistik_zeitreihen.en.php?tr=www_s332_b01015_",
"version": ""
},
"italyregionalaccounts": {
"author": "Simona De Luca, Aline Pennisi",
"author_email": "",
"extras": {
"currency": "EUR"
},
"license": "License Not Specified",
"license_id": "notspecified",
"license_title": "License Not Specified",
"maintainer": "",
"maintainer_email": "",
"name": "italyregionalaccounts",
"notes": "The Regional Public Accounts (RPA) measure public financial flows at the regional level providing information on central and local government revenues and expenditures (either on current and capital account) by sector.\r\n\r\nData is available since 1996 for General Government or the wider Public Sector and allows for analysis of various sub-aggregates covering different macro-areas and administrative regions, sector classifications, economic categories, definitions of government expenditure and final expenditure recipients.\r\n\r\nData is produced by the Department for Development Policies, Ministry of Economic Development, ITALY and will be released in CSV/TXT format soon on the RPA website.\r\n\r\n\r\n\r\nTraduzione italiana:\r\n\r\nI Conti Pubblici Territoriali misurano i flussi finanziari pubblici a livello regionale fornendo informazioni sulle entrate e le spese (sia correnti che in conto capitale) delle amministrazioni centrali, regionali e locali per settore.\r\n\r\nI dati sono disponibili dal 1996 per il totale della pubblica amministrazione o il piu' ampio settore pubblico e consente analisi di per diverse macro-aree, regioni, settori, categorie economiche, livelli di governo e beneficiari ultimi della spesa.\r\n\r\nI dati sono prodotti dal Dipartimento per lo Sviluppo e la Coesione Economica del Ministero dello Sviluppo economico italiano.",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "Budget, 2 levels, CSV split",
"format": "CSV",
"hash": "",
"id": "d81037dc-6fb8-46f9-a79a-2d447bc2b12b",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "",
"openspending_hint": "data",
"resource_type": "file",
"size": null,
"url": "http://opendatalabs.org/italy/Italy-TOT-1996-2008-pubexp.csv"
},
{
"cache_last_updated": null,
"created": null,
"description": "OpenSpending Model",
"format": "JSON",
"hash": "",
"id": "1b2b09d2-8673-45f4-8c79-0fb0cd896a11",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "",
"openspending_hint": "model",
"resource_type": "file",
"size": null,
"url": "https://bitbucket.org/pudo/dpkg-italy-ra/raw/tip/model.js"
}
],
"state": "active",
"tags": [],
"title": "Italian Regional Public Accounts",
"url": "http://www.dps.tesoro.it/cpt/banca_dati_home.asp",
"version": ""
},
"malawi-aid-projects": {
"author": "",
"author_email": "",
"extras": {
"spatial-text": "Malawi",
"spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [32.689701, -17.135811],[32.689701, -9.373335], [35.92416, -9.373335], [35.92416, -17.135811], [32.689701, -17.135811] ] ] }"
},
"license": "License Not Specified",
"license_id": "notspecified",
"license_title": "License Not Specified",
"maintainer": "",
"maintainer_email": "",
"name": "malawi-aid-projects",
"notes": "Geocoded data on aid projects from the Government of Malawi's Aid Management Platform. It includes sub-national geocodes for approximately 550 aid projects undertaken in Malawi since 2000, representing nearly $5.3 billion in total commitments from over 30 donors or roughly 80% of all aid reported to the Ministry of Finance during that time. The work is the result of a collaboration between AidData, the Malawi Ministry of Finance, and Climate Change and African Political Stability Program (CCAPS) at the University of Texas.\r\n\r\nCitation: Peratsakis, Christian, Joshua Powell, Michael Findley, and Catherine Weaver. 2012. Geocoded Activity-Level Data from the Government of Malawi's Aid Management Platform. Washington D.C. AidData and the Robert S. Strauss Center for International Security and Law.",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "This dataset is based on the donor-reported aid information captured in the Malawi Aid Management Platform (AMP), hosted by the Malawi Ministry of Finance. Using project documents, gathered from in-country donor offices during three missions to Lilongwe, the AidData and CCAPS teams added standardized geocodes using the UCDP/AidData methodology. In total, projects from 30 donor agencies were geocoded for 548 projects, representing $5.3 billion in total commitments (approximately 80% of the total external assistance to Malawi reported to the government from 2000-2011). It represents the first effort to sub-nationally geocode all donors in a single partner country, and the first initiative of the sort envisioned by the Open Aid Partnership, an initiative spearheaded by the World Bank to increase the openness and effectiveness of development assistance at the subnational level. An interactive map displaying these data along with data on armed conflict, governance, and climate security vulnerability can be viewed at www.strausscenter.org/ccaps/mappingtool.",
"format": "text/csv",
"hash": "48043ad8461ec907e1932fa36a5a8a4cceb1df74",
"id": "b717c20e-2006-4ad4-82d2-59b57ebc1ab0",
"last_modified": "2012-04-21T16:31:14.727127",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "Malawi_release_17april2012.csv",
"resource_type": "file.upload",
"size": "1694283",
"url": "https://commondatastorage.googleapis.com/ckannet-storage/2012-04-21T162401/Malawi_release_17april2012.csv"
},
{
"cache_last_updated": null,
"created": null,
"description": "Variable and field definitions for the Malawi data.",
"format": "text/csv",
"hash": "bda05bcb9c20c491396a6c8d948c4f3688569aac",
"id": "f8e0c219-fd9a-4035-961d-8aa535b1ed54",
"last_modified": "2012-04-21T16:39:42.009473",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "Malawi_release_17april2012.metadata.csv",
"resource_type": "file.upload",
"size": "3345",
"url": "https://commondatastorage.googleapis.com/ckannet-storage/2012-04-21T163145/Malawi_release_17april2012.metadata.csv"
}
],
"state": "active",
"tags": [
"aid",
"country.mw",
"geocoded"
],
"title": "Malawi Aid Projects",
"url": "http://blog.aiddata.org/2012/04/where-are-donors-working-in-malawi-new.html",
"version": ""
},
"newcastle-city-council-payments-over-500": {
"author": "",
"author_email": "",
"extras": {
"spatial-text": "Newcastle upon Tyne, North East England, England, United Kingdom",
"spatial": "{ \"type\": \"Polygon\", \"coordinates\": [ [ [-1.5308, 54.9588],[-1.5308, 55.0796], [-1.7753, 55.0796], [-1.7753, 54.9588], [-1.5308, 54.9588] ] ] }"
},
"license": null,
"license_id": "",
"license_title": "",
"maintainer": "",
"maintainer_email": "",
"name": "newcastle-city-council-payments-over-500",
"notes": "Newcastle City Council spending data over \u00a3500. Data is published monthly and shows who was paid, how much was paid, and what this was for.\r\n\r\nThis will include:\r\n\r\n* all items we purchase\r\n* payments we make to contractors carrying out work on our behalf\r\n* other spend we incur in carrying out our business.\r\n\r\nWhat will be excluded:\r\n\r\n* payments made to staff\r\n* housing benefit payments\r\n* sensitive social services information which could put\r\n* vulnerable individuals at risk\r\n* personal information, such as the names of individuals receiving payments\r\n* confidential information, such as council tax or business rate refund.\r\n\r\nThe site offers monthly spending reports, only the last three are included here.",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "December 2011",
"format": "text/csv",
"hash": "f4f94ec6f3297e608a562434e52a3993a3c3f7e4",
"id": "0c2bd47a-6ac5-412e-a337-1b45a952e07e",
"last_modified": "2012-03-13T22:04:31.993982",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "December 2011",
"resource_type": "file",
"size": "1081957",
"url": "http://www.newcastle.gov.uk/sites/drupalncc.newcastle.gov.uk/files/wwwfileroot/your-council/local_transparency/december_2011.csv"
},
{
"cache_last_updated": null,
"created": null,
"description": "November 2011",
"format": "text/csv",
"hash": "",
"id": "52a0c92e-99a3-427d-93a6-73ef5cdccc11",
"last_modified": null,
"mimetype": "",
"mimetype_inner": "",
"name": "November 2011",
"resource_type": "file",
"size": null,
"url": "http://www.newcastle.gov.uk/sites/drupalncc.newcastle.gov.uk/files/wwwfileroot/your-council/local_transparency/november_2011.csv"
},
{
"cache_last_updated": null,
"created": null,
"description": "October 2011",
"format": "text/csv",
"hash": "93762ff3919569fb9280de38ad62dff8c2a1473d",
"id": "1d1c9089-7037-48a3-a70d-fbecf146238f",
"last_modified": "2012-03-13T22:04:28.872581",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "October 2011",
"resource_type": "file",
"size": "850440",
"url": "http://www.newcastle.gov.uk/sites/drupalncc.newcastle.gov.uk/files/wwwfileroot/your-council/local_transparency/october_2011.csv"
},
{
"cache_last_updated": null,
"created": "2012-06-06T15:54:48.643419",
"description": "September 2011",
"format": "text/csv",
"hash": "1a7a576dfd490153f6097602bb64a2312079aa47",
"id": "ff9038ef-41fd-4b00-9b1e-e942fbdaaf25",
"last_modified": "2012-06-13T07:57:00.966643",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "September 2011",
"resource_type": "file",
"size": "764773",
"url": "http://www.newcastle.gov.uk/sites/drupalncc.newcastle.gov.uk/files/wwwfileroot/your-council/local_transparency/september_2011.csv"
},
{
"cache_last_updated": null,
"created": "2012-08-14T12:39:24.519707",
"description": "",
"format": "text/csv",
"hash": "224ba4b7482d3cdd7e6b2373679a9cfaf8eb8dac",
"id": "d51c9bd4-8256-4289-bdd7-962f8572efb0",
"last_modified": "2012-08-14T12:39:35.638235",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "January 2012",
"resource_type": "file",
"size": "1080880",
"url": "http://www.newcastle.gov.uk/sites/drupalncc.newcastle.gov.uk/files/wwwfileroot/your-council/local_transparency/january_2012.csv"
},
{
"cache_last_updated": null,
"created": "2012-08-14T12:47:25.494670",
"description": "",
"format": "text/csv",
"hash": "4cd5541587fe172ceaf59dc941e00b755ff1bdb5",
"id": "5b958b29-a399-41aa-86ff-1d198a124140",
"last_modified": "2012-08-14T12:47:35.250641",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "February 2012",
"resource_type": "file",
"size": "951982",
"url": "http://www.newcastle.gov.uk/sites/drupalncc.newcastle.gov.uk/files/wwwfileroot/your-council/local_transparency/february_2012.csv"
}
],
"state": "active",
"tags": [
"city-newcastle-upon-tyne",
"country.uk",
"spending"
],
"title": "Newcastle City Council: Payments over \u00a3500",
"url": "http://www.newcastle.gov.uk/your-council/local-transparency/payments-over-500",
"version": ""
},
"us-national-foreclosure-statistics-january-2012": {
"author": "",
"author_email": "",
"extras": {},
"license": null,
"license_id": "",
"license_title": "",
"maintainer": "",
"maintainer_email": "",
"name": "us-national-foreclosure-statistics-january-2012",
"notes": "County data: http://www.npr.org/templates/story/story.php?storyId=111494514\r\n\r\nState data: http://statehealthfacts.org/comparetable.jsp?ind=649&cat=1",
"relationships": [],
"resources": [
{
"cache_last_updated": null,
"created": null,
"description": "US National Foreclosure Statistics - By State - January 2012 ",
"format": "CSV",
"hash": "eed152b03fc646a5fce2ab2c57e4dd021d864a17",
"id": "57e9e5e5-c659-4f08-af32-90ebef881fe5",
"last_modified": "2012-03-10T16:58:54.270508",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "1-2012-Foreclosures-by-State",
"resource_type": "file.upload",
"size": "1013",
"url": "https://commondatastorage.googleapis.com/ckannet-storage/2012-03-10T165422/us.foreclosures.jan.2012.by.state.csv"
},
{
"cache_last_updated": null,
"created": null,
"description": "US National Foreclosure Statistics - By County - January 2012",
"format": "CSV",
"hash": "117599a151557a028fa2ace0229c7eac95c8259f",
"id": "7c959599-82d4-4ab7-99fe-8c7da3d601b2",
"last_modified": "2012-03-10T16:58:54.525724",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "version control - old 1-2012-Foreclosures-by-County",
"resource_type": "file.upload",
"size": "450684",
"url": "https://commondatastorage.googleapis.com/ckannet-storage/2012-03-10T165700/us.foreclosures.jan.2012.by.county.csv"
},
{
"cache_last_updated": null,
"created": null,
"description": "Final version of US National Foreclosure Statistics - By County - January 2012",
"format": "CSV",
"hash": "d5b4d379006410d06ec818e7e6904ba3f9205246",
"id": "a716759d-7950-415b-954c-c5f84f9bc65e",
"last_modified": "2012-03-10T17:04:18.716252",
"mimetype": "text/csv",
"mimetype_inner": "",
"name": "1-2012-Foreclosures-by-County",
"resource_type": "file.upload",
"size": "164202",
"url": "https://commondatastorage.googleapis.com/ckannet-storage/2012-03-10T170330/us.foreclosures.jan.2012.by.county.csv"
}
],
"state": "active",
"tags": [
"USA",
"banks",
"credit",
"foreclosure",
"foreclosures",
"homes",
"housing",
"mortgage",
"mortgages",
"occupy"
],
"title": "US National Foreclosure Statistics January 2012",
"url": "",
"version": ""
}
},
"groups": {
"explorer-examples": {
"description": "This group contains various real datasets that show CKAN's data previewer in action. The previewer shows a configurable grid view of tabular data, plots columns of data on a graph, and shows geo-coded data on an interactive map. It can also preview image files and web pages.",
"image_url": "http://farm8.staticflickr.com/7129/7041988029_411d985015_c.jpg",
"name": "data-explorer",
"packages": [
"adur_district_spending",
"afghanistan-election-data",
"gold-prices",
"italyregionalaccounts",
"newcastle-city-council-payments-over-500",
"us-national-foreclosure-statistics-january-2012"
],
"title": "Data Explorer Examples"
},
"geo-examples": {
"description": "CKAN can plot both latitude and longitude as well as GeoJSON on a map. For more information, see http://ckan.org/features/geospatial/",
"image_url": "http://farm6.staticflickr.com/5117/6944276022_06ea83e528.jpg",
"name": "geo-examples",
"packages": [
"afghanistan-election-data",
"afterfibre",
"malawi-aid-projects"
],
"title": "Geospatial Data Explorer examples",
"type": "group"
}
},
"schemas": {
"https://raw.github.com/datasets/gold-prices/master/data/data.csv": [
{
"id": "date",
"type": "timestamp"
},
{
"id": "price",
"type": "float8"
}
]
}
}
#!/usr/bin/env python
import urllib
import sys
import json
# Root of the API that dataset metadata is downloaded from.
base = 'http://datahub.io/api'

# Local JSON file holding the demo data; it is read here and rewritten by sync().
demodatafile = 'data.json'

# Current contents of the demo data file, loaded once at import time.
# The context manager closes the file as soon as it has been parsed.
with open(demodatafile) as _infile:
    current = json.load(_infile)

# Names of the datasets that sync() retrieves.
datasets = [
    'adur_district_spending',
    "afghanistan-election-data",
    'afterfibre',
    "gold-prices",
    "italyregionalaccounts",
    'malawi-aid-projects',
    "newcastle-city-council-payments-over-500",
    "us-national-foreclosure-statistics-january-2012"
]
def sync():
    """Refresh every dataset named in ``datasets`` and rewrite the data file.

    Each name is fetched with ``get_dataset`` and stored under
    ``current['datasets'][name]``. After the loop the whole ``current``
    structure is written back to ``demodatafile`` as JSON with a two-space
    indent and sorted keys.
    """
    for name in datasets:
        print('Retrieving: %s' % name)
        current['datasets'][name] = get_dataset(name)
    # Use a context manager so the file is flushed and closed even if
    # json.dump raises part-way through.
    with open(demodatafile, 'w') as outfo:
        json.dump(current, outfo, indent=2, sort_keys=True)
def get_dataset(name):
    """Download the dataset called *name* and strip unwanted keys from it.

    The dataset is read as JSON from ``base + '/rest/dataset/' + name``.
    A fixed list of keys is then removed from every entry of
    ``parsed['resources']`` and another fixed list from the dataset itself.
    Keys that are absent from the response are skipped rather than raising
    ``KeyError``.

    Returns the parsed dataset dict with those keys removed.
    """
    url = base + '/rest/dataset/' + name
    fo = urllib.urlopen(url)
    try:
        parsed = json.load(fo)
    finally:
        # Release the HTTP connection whether or not parsing succeeded.
        fo.close()

    resource_keys = ['webstore_url', 'webstore_last_updated',
                     'resource_group_id', 'package_id', 'position',
                     'tracking_summary', 'cache_url']
    dataset_keys = ['isopen', 'groups', 'ckan_url', 'download_url',
                    'notes_rendered', 'ratings_average', 'ratings_count',
                    'revision_id', 'tracking_summary', 'type',
                    'metadata_modified', 'metadata_created', 'id']

    for resource in parsed['resources']:
        for key in resource_keys:
            # pop() with a default tolerates responses that omit the key.
            resource.pop(key, None)
    for key in dataset_keys:
        parsed.pop(key, None)
    return parsed
# Run the sync only when this file is executed as a script.
if __name__ == "__main__":
    sync()
#!/usr/bin/env python
'''
TODO
* Related items
* US dataset
'''
import optparse
import csv
import urllib
import json
import ckanclient
# Contents of data.json, loaded once at import time; the functions below
# read its 'datasets', 'groups' and 'schemas' entries.
# The context manager closes the file as soon as it has been parsed.
with open('data.json') as _datafile:
    demodata = json.load(_datafile)
def create_demo_data(client):
    """Push every dataset and then every group from ``demodata`` via *client*.

    Datasets are handed to ``create_dataset`` one by one. Each group is
    first sent with ``group_register_post``; if that raises
    ``ckanclient.CkanApiError`` it is sent with ``group_entity_put`` instead.
    """
    for dataset in demodata['datasets'].values():
        create_dataset(client, dataset)

    for group_key, group in demodata['groups'].items():
        print('Creating group: %s' % group_key)
        try:
            client.group_register_post(group)
        except ckanclient.CkanApiError:
            client.group_entity_put(group)
def _read_resource(url, parse):
    """Open *url*, return ``parse(response)``, and always close the response."""
    response = urllib.urlopen(url)
    try:
        return parse(response)
    finally:
        response.close()


def create_dataset(client, dataset):
    """Upload *dataset* through *client* and load resources that have a schema.

    The dataset is first sent with ``package_register_post``; if that raises
    ``ckanclient.CkanApiError`` it is sent with ``package_entity_put``
    instead. Every resource whose URL is a key of ``demodata['schemas']`` is
    then downloaded and passed to the ``datastore_create`` action together
    with the field list stored under that key.

    Only the ``csv`` and ``json`` formats (compared case-insensitively) are
    downloaded; any other format is reported and the resource is skipped.
    A failing datastore upload is reported and does not stop the remaining
    resources from being processed.
    """
    print('Uploading dataset: %s' % dataset['name'])
    try:
        client.package_register_post(dataset)
    except ckanclient.CkanApiError:
        client.package_entity_put(dataset)

    for resource in dataset['resources']:
        lookupname = resource['url']
        if lookupname not in demodata['schemas']:
            continue
        print('Updating datastore for %s' % lookupname)
        fields = demodata['schemas'][lookupname]
        # Normalise once so 'CSV'/'csv' and 'JSON'/'json' are treated alike.
        fmt = resource['format'].lower()
        if fmt == 'csv':
            data = _read_resource(
                lookupname, lambda fo: list(csv.DictReader(fo)))
        elif fmt == 'json':
            # Send parsed data rather than the raw response text.
            # NOTE(review): assumes the resource is a JSON array of record
            # objects, as datastore_create expects - confirm per resource.
            data = _read_resource(lookupname, json.load)
        else:
            print('Cannot upload data from resource with format: %s' % resource['format'])
            continue
        try:
            client.action('datastore_create',
                          resource_id=resource['id'],
                          fields=fields,
                          records=data)
        except Exception as err:
            # Best effort: report the failure and carry on with the next
            # resource. Catching Exception (not a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            print('Datastore upload failed for %s: %s' % (lookupname, err))
            print(client.last_message)
def main():
    """Parse the command-line options and push the demo data.

    ``-b/--base`` gives the API base URL and ``-a/--apikey`` the API key;
    both are passed to ``ckanclient.CkanClient``, and the resulting client
    is handed to ``create_demo_data``.
    """
    cli = optparse.OptionParser()
    cli.add_option(
        "-b", "--base",
        default="http://localhost:5000/api",
        help="Base URL for CKAN API to post to [default: '%default']"
    )
    cli.add_option(
        "-a", "--apikey",
        default="tester",
        help="API key to post with [default: '%default']"
    )
    # Positional arguments are accepted by the parser but not used.
    opts, _positional = cli.parse_args()
    create_demo_data(ckanclient.CkanClient(opts.base, opts.apikey))
# Run the upload only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment