Skip to content

Instantly share code, notes, and snippets.

Created November 10, 2013 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cclauss/7401534 to your computer and use it in GitHub Desktop.
Save cclauss/7401534 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""Socrata Open Data datasets are cached locally and printed out.

Datasets published by the City of Austin @ https://data.austintexas.gov
"""
import collections, contextlib, datetime, json, os.path, time
import urllib2
# Output-mode switch.  Per the original note: leave False for namedtuples
# to be printed (the only visible reference is in commented-out code).
rawData = False

# Substitution values for fileNameFmt and urlFmt; 'dataSet' starts empty.
fmtDict = dict(cityName='austintexas', dataFormat='json', dataSet=None)

# Positional record formats keyed by dataset id.
dataSets = {
    'b6cd-bhbk': '{} {} {} {}',
    'fksj-fw68': '{:>7} {} {:<24} {}',
    'hh3n-3s7c': '{:>7} {} {} {}',
    'iuw2-kwij': '{} {} {:>9}',
    'uvma-gv9c': '{} {} {} {}',
}

# NOTE(review): this rebinding replaces the table above, leaving a single
# dataset whose format uses named fields -- confirm that is intended.
dataSets = {
    'b6cd-bhbk': '{sample_site_no} {sample_date} {depth_in_meters:<4} {result:<5} {parameter}',
}

# Templates: local cache file name, download URL, and a one-line header.
fileNameFmt = '{dataSet}.{dataFormat}.py'
urlFmt = 'https://data.{cityName}.gov/api/views/{dataSet}/rows.{dataFormat}'
headerFmt = '{attribution} ({category}) - {id}: {name}'
def hoursAgo(inHoursAgo=24):
    """Return the epoch timestamp of the moment inHoursAgo hours before now.

    Args:
        inHoursAgo: number of hours to step back from the current time
            (default 24); fractional values are accepted by timedelta.

    Returns:
        A float in the same units as time.time() (seconds since the epoch).
    """
    deltaT = datetime.timedelta(hours=inHoursAgo)
    return time.time() - deltaT.total_seconds()
def fileMustBeRefreshed(inFileName, inMaxAgeHours=24):
    """Return True when the cached file is missing or older than the limit.

    Args:
        inFileName: path of the local cache file to check.
        inMaxAgeHours: maximum acceptable age in hours (default 24).

    Returns:
        False only if inFileName is an existing regular file whose
        modification time is newer than inMaxAgeHours ago; True otherwise.
    """
    if not os.path.isfile(inFileName):
        return True
    try:
        modifiedAt = os.path.getmtime(inFileName)
    except OSError:
        # The file disappeared (or became unreadable) between the isfile
        # check and the stat call; treat it the same as a missing file.
        return True
    return modifiedAt <= hoursAgo(inMaxAgeHours)
def getWebPageSource(inURL):
    """Fetch inURL and return the complete response body.

    Args:
        inURL: the URL to open with urllib2.urlopen.

    Returns:
        Whatever the response object's read() returns for the whole body.

    Raises:
        Any error raised by urllib2.urlopen propagates to the caller.
    """
    # contextlib.closing guarantees the response is closed even if read() fails.
    with contextlib.closing(urllib2.urlopen(inURL)) as inFile:
        # NOTE(review): the body of this `with` was missing from the reviewed
        # copy; returning the full response is the presumed intent -- confirm.
        return inFile.read()
def getDataDict(inFileName):
    """Load the JSON document stored in inFileName and return it.

    Args:
        inFileName: path of a file containing JSON text.

    Returns:
        The decoded JSON value (a dict for a JSON object).

    Raises:
        IOError/OSError if the file cannot be opened; ValueError if the
        contents are not valid JSON.
    """
    with open(inFileName) as inFile:
        # NOTE(review): the argument of json.loads( was cut off in the
        # reviewed copy; parsing the whole file is the presumed intent.
        return json.loads(inFile.read())
def dataRowNamedTuple(inDataDict):
    """Build a 'dataRow' namedtuple type from the dataset's column metadata.

    Only the entries of inDataDict['meta']['view']['columns'] that contain a
    'cachedContents' key contribute a field, in their listed order.

    Args:
        inDataDict: decoded dataset dict with a meta -> view -> columns list.

    Returns:
        A namedtuple class named 'dataRow'.

    Raises:
        KeyError if the expected metadata keys are absent; ValueError if a
        column name is not a valid namedtuple field name.
    """
    # NOTE(review): the statement under the 'cachedContents' test was missing
    # from the reviewed copy.  'fieldName' is assumed to be the key holding
    # the column identifier (it would match the named fields in the dataSets
    # format string) -- confirm against the real payload.
    dataColumns = [columnsDict['fieldName']
                   for columnsDict in inDataDict['meta']['view']['columns']
                   if 'cachedContents' in columnsDict]
    return collections.namedtuple('dataRow', dataColumns)
# Main script: walk the configured dataset ids in sorted order; for each one,
# check whether the local cache file needs refreshing, then load the cached
# JSON and print the dataset description.
# NOTE(review): this copy has lost its indentation and some statement bodies
# (the `with open(...)` below and the final `for` loop have no visible body);
# restore them before running.
for dataSet in sorted(dataSets):
print('=' * 80)
# fmtDict feeds both the cache file name and the download URL templates.
fmtDict['dataSet'] = dataSet
fileName = fileNameFmt.format(**fmtDict)
if fileMustBeRefreshed(fileName):
theURL = urlFmt.format(**fmtDict)
print('Writing {} --> {}'.format(theURL, fileName))
# NOTE(review): presumably the fetched page source is written to outFile
# here -- the statement is not visible; confirm.
with open(fileName, 'w') as outFile:
dataDict = getDataDict(fileName)
dataRow = dataRowNamedTuple(dataDict)
dictView = dataDict['meta']['view']
print('> {description}'.format(**dictView))
recordFmt = dataSets[dataSet]
# NOTE(review): only commented-out code follows this loop header, so the
# per-record printing logic cannot be determined from this copy.
for theItem in dataDict['data']:
#theDict = dataRow(*theItem[8:])._asdict()
# if rawData:
# print(recordFmt.format(*theItem[8:]))
# else:
# print(dataRow(*theItem[8:]))
#import pprint; pprint.pprint(dataDict['data'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment