Skip to content

Instantly share code, notes, and snippets.

@lukemckinstry
Last active June 3, 2021 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lukemckinstry/0ade47db4255a90126aac30fd4da533d to your computer and use it in GitHub Desktop.
Save lukemckinstry/0ade47db4255a90126aac30fd4da533d to your computer and use it in GitHub Desktop.
Gathers a hierarchical tree of the entity types found in a list of Open Civic Data division identifiers.
import csv
import json
import pprint
# Shared pretty-printer (4-space indent) for displaying the collected tree.
pp = pprint.PrettyPrinter(indent=4)

# Setup: download the identifiers file for the target country directly from
# the Open Civic Data division-identifiers public repository:
# https://github.com/opencivicdata/ocd-division-ids/tree/master/identifiers
COUNTRY_SOURCE_FILE = 'country-us.csv'
def get_nested_key_from_strings(data, keys, default=False):
    """Look up the value stored under a chain of keys in nested dicts.

    Args:
        data: The outermost mapping to start from.
        keys: Sequence of keys to follow, outermost first. An empty (or
            otherwise falsy) ``keys`` returns ``data`` itself.
        default: Value returned when any key along the path is absent.
            Defaults to ``False``, which is what this function has always
            returned for a missing path; pass another sentinel (for example
            ``None``) when ``False`` could be a legitimately stored value.

    Returns:
        The value found at the end of the path, or ``default`` if some key
        along the path does not exist.
    """
    if not keys:
        return data
    node = data
    # Walk the path iteratively: no list re-slicing per level and no
    # dependence on the interpreter's recursion limit for long paths.
    for key in keys:
        if key not in node:
            return default
        node = node[key]
    return node
def set_nested_key_from_strings(data, keys, val):
    """Store ``val`` under a chain of keys, creating nested dicts as needed.

    Existing intermediate dicts are reused, so sibling entries already
    present along the path are left untouched.

    Args:
        data: Outermost dict; it is mutated in place.
        keys: Non-empty sequence of keys, outermost first.
        val: Value to store under the final key.

    Returns:
        None.

    Raises:
        IndexError: If ``keys`` is empty (same exception type the previous
            implementation raised in that case, now with a clear message).
    """
    if len(keys) == 0:
        raise IndexError('keys must contain at least one key')
    node = data
    # setdefault performs "reuse the child if present, else create it" in a
    # single step for every key except the last one.
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = val
def open_file(path=None):
    """Read the identifier CSV and split every distinct id into segments.

    Args:
        path: CSV file to read. When omitted, ``COUNTRY_SOURCE_FILE`` is
            used, so existing zero-argument calls behave as before.

    Returns:
        A list with one entry per distinct value of the ``id`` column, in
        first-seen order; each entry is that id split on ``'/'``.

    Raises:
        KeyError: If a row has no ``id`` column.
        OSError: If the file cannot be opened.
    """
    if path is None:
        path = COUNTRY_SOURCE_FILE
    # newline='' is what the csv module requires of file objects it reads.
    # NOTE(review): the upstream identifier files are presumed to be UTF-8;
    # confirm before relying on this on a platform with another default.
    with open(path, 'r', newline='', encoding='utf-8') as f:
        ids = [row['id'] for row in csv.DictReader(f)]
    # dict.fromkeys de-duplicates while keeping file order, so the result
    # (and anything derived from it) is reproducible between runs, unlike
    # iteration over a set of strings.
    return [identifier.split('/') for identifier in dict.fromkeys(ids)]
def process(d):
    """Build a nested dict of the division *types* seen in the identifiers.

    Each identifier is a list of segments such as
    ``['ocd-division', 'country:us', 'state:pa']``. Only the part of each
    segment before the first ``':'`` is kept, so that example contributes
    the path ``ocd-division -> country -> state``.

    Args:
        d: Iterable of split identifiers (lists of string segments).

    Returns:
        A nested dict keyed by level name; levels with nothing beneath them
        map to empty dicts. Identifiers consisting of the root segment
        alone contribute nothing.

    Raises:
        ValueError: With message ``'unexpected format'`` when an identifier
            is empty or does not start with ``'ocd-division'``.
            (``ValueError`` is a subclass of the ``Exception`` raised
            previously, so existing handlers still match.)
    """
    root = 'ocd-division'
    tree = {}
    for parts in d:
        if not parts or parts[0] != root:
            raise ValueError('unexpected format')
        segments = parts[1:]
        if not segments:
            # A bare root identifier adds no level to the tree.
            continue
        # Descend one level per segment, creating missing levels on the
        # way. Iterating the segments directly replaces the former
        # index-until-exception loop, whose bare ``except`` also hid
        # unrelated errors.
        node = tree.setdefault(root, {})
        for segment in segments:
            level = segment.split(':')[0]
            node = node.setdefault(level, {})
    return tree
def view_collection(c):
    """Pretty-print ``c`` using the module-level ``pp`` printer."""
    pp.pprint(c)
def write_output(c, path='output.json'):
    """Serialise ``c`` as JSON to a file, replacing any existing content.

    Args:
        c: JSON-serialisable object to write.
        path: Destination file. Defaults to ``'output.json'`` in the current
            working directory, the location that used to be hard-coded.

    Returns:
        None.

    Raises:
        TypeError: If ``c`` contains something ``json`` cannot serialise.
        OSError: If the file cannot be opened for writing.
    """
    # Explicit encoding keeps the written bytes independent of the platform
    # default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(c, f)
def main():
    """Run the pipeline: read the ids, build the tree, print it, save it."""
    identifiers = open_file()
    tree = process(identifiers)
    view_collection(tree)
    write_output(tree)


# Only run the pipeline when executed as a script, so that importing this
# module (for reuse or testing) does not read or write any files.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment