Last active
June 3, 2021 14:07
-
-
Save lukemckinstry/0ade47db4255a90126aac30fd4da533d to your computer and use it in GitHub Desktop.
Gathers a hierarchical tree of the division types found in a list of Open Civic Data (OCD) division identifiers.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import json | |
import pprint | |
# Setup: download the identifiers file for the target country directly from
# the Open Civic Data division-ids public repository:
# https://github.com/opencivicdata/ocd-division-ids/tree/master/identifiers
COUNTRY_SOURCE_FILE = 'country-us.csv'

# Shared pretty-printer used to dump the resulting tree to the console.
pp = pprint.PrettyPrinter(indent=4)
def get_nested_key_from_strings(data, keys):
    """Follow ``keys`` down through nested mappings and return what is there.

    Args:
        data: The root mapping to walk.
        keys: Sequence of keys, outermost first. An empty sequence returns
            ``data`` itself.

    Returns:
        The value stored at the end of the key path, or ``False`` when any
        key along the path is missing or an intermediate value is not a
        mapping. Note that a stored falsy value (e.g. an empty dict) cannot
        be told apart from "missing" by truthiness alone.
    """
    from collections.abc import Mapping

    node = data
    for key in keys:
        # Only mappings can be descended into. Without this guard a string
        # value would be substring-tested and an int would raise TypeError.
        if not isinstance(node, Mapping) or key not in node:
            return False
        node = node[key]
    return node
def set_nested_key_from_strings(data, keys, val):
    """Store ``val`` at the nested position described by ``keys``.

    Missing intermediate levels are created as empty dicts; existing levels
    are reused. ``data`` is modified in place.

    Args:
        data: The root dict to modify.
        keys: Non-empty sequence of keys, outermost first.
        val: The value to store under the final key (overwrites any
            existing value).

    Returns:
        None.

    Raises:
        IndexError: if ``keys`` is empty (``data`` is left untouched).
    """
    node = data
    # Walk (and create on demand) every level except the last one.
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = val
def open_file(path=None):
    """Read the identifier CSV and split every distinct id into segments.

    Args:
        path: CSV file to read. When omitted, ``COUNTRY_SOURCE_FILE`` is
            used, so the original zero-argument call keeps working.

    Returns:
        A list with one entry per distinct value of the ``id`` column, in
        first-seen order. Each entry is that id split on ``'/'``.

    Raises:
        KeyError: if the CSV has no ``id`` column.
    """
    if path is None:
        path = COUNTRY_SOURCE_FILE
    # newline='' lets the csv module handle line endings itself; the
    # explicit encoding avoids depending on the platform default.
    with open(path, 'r', newline='', encoding='utf-8') as f:
        ids = [row['id'] for row in csv.DictReader(f)]
    # dict.fromkeys removes duplicates while keeping file order. A set()
    # would reorder string ids differently on every run.
    return [ocd_id.split('/') for ocd_id in dict.fromkeys(ids)]
def process(d):
    """Build a nested dict of division *types* from split OCD identifiers.

    For each identifier, the part before ``':'`` of every segment after the
    leading ``'ocd-division'`` is used as a key, nested in segment order.
    Leaves are empty dicts.

    Args:
        d: Iterable of identifiers, each already split on ``'/'`` (for
            example ``['ocd-division', 'country:us', 'state:pa']``).

    Returns:
        The nested dict, rooted at the key ``'ocd-division'``. Identifiers
        that consist of the root segment only contribute nothing.

    Raises:
        ValueError: if an identifier does not start with ``'ocd-division'``.
    """
    tree = {}
    for parts in d:
        if parts[0] != 'ocd-division':
            raise ValueError('unexpected format: %r' % (parts,))
        levels = [segment.split(':')[0] for segment in parts[1:]]
        if not levels:
            # A bare root id adds no levels; do not create the root key.
            continue
        # Descend once per identifier, creating missing levels on the way,
        # instead of re-walking from the root for every level.
        node = tree.setdefault('ocd-division', {})
        for level in levels:
            node = node.setdefault(level, {})
    return tree
def view_collection(c):
    """Pretty-print the collection ``c`` using the module-level printer."""
    pp.pprint(c)
def write_output(c, path='output.json'):
    """Serialize the collection to a JSON file.

    Args:
        c: JSON-serializable object to write.
        path: Destination file. Defaults to ``'output.json'`` in the current
            working directory, matching the original behavior.

    Returns:
        None. An existing file at ``path`` is overwritten.
    """
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(c, f)
def main():
    """Run the pipeline: load ids, build the tree, print it, write it out."""
    split_ids = open_file()
    tree = process(split_ids)
    view_collection(tree)
    write_output(tree)


# Only run the pipeline when executed as a script, so that importing this
# module (e.g. to reuse the helpers) does not read or write any files.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment