Skip to content

Instantly share code, notes, and snippets.

@rsinger
Created March 9, 2010 14:10
Show Gist options
  • Save rsinger/326605 to your computer and use it in GitHub Desktop.
Save rsinger/326605 to your computer and use it in GitHub Desktop.
import simplejson as json
def _record_keys(seen, keys):
    """Append each key in *keys* to the list *seen* unless it is already there."""
    for key in keys:
        if key not in seen:
            seen.append(key)


def inspectJSON(fh):
    """Collect the field names used in a tab-separated dump of JSON records.

    Each non-blank line of *fh* must have the form
    ``<id> TAB <type> TAB <json object>``.

    Args:
        fh: Any iterable of text lines (an open file, a list of strings, ...).

    Returns:
        A dict of lists, each in first-seen order:

        * one entry per record type, listing the top-level keys seen on
          records of that type ("/type/edition" is always present, even
          when the input is empty);
        * one entry per top-level field whose value is a JSON object or a
          list containing JSON objects, listing the keys of those nested
          objects.

    Raises:
        ValueError: if a non-blank line has fewer than three tab-separated
            columns, or its third column is not valid JSON.
    """
    fields = {"/type/edition": []}
    for line in fh:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        if not line.strip():
            continue
        # maxsplit=2 keeps the JSON column intact even if it contains tabs.
        _resource_id, resourceType, jsonData = line.split("\t", 2)
        resource = json.loads(jsonData)

        # setdefault: record types other than the pre-seeded one must not
        # raise KeyError.
        _record_keys(fields.setdefault(resourceType, []), resource)

        for name, value in resource.items():
            if isinstance(value, dict):
                _record_keys(fields.setdefault(name, []), value)
            elif isinstance(value, list):
                # Only dict elements carry keys; scalars mixed into the
                # list are skipped rather than iterated as if they were
                # objects.
                for item in value:
                    if isinstance(item, dict):
                        _record_keys(fields.setdefault(name, []), item)
    return fields
if __name__ == "__main__":
    import sys

    # Script entry point: print the field summary for one dump file.
    # An optional first command-line argument overrides the default path.
    if len(sys.argv) > 1:
        dump_path = sys.argv[1]
    else:
        dump_path = '/Volumes/External 7/shared/open-library/edition-2009-09-11.txt'

    # The context manager closes the file even if parsing raises.
    with open(dump_path) as dump:
        # Parenthesised single-argument print works on Python 2 and 3.
        print(inspectJSON(dump))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment