Skip to content

Instantly share code, notes, and snippets.

@HansNelsen
Created June 27, 2016 18:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HansNelsen/aeec93279dcd1792855d39fc37bead2e to your computer and use it in GitHub Desktop.
Save HansNelsen/aeec93279dcd1792855d39fc37bead2e to your computer and use it in GitHub Desktop.
Example of looping over openFDA drug event zip files.
#!/usr/bin/env python
''' Simple example of reading all of the zip files from the openFDA download
and doing something with them. In this case, we are building an index
of all the medicinalproduct and drugcharacterization values and a count
of how often each occurred.
'''
from collections import defaultdict
import czipfile as zipfile
import glob
import simplejson as json
from os.path import basename
# Change this glob pattern to match the location where you downloaded the zip files.
DATA_FILES = './data/export/2016-02-05/drug/event/*/*.zip'
def build_histogram(data_files=None):
    ''' Count medicinalproduct occurrences per drugcharacterization.

    Each zip archive matched by the glob pattern is expected to contain a
    JSON member named like the archive minus its trailing '.zip', holding a
    top-level 'results' list. Every entry under row['patient']['drug'] adds
    one to hist[drugcharacterization][medicinalproduct]; a missing field is
    counted under 'unknown'.

    data_files: glob pattern of the archives to read. Defaults to the
        module-level DATA_FILES when omitted.

    Returns a defaultdict of defaultdict(int) keyed by characterization,
    then by product.

    Raises KeyError if the JSON document has no 'results' key, or if the
    archive lacks the expected member.
    '''
    if data_files is None:
        # Resolved at call time so the module constant can still be edited.
        data_files = DATA_FILES
    hist = defaultdict(lambda: defaultdict(int))
    suffix = '.zip'
    for filename in glob.glob(data_files):
        json_file = basename(filename)
        # Strip only the trailing extension; str.replace would also remove
        # any '.zip' appearing in the middle of the name.
        if json_file.endswith(suffix):
            json_file = json_file[:-len(suffix)]
        # Single-argument parenthesised form prints the same text whether
        # print is a statement or a function.
        print('Processing file %s from %s' % (json_file, filename))
        datafile = zipfile.ZipFile(filename, 'r')
        # Explicit close() in finally releases the archive handle even when
        # parsing fails, without relying on context-manager support.
        try:
            json_data = json.load(datafile.open(json_file))
        finally:
            datafile.close()
        for row in json_data['results']:
            # 'or' also covers keys present with a null value.
            patient = row.get('patient') or {}
            for drug in patient.get('drug') or []:
                characterization = drug.get('drugcharacterization', 'unknown')
                product = drug.get('medicinalproduct', 'unknown')
                hist[characterization][product] += 1
    return hist
# Build the counts once at import/run time, then persist them as
# pretty-printed JSON (2-space indent) in the current working directory.
histogram = build_histogram()
with open('histogram.json', 'w') as out_file:
    out_file.write(json.dumps(histogram, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment