Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Example of looping over openFDA drug event zip files.
#!/usr/bin/env python
''' Simple example of reading all of the zip files from the openFDA download
and doing something with them. In this case, we are building an index
of all the medicinalproduct and drugcharacterization values and a count
of how often each occurred.
'''
from collections import defaultdict
import czipfile as zipfile
import glob
import simplejson as json
from os.path import basename
# Change this to the location where you downloaded the zip files.
DATA_FILES = './data/export/2016-02-05/drug/event/*/*.zip'
def build_histogram(data_files=None):
    '''Count medicinalproduct values per drugcharacterization value.

    Every zip archive matching the glob pattern is opened, the JSON member
    named after the archive (the archive's base name minus its trailing
    ".zip") is parsed, and each entry under results -> patient -> drug is
    tallied.

    Args:
      data_files: Optional glob pattern selecting the zip files to read.
        When omitted, the module-level DATA_FILES pattern is used.

    Returns:
      A defaultdict keyed by drugcharacterization whose values are
      defaultdicts keyed by medicinalproduct holding occurrence counts.
      A drug entry missing either key is counted under 'unknown'.

    Raises:
      KeyError: if a parsed JSON document has no 'results' key.
    '''
    if data_files is None:
        data_files = DATA_FILES

    hist = defaultdict(lambda: defaultdict(int))
    for filename in glob.glob(data_files):
        # Strip only the trailing extension; str.replace would also remove
        # any '.zip' occurring earlier in the name and yield a wrong member.
        json_file = basename(filename)
        if json_file.endswith('.zip'):
            json_file = json_file[:-len('.zip')]
        # Single-argument call form prints identically on Python 2 and 3.
        print('Processing file %s from %s' % (json_file, filename))

        datafile = zipfile.ZipFile(filename, 'r')
        try:
            json_data = json.load(datafile.open(json_file))
        finally:
            # Release the archive handle even when parsing fails, so a long
            # run over many files does not accumulate open descriptors.
            datafile.close()

        for row in json_data['results']:
            for drug in row.get('patient', {}).get('drug', []):
                characterization = drug.get('drugcharacterization', 'unknown')
                product = drug.get('medicinalproduct', 'unknown')
                hist[characterization][product] += 1
    return hist
# Build the full index, then persist it as indented JSON in the working
# directory.
histogram = build_histogram()
with open('histogram.json', 'w') as json_out:
    json_out.write(json.dumps(histogram, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.