# HansNelsen/histogram_from_drug_event_example.py

## histogram_from_drug_event_example.py
#!/usr/bin/env python

''' Simple example of reading all of the zip files from the openFDA download
    and doing something with them. In this case, we are building an index
    of all the medicinalproduct and drugcharacterization values and a count
    of how often each occurred.
'''

from collections import defaultdict
import czipfile as zipfile
import glob
import simplejson as json
from os.path import basename

# Glob pattern selecting the zip archives to process: every *.zip file exactly
# one directory level below drug/event/ in the export tree.
# Change to the location of where you downloaded the zip files.
DATA_FILES = './data/export/2016-02-05/drug/event/*/*.zip'


def build_histogram(data_files=None):
  ''' Count how often each medicinalproduct occurs per drugcharacterization.

      Every zip archive matched by the glob pattern is opened and the JSON
      member named after the archive (file name minus its trailing `.zip`)
      is parsed. Each entry of that document's `results` list is scanned for
      `patient` -> `drug` records, and every record increments one counter.

      Args:
        data_files: glob pattern of the zip archives to read. When omitted,
          the module-level DATA_FILES pattern is used.

      Returns:
        A defaultdict mapping drugcharacterization -> medicinalproduct ->
        count. Records lacking either field are counted under 'unknown'.

      Raises:
        KeyError: if an archive lacks the expected member, or the parsed
          document has no `results` key.
  '''
  if data_files is None:
    data_files = DATA_FILES

  hist = defaultdict(lambda: defaultdict(int))
  for filename in glob.glob(data_files):
    json_file = basename(filename)
    # Strip only the trailing extension; str.replace would also remove a
    # '.zip' that happens to appear in the middle of the name.
    if json_file.endswith('.zip'):
      json_file = json_file[:-len('.zip')]
    # The parenthesized single-argument form prints the same text on
    # Python 2 and Python 3.
    print('Processing file %s from %s' % (json_file, filename))

    datafile = zipfile.ZipFile(filename, 'r')
    try:
      json_data = json.load(datafile.open(json_file))
    finally:
      # Release the archive handle even when the member is missing or the
      # JSON is malformed, so a long run does not leak one handle per file.
      datafile.close()

    for row in json_data['results']:
      for drug in row.get('patient', {}).get('drug', []):
        characterization = drug.get('drugcharacterization', 'unknown')
        product = drug.get('medicinalproduct', 'unknown')
        hist[characterization][product] += 1

  return hist


# Aggregate the counts across every archive matched by DATA_FILES.
histogram = build_histogram()

# Persist the nested characterization -> product -> count mapping as
# 2-space-indented JSON in the current working directory.
with open('histogram.json', 'w') as out_handle:
  json.dump(histogram, out_handle, indent=2)
	#!/usr/bin/env python

	''' Simple example of reading all of the zip files from the openFDA download
	and doing something with them. In this case, we are building an index
	of all the medicinalproduct and drugcharacterization values and a count
	of how often each occurred.
	'''

	from collections import defaultdict
	import czipfile as zipfile
	import glob
	import simplejson as json
	from os.path import basename

	# Change to the location of where you downloaded the zip files.
	# The first wildcard matches each sub-directory of event/, the second each
	# zip archive inside it; without them the pattern matches no download.
	DATA_FILES = './data/export/2016-02-05/drug/event/*/*.zip'


	def build_histogram(data_files=None):
	  ''' Count how often each medicinalproduct occurs per drugcharacterization.

	      Every zip archive matched by the glob pattern is opened and the JSON
	      member named after the archive (file name minus its trailing `.zip`)
	      is parsed. Each entry of that document's `results` list is scanned
	      for `patient` -> `drug` records, and every record increments one
	      counter.

	      Args:
	        data_files: glob pattern of the zip archives to read. When omitted,
	          the module-level DATA_FILES pattern is used.

	      Returns:
	        A defaultdict mapping drugcharacterization -> medicinalproduct ->
	        count. Records lacking either field are counted under 'unknown'.

	      Raises:
	        KeyError: if an archive lacks the expected member, or the parsed
	          document has no `results` key.
	  '''
	  if data_files is None:
	    data_files = DATA_FILES

	  hist = defaultdict(lambda: defaultdict(int))
	  for filename in glob.glob(data_files):
	    json_file = basename(filename)
	    # Strip only the trailing extension; str.replace would also remove a
	    # '.zip' that happens to appear in the middle of the name.
	    if json_file.endswith('.zip'):
	      json_file = json_file[:-len('.zip')]
	    # The parenthesized single-argument form prints the same text on
	    # Python 2 and Python 3.
	    print('Processing file %s from %s' % (json_file, filename))

	    datafile = zipfile.ZipFile(filename, 'r')
	    try:
	      json_data = json.load(datafile.open(json_file))
	    finally:
	      # Release the archive handle even when the member is missing or the
	      # JSON is malformed, so a long run does not leak one handle per file.
	      datafile.close()

	    for row in json_data['results']:
	      for drug in row.get('patient', {}).get('drug', []):
	        characterization = drug.get('drugcharacterization', 'unknown')
	        product = drug.get('medicinalproduct', 'unknown')
	        hist[characterization][product] += 1

	  return hist


	# Aggregate the counts across every archive matched by DATA_FILES.
	histogram = build_histogram()

	# Write the nested counts as 2-space-indented JSON; the dump call must sit
	# inside the `with` body so the file is still open when it runs.
	with open('histogram.json', 'w') as json_out:
	  json.dump(histogram, json_out, indent=2)