## thisismattmiller/explore.py

## explore.py
import json

# Load the list of records; each one is read below through its
# 'geographic' and 'topical' lists.
with open('subjects.json') as subjects_file:
	data = json.load(subjects_file)

# geo_lookup maps every geographic heading to a {topical heading: count}
# dict, counting how often the two headings occur on the same record.
geo_lookup = {}

for entry in data:
	for place in entry['geographic']:
		# The place is registered even when the record has no topical
		# headings, so it still shows up (with an empty dict) in the output.
		topic_counts = geo_lookup.setdefault(place, {})
		for topic in entry['topical']:
			topic_counts[topic] = topic_counts.get(topic, 0) + 1

# Write the nested counts out as indented JSON.
with open('results.json','w') as results_file:
	json.dump(geo_lookup, results_file, indent=2)


## parse.py
from pymarc import parse_xml_to_array
import json

def _first_subfield(record, tag, code):
	"""Return the first `code` subfield of the record's first `tag` field.

	Returns None when the record has no field with that tag, or when that
	field has no such subfield, so one sparse record produces a null value
	instead of aborting the whole export.
	"""
	fields = record.get_fields(tag)
	if not fields:
		return None
	values = fields[0].get_subfields(code)
	return values[0] if values else None


# Parse every record in the MARC XML export.
records = parse_xml_to_array('Moving-Image-Archive-dataset-MARC.xml')


# Tally of 653 second-indicator values; only filled in by the optional
# diagnostic that is commented out inside the loop below.
subject_head_types = {}

# One dict per record, in input order; dumped to subjects.json at the end.
all_data = []

for record in records:

	data = {
		'title': _first_subfield(record, '245', 'a'),
		'topical' : [],
		'geographic' : [],
		'form': [],
		'url': _first_subfield(record, '856', 'u'),
	}

	for f in record.get_fields('653'):

		# Optional diagnostic: count which second-indicator values occur.
		# if f.indicator2 not in subject_head_types:
		# 	subject_head_types[f.indicator2] = 0
		# subject_head_types[f.indicator2]+=1

		# The second indicator of the 653 field decides which list the
		# term lands in; any other indicator value is ignored.
		if f.indicator2 == '0':
			data['topical'].append(f['a'])
		elif f.indicator2 == '5':
			data['geographic'].append(f['a'])
		elif f.indicator2 == '6':
			data['form'].append(f['a'])


	all_data.append(data)
	# Progress output: echo each extracted record followed by a separator.
	print(data)
	print('----')


# Persist the extracted records as indented JSON.
with open('subjects.json','w') as outfile:
	json.dump(all_data, outfile, indent=2)
# print(subject_head_types)
	import json

	# NOTE(review): this section arrived with its nesting flattened (every
	# statement at the same one-tab indent), which cannot parse. The block
	# structure below is restored from the statement order. Because the
	# section keeps its one-tab base indent it attaches to whatever block
	# precedes it; it looks like a duplicate of the explore.py script --
	# confirm whether it should be removed or moved to column 0.

	# Load the list of records; each one is read below through its
	# 'geographic' and 'topical' lists.
	with open('subjects.json') as infile:
		data = json.load(infile)

	# geo_lookup maps every geographic heading to a {topical heading: count}
	# dict, counting how often the two headings occur on the same record.
	geo_lookup = {}

	for record in data:

		for geo in record['geographic']:

			# Register the place even if the record has no topical headings.
			if geo not in geo_lookup:
				geo_lookup[geo] = {}

			for topical in record['topical']:
				if topical not in geo_lookup[geo]:
					geo_lookup[geo][topical] = 0

				geo_lookup[geo][topical] += 1

	# Write the nested counts out as indented JSON.
	with open('results.json','w') as outfile:
		json.dump(geo_lookup, outfile, indent=2)
	from pymarc import parse_xml_to_array
	import json

	# NOTE(review): this section arrived with its nesting flattened (every
	# statement at the same one-tab indent), which cannot parse. The block
	# structure below is restored from the statement order. Because the
	# section keeps its one-tab base indent it attaches to whatever block
	# precedes it; it looks like a duplicate of the parse.py script --
	# confirm whether it should be removed or moved to column 0.

	def _first_subfield(record, tag, code):
		"""Return the first `code` subfield of the record's first `tag` field.

		Returns None when the record has no field with that tag, or when
		that field has no such subfield, so one sparse record produces a
		null value instead of aborting the whole export.
		"""
		fields = record.get_fields(tag)
		if not fields:
			return None
		values = fields[0].get_subfields(code)
		return values[0] if values else None

	# Parse every record in the MARC XML export.
	records = parse_xml_to_array('Moving-Image-Archive-dataset-MARC.xml')

	# Tally of 653 second-indicator values; only filled in by the optional
	# diagnostic that is commented out inside the loop below.
	subject_head_types = {}

	# One dict per record, in input order; dumped to subjects.json at the end.
	all_data = []

	for record in records:

		data = {
			'title': _first_subfield(record, '245', 'a'),
			'topical' : [],
			'geographic' : [],
			'form': [],
			'url': _first_subfield(record, '856', 'u'),
		}

		for f in record.get_fields('653'):

			# Optional diagnostic: count which second-indicator values occur.
			# if f.indicator2 not in subject_head_types:
			# 	subject_head_types[f.indicator2] = 0
			# subject_head_types[f.indicator2]+=1

			# The second indicator of the 653 field decides which list the
			# term lands in; any other indicator value is ignored.
			if f.indicator2 == '0':
				data['topical'].append(f['a'])
			elif f.indicator2 == '5':
				data['geographic'].append(f['a'])
			elif f.indicator2 == '6':
				data['form'].append(f['a'])

		all_data.append(data)
		# Progress output: echo each extracted record followed by a separator.
		print(data)
		print('----')

	# Persist the extracted records as indented JSON.
	with open('subjects.json','w') as outfile:
		json.dump(all_data, outfile, indent=2)
	# print(subject_head_types)