Skip to content

Instantly share code, notes, and snippets.

@mikkelee
Last active August 25, 2017 13:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikkelee/f5a5f1586a7d503a6037e95c5507716c to your computer and use it in GitHub Desktop.
Save mikkelee/f5a5f1586a7d503a6037e95c5507716c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""Flatten FamilySearch catalog records into a tab-separated film list.

Every file found under the ``json`` directory is read as one JSON catalog
record, and each entry of the record's ``film_note`` list becomes one row of
``output.csv``.
"""
import os
import json
import csv
import re

# Output columns, in the order they are written.
fields = [
    "AMT",
    "HERRED",
    "SOGN",
    "titleno",
    "oclc_record_number",
    "author",
    "title",
    "film_no",
    "film_title",
    "digital_film_no",
    "digital_film_rights",
    "fs_indexed",
    "url_FS_catalog_entry",
    "url_FS_film_view",
    "url_FS_index_search",
]

# Matches an author heading of the form
#   "Den Danske Folkekirke. <name> (<first>, <second>)"
# where the parenthesised part, and the ", <second>" tail inside it, are both
# optional. Group 1 is <name>, group 2 is <first>, group 3 is <second>.
ahs = re.compile(r'Den Danske Folkekirke\. ([^(]*)(?:\(([^),]+)(?:, (.*))?\))?')


def split_author(author):
    """Parse *author* into the AMT / HERRED / SOGN output columns.

    Returns a dict that always holds "SOGN" and, depending on how many names
    appear inside the parentheses, "AMT" alone or both "HERRED" and "AMT".
    Returns None when *author* does not start with the expected heading.
    """
    found = ahs.match(author)
    if found is None:
        return None
    name, first, second = found.groups()
    place = {"SOGN": name.strip()}
    if second:
        # Two names in the parentheses: the first is the HERRED, the second
        # the AMT.
        place["HERRED"] = first
        place["AMT"] = second
    elif first:
        # A single name in the parentheses is taken to be the AMT.
        place["AMT"] = first
    return place


def film_row(data, note):
    """Build the output row for one ``film_note`` entry of a catalog record.

    *data* is the decoded JSON record and *note* one element of its
    ``film_note`` list. Every field is read as the first element of a list.
    Columns that do not apply are simply left out of the returned dict, so the
    CSV writer leaves them empty.

    Raises KeyError / IndexError when a required field is missing, and
    ValueError when ``digital_film_no`` of an unrestricted film is not numeric.
    """
    row = {
        "titleno": data["titleno"][0],
        "oclc_record_number": data["oclc_record_number"][0],
        "author": data["author"][0]["display_text"][0],
        "title": data["title"][0],
    }

    place = split_author(row["author"])
    if place is None:
        # Unparseable author: report it, but still emit the row without the
        # AMT / HERRED / SOGN columns.
        print("Kunne ikke matche: %s" % row["author"])
    else:
        row.update(place)

    row["film_no"] = note["filmno"][0]
    row["digital_film_no"] = note["digital_film_no"][0]
    row["digital_film_rights"] = note["digital_film_rights"][0]
    row["film_title"] = note["text"][0]
    # "fs_indexed" is optional in the input; a missing key is recorded as "N".
    row["fs_indexed"] = note["fs_indexed"][0] if "fs_indexed" in note else "N"

    row["url_FS_catalog_entry"] = (
        "https://www.familysearch.org/search/catalog/" + row["titleno"]
    )
    # "UNREST" means unrestricted; "NO_ACC" also occurs in the data.
    if row["digital_film_rights"] == "UNREST":
        # The film viewer URL uses the digital film number zero-padded to
        # nine digits.
        row["url_FS_film_view"] = (
            "https://www.familysearch.org/search/film/%09d?cat=%s"
            % (int(row["digital_film_no"]), row["titleno"])
        )
    # NOTE(review): the indentation of the copy this script was restored from
    # was lost; this check is assumed to be independent of the rights check
    # above -- confirm.
    if row["fs_indexed"] == "Y":
        row["url_FS_index_search"] = (
            "https://www.familysearch.org/search/record/results"
            "?count=20&query=%2Bfilm_number%3A" + row["film_no"]
        )
    return row


def main(json_dir="json", out_path="output.csv"):
    """Write one tab-separated row per film note found under *json_dir*.

    Every file below *json_dir* is parsed as JSON; the header plus all rows
    are written to *out_path* as UTF-8 text.
    """
    # newline="" lets the csv module control line endings itself.
    with open(out_path, "w", newline="", encoding="utf-8") as outfile:
        writer = csv.DictWriter(outfile, fields, delimiter="\t")
        writer.writeheader()
        for subdir, _dirs, files in os.walk(json_dir):
            for name in files:
                with open(os.path.join(subdir, name), encoding="utf-8") as infile:
                    data = json.load(infile)
                for note in data["film_note"]:
                    writer.writerow(film_row(data, note))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment