Skip to content

Instantly share code, notes, and snippets.

@drewmccormack
Created February 19, 2020 14:21
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drewmccormack/98694f6605867419ab7f7e60ccc73136 to your computer and use it in GitHub Desktop.
Save drewmccormack/98694f6605867419ab7f7e60ccc73136 to your computer and use it in GitHub Desktop.
Extracts terms from Apple .lg glossary files and combines them into a single JSON file
#!/usr/bin/env python
import sys, os.path
import os
import json
from xml.dom.minidom import parse
# Combine the translations found in a set of .lg glossary files (XML) into a
# single 'glossary.json' file written to the current working directory.
#
# Expected layout: <cwd>/<language dir>/<lgPath>, for every combination of
# languageDirs and lgPaths below.
#
# Runs on both Python 2.7 and Python 3: text is kept as unicode strings
# (json.dump escapes non-ASCII itself), and the result dictionary is never
# mutated while it is being iterated.

# Glossary files to read, relative to each language directory.
lgPaths = ["iOS/AuthKitUI.lg", "iOS/CalendarUIKit.lg", "iOS/iCloudDriveSettings.lg", "iOS/MobileNotes.lg", "macOS/CalendarUI.lg", "macOS/Finder_FE.lg", "macOS/Finder_FinderKit.lg", "macOS/iCloudPrefPane.lg", "macOS/Notes.lg", "macOS/Reminders.lg", "macOS/TextEdit.lg"]

# Language directories to combine; an entry is only kept in the output when
# every one of these languages provides a translation for it.
languageDirs = ["Dutch", "French"]


def firstText(parent, tagName):
    """Return the text of the first ``tagName`` element below ``parent``.

    Returns None when no such element exists, when it has no child nodes
    (e.g. an empty ``<tran></tran>``), or when its first child carries no
    ``data`` attribute, so that callers can skip the item instead of
    crashing with IndexError/AttributeError.
    """
    elements = parent.getElementsByTagName(tagName)
    if not elements or not elements[0].childNodes:
        return None
    return getattr(elements[0].childNodes[0], "data", None)


def addTerms(resultDict, dom, lgPath):
    """Merge the TextItems of one parsed glossary document into resultDict.

    resultDict maps an id of the form ``<lgPath>__<Filepath>__<Position>``
    to a dictionary ``{"en": <base text>, <loc of tran>: <tran text>}``.
    Calling this once per language accumulates all translations of the same
    id in the same inner dictionary.

    Items lacking a Filepath, Position, TranslationSet, base text or
    translation text are skipped.
    """
    for fileNode in dom.documentElement.getElementsByTagName("File"):
        filepath = firstText(fileNode, "Filepath")
        if filepath is None:
            continue
        for item in fileNode.getElementsByTagName("TextItem"):
            position = firstText(item, "Position")
            transsets = item.getElementsByTagName("TranslationSet")
            if position is None or not transsets:
                continue
            transset = transsets[0]
            english = firstText(transset, "base")
            foreign = firstText(transset, "tran")
            if english is None or foreign is None:
                continue
            langCode = transset.getElementsByTagName("tran")[0].getAttribute('loc')
            idInGlossary = lgPath + "__" + filepath + "__" + position
            entry = resultDict.setdefault(idInGlossary, {})
            # Keep unicode text as-is: encoding to UTF-8 here would produce
            # bytes on Python 3, which json cannot serialize.
            entry["en"] = english
            entry[langCode] = foreign


def dropIncomplete(resultDict, requiredCount):
    """Return a new dict holding only entries with ``requiredCount`` languages.

    A new dictionary is built rather than deleting in place, because
    deleting keys while iterating a dictionary raises RuntimeError on
    Python 3.
    """
    return {k: v for k, v in resultDict.items() if len(v) == requiredCount}


def toJsonList(resultDict):
    """Convert ``{id: {lang: text}}`` into the list structure written to JSON.

    Each entry becomes ``{"id": id, "textByLanguage": [lang, text, ...]}``.
    The per-language dictionary is flattened into an alternating list
    because, per the original author's note, Swift Codable can't handle a
    dictionary where the keys are not strings.
    """
    jsonList = []
    for k, langDict in resultDict.items():
        langList = []
        for langCode, text in langDict.items():
            langList.append(langCode)
            langList.append(text)
        jsonList.append({"id": k, "textByLanguage": langList})
    return jsonList


def main():
    """Read every glossary for every language and write 'glossary.json'."""
    rootdir = os.getcwd()
    resultDict = {}
    for lang in languageDirs:
        for lgPath in lgPaths:
            path = os.path.join(rootdir, lang, lgPath)
            addTerms(resultDict, parse(path), lgPath)

    # Remove any entry that doesn't include all languages
    # (+1 accounts for the English base text stored under "en").
    resultDict = dropIncomplete(resultDict, len(languageDirs) + 1)

    with open('glossary.json', 'w') as outfile:
        json.dump(toJsonList(resultDict), outfile, sort_keys=True, indent=4)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment