Skip to content

Instantly share code, notes, and snippets.

@douglashill
Last active August 24, 2022 05:39
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save douglashill/54a138f3de68790c29112b9d8a1fac9a to your computer and use it in GitHub Desktop.
Save douglashill/54a138f3de68790c29112b9d8a1fac9a to your computer and use it in GitHub Desktop.
Extracts the most common translations from Apple’s glossary files. Read more: https://douglashill.co/localisation-using-apples-glossaries/
#! /usr/bin/swift
// Douglas Hill, March 2020
/*
Extracts the most common translations from Apple’s glossary files.
This script helped with localisation for KeyboardKit (https://github.com/douglashill/KeyboardKit) by leveraging Apple’s existing translations.
More detail in the article at https://douglashill.co/localisation-using-apples-glossaries/
It generates each needed translation by looking up the most common translation for given English text.
## Adapting for other projects
1. Set the outputDirectory below.
2. Change neededLocalisations to the keys and English text your project needs (or adapt the script to read directly from your English .strings file).
## Generating the .strings files
1. Download all macOS and iOS glossary DMGs from the Apple Developer website (sign in required): https://developer.apple.com/download/more
2. Mount all of these DMGs on your Mac. There should be about 80. DiskImageMounter may get stuck if you try mounting ~20 or more at once, so opening in batches of ~15 is recommended.
3. Run this script. Look out for any errors in the console. That may indicate some DMGs failed to mount.
4. Manually edit all the .strings files for quality of translation.
## Adding new localised strings
1. Add an entry in neededLocalisations. The order of this array matches the final order in the `.strings` files.
2. Follow the steps for generating above.
*/
import Foundation
// MARK: Input data
/// Where the generated output goes: each language gets its own `.lproj` directory
/// inside this directory, and the `.strings` file is written inside that.
let outputDirectory: URL = .init(fileURLWithPath: "<#PUT A PATH TO WHERE THE LPROJ DIRECTORIES SHOULD BE PLACED HERE#>")
/// One entry of the generated `.strings` files: a key paired with the English
/// text whose translation is looked up in Apple’s glossary files.
struct NeededLocalisation {
    /// Key written on the left-hand side of the entry in the generated `.strings` file.
    let targetKey: String

    /// English text used to find matching translations in the glossaries.
    let english: String
}
// Ideally this list would be read from an existing English .strings file instead of being hard-coded in the script.
// The order of the entries here is the order of the entries in each generated file.
let neededLocalisations: [NeededLocalisation] = [
    .init(targetKey: "app_newWindow", english: "New Window"),
    .init(targetKey: "app_settings", english: "Settings"),
    .init(targetKey: "barButton_action", english: "Share"),
    .init(targetKey: "barButton_add", english: "Add"),
    // etc.
]
/// A language to generate a `.strings` file for.
struct Localisation {
    /// Language code; used as the name of the `.lproj` directory the output is written to.
    let code: String

    /// Text that the name of a mounted glossary volume must contain to be used for this language.
    let volumeName: String
}
// Every language that gets a generated .strings file.
// The volume name of the "en" entry is a placeholder: the script writes the English text
// directly for that code and never looks up glossary volumes for it.
let localisations: [Localisation] = [
    .init(code: "ar", volumeName: "Arabic"),
    .init(code: "ca", volumeName: "Catalan"),
    .init(code: "cs", volumeName: "Czech"),
    .init(code: "da", volumeName: "Danish"),
    .init(code: "de", volumeName: "German"),
    .init(code: "el", volumeName: "Greek"),
    .init(code: "en", volumeName: "???"),
    .init(code: "en-AU", volumeName: "Australian English"),
    .init(code: "en-GB", volumeName: "British English"),
    .init(code: "es", volumeName: "Spanish"),
    .init(code: "es-419", volumeName: "Latin"),
    .init(code: "fi", volumeName: "Finnish"),
    .init(code: "fr", volumeName: "Universal French"),
    .init(code: "fr-CA", volumeName: "Canadian"),
    .init(code: "he", volumeName: "Hebrew"),
    .init(code: "hi", volumeName: "Hindi"),
    .init(code: "hr", volumeName: "Croatian"),
    .init(code: "hu", volumeName: "Hungarian"),
    .init(code: "id", volumeName: "Indonesian"),
    .init(code: "it", volumeName: "Italian"),
    .init(code: "ja", volumeName: "Japanese"),
    .init(code: "ko", volumeName: "Korean"),
    .init(code: "ms", volumeName: "Malay"),
    .init(code: "nb", volumeName: "Norwegian"),
    .init(code: "nl", volumeName: "Dutch"),
    .init(code: "pl", volumeName: "Polish"),
    .init(code: "pt-BR", volumeName: "Brazilian"),
    .init(code: "pt-PT", volumeName: "Portuguese"),
    .init(code: "ro", volumeName: "Romanian"),
    .init(code: "ru", volumeName: "Russian"),
    .init(code: "sk", volumeName: "Slovak"),
    .init(code: "sv", volumeName: "Swedish"),
    .init(code: "th", volumeName: "Thai"),
    .init(code: "tr", volumeName: "Turkish"),
    .init(code: "uk", volumeName: "Ukrainian"),
    .init(code: "vi", volumeName: "Vietnamese"),
    .init(code: "zh-Hans", volumeName: "Simplified Chinese"),
    .init(code: "zh-Hant", volumeName: "Traditional Chinese"),
    .init(code: "zh-HK", volumeName: "Hong Kong"),
]
// MARK: - Support
extension Collection {
    /// The only element in the collection, or `nil` if the collection is empty
    /// or contains more than one element.
    ///
    /// Looks at no more than the first two positions, so it avoids counting the
    /// whole collection (which is linear for non-random-access collections) and
    /// needs no force-unwrap.
    var single: Element? {
        guard let onlyElement = first, dropFirst().isEmpty else {
            return nil
        }
        return onlyElement
    }
}
extension URL {
    /// Returns a URL made by appending each of the given path components in order.
    ///
    /// Every component except the last is appended as a directory; the last one is
    /// appended as a non-directory. An empty array returns the receiver unchanged.
    ///
    /// - Parameter pathComponents: The components to append, outermost first.
    /// - Returns: The receiver with all components appended.
    public func appendingPathComponents(_ pathComponents: [String]) -> URL {
        var result = self
        let lastIndex = pathComponents.count - 1
        for (index, component) in pathComponents.enumerated() {
            result.appendPathComponent(component, isDirectory: index < lastIndex)
        }
        return result
    }
}
extension XMLElement {
    /// Returns the child element with the given name, provided there is exactly one.
    ///
    /// - Parameter name: The element name to look for among the children.
    /// - Returns: The matching child, or `nil` if no child or more than one child has that name.
    func singleChild(withName name: String) -> XMLElement? {
        let matches = elements(forName: name)
        return matches.count == 1 ? matches[0] : nil
    }
}
extension XMLNode {
    /// The string value of this node’s only child when that child is a text node.
    ///
    /// `nil` if the node has no children, has more than one child, or its only
    /// child is not of kind `.text`.
    var textOfSingleChild: String? {
        guard let childNodes = children, childNodes.count == 1 else {
            return nil
        }
        let onlyChild = childNodes[0]
        return onlyChild.kind == .text ? onlyChild.stringValue : nil
    }
}
/// A pair of English text and its translation, as read from one glossary entry.
struct LocalisationEntry {
    /// English text of the entry (the text of the glossary’s `base` element).
    let base: String

    /// Localised text of the entry (the text of the glossary’s `tran` element).
    let translation: String
}
/// Reads one glossary file and returns the entries whose English text is wanted.
///
/// The file is parsed as XML. Every `TextItem` inside every `File` element under the
/// root is visited; its `TranslationSet` supplies the English text (`base`) and the
/// translation (`tran`). An entry is skipped when its English text is not a single
/// text node, is not in `allowedBases`, or when its translation is not a single text node.
///
/// The script traps (crashes) if the file can’t be parsed as XML, has no root element,
/// or a visited `TextItem` lacks exactly one `TranslationSet`, `base` or (for allowed
/// entries) `tran` child.
///
/// - Parameters:
///   - fileURL: Location of the glossary file to read.
///   - allowedBases: The English texts to keep; all other entries are dropped.
/// - Returns: The matching entries in document order.
func readLocalisationEntriesFromFile(at fileURL: URL, allowedBases: Set<String>) -> [LocalisationEntry] {
    // The pool releases the parsed XML tree as soon as this file has been processed.
    return autoreleasepool {
        let document = try! XMLDocument(contentsOf: fileURL, options: [.nodePreserveWhitespace])
        var entries: [LocalisationEntry] = []
        for fileElement in document.rootElement()!.elements(forName: "File") {
            for textItem in fileElement.elements(forName: "TextItem") {
                let translationSet = textItem.singleChild(withName: "TranslationSet")!
                // Filter on the English text first so the translation is only read for wanted entries.
                guard
                    let english = translationSet.singleChild(withName: "base")!.textOfSingleChild,
                    allowedBases.contains(english)
                else {
                    continue
                }
                guard let localised = translationSet.singleChild(withName: "tran")!.textOfSingleChild else {
                    continue
                }
                entries.append(LocalisationEntry(base: english, translation: localised))
            }
        }
        return entries
    }
}
// MARK: - The script itself
/// Escapes `text` so it can be written between double quotes in a `.strings` file.
///
/// Backslashes, double quotes, newlines, carriage returns and tabs are replaced by
/// their backslash escape sequences; every other character is copied unchanged.
/// Without this, a value containing `"` or `\` produces a file that can’t be parsed.
///
/// NOTE(review): this assumes the text read from the glossaries is raw, i.e. not
/// already escaped — confirm against a glossary entry that contains a quote or newline.
func escapedForStringsFile(_ text: String) -> String {
    var escaped = ""
    // Work on Unicode scalars so that "\r\n" (a single Character) is still escaped piecewise.
    for scalar in text.unicodeScalars {
        switch scalar {
        case "\\": escaped += "\\\\"
        case "\"": escaped += "\\\""
        case "\n": escaped += "\\n"
        case "\r": escaped += "\\r"
        case "\t": escaped += "\\t"
        default: escaped.unicodeScalars.append(scalar)
        }
    }
    return escaped
}

/// The English texts to look for; entries with any other English text are ignored while reading.
let allowedBases = Set<String>(neededLocalisations.map { $0.english })

/// All currently mounted volumes. The glossary disk images are expected to be among them.
let volumes = FileManager.default.mountedVolumeURLs(includingResourceValuesForKeys: nil, options: [])!

for localisation in localisations {
    // Drain temporary objects after each language to keep peak memory down.
    autoreleasepool {
        let lines: [String]
        if localisation.code == "en" {
            // English needs no glossary lookup: the text comes straight from neededLocalisations.
            lines = neededLocalisations.map { needed in
                "\"\(needed.targetKey)\" = \"\(escapedForStringsFile(needed.english))\";"
            }
        } else {
            let matchingVolumes = volumes.filter { $0.lastPathComponent.contains(localisation.volumeName) }
            print("ℹ️ Localising \(localisation.volumeName) (\(localisation.code)) from \(matchingVolumes.count) volumes.") // There should be 2 volumes.
            precondition(
                matchingVolumes.count == 2,
                "Expected 2 mounted glossary volumes containing “\(localisation.volumeName)” in their name but found \(matchingVolumes.count). Some DMGs may have failed to mount."
            )

            let localisationEntries = matchingVolumes.flatMap { volumeURL -> [LocalisationEntry] in
                let glossaryFileURLs = try! FileManager.default.contentsOfDirectory(at: volumeURL, includingPropertiesForKeys: nil, options: [])
                return glossaryFileURLs.flatMap { readLocalisationEntriesFromFile(at: $0, allowedBases: allowedBases) }
            }
            print("✅ Read \(localisationEntries.count) localisation entries.")

            // English text → (translation → number of times that translation occurs).
            var translationsByEnglishText: [String: [String: Int]] = [:]
            for entry in localisationEntries {
                translationsByEnglishText[entry.base, default: [:]][entry.translation, default: 0] += 1
            }
            print("✅ There are \(translationsByEnglishText.count) unique English strings.")

            lines = neededLocalisations.map { needed -> String in
                guard
                    let counts = translationsByEnglishText[needed.english],
                    let mostCommon = counts.max(by: { lhs, rhs in
                        // Dictionary order varies between runs, so ties on count are broken by
                        // the translation text itself (the smallest one wins) to keep the
                        // output reproducible.
                        lhs.value != rhs.value ? lhs.value < rhs.value : lhs.key > rhs.key
                    })
                else {
                    fatalError("No \(localisation.volumeName) (\(localisation.code)) translation found for “\(needed.english)” (key \(needed.targetKey)).")
                }
                return "\"\(needed.targetKey)\" = \"\(escapedForStringsFile(mostCommon.key))\";"
            }
        }

        let targetStringsFileURL = outputDirectory.appendingPathComponents(["\(localisation.code).lproj", "Localizable.strings"])
        try! FileManager.default.createDirectory(at: targetStringsFileURL.deletingLastPathComponent(), withIntermediateDirectories: true, attributes: nil)
        let fileContents = """
            // This file was generated from Apple localisation glossaries by ExtractLocalisedStrings.
            \(lines.joined(separator: "\n"))
            """
        try! fileContents.write(to: targetStringsFileURL, atomically: false, encoding: .utf8)
    }
}
/*
The MIT License (MIT)
Copyright 2020 Douglas Hill
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment