Skip to content

Instantly share code, notes, and snippets.

@ZenulAbidin
Last active April 27, 2023 11:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZenulAbidin/7e2ca79abed667d23e09531e891f116a to your computer and use it in GitHub Desktop.
Save ZenulAbidin/7e2ca79abed667d23e09531e891f116a to your computer and use it in GitHub Desktop.
Scans a CLDR folder hierarchy for language files and extracts the language names from them.
# Copyright 2022 Ali Sherief
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import xml.etree.ElementTree as ET
def parse_ldml(file_name):
    """Return a mapping of language codes to display names from an LDML file.

    Every ``<language>`` element found beneath a ``<languages>`` element
    inside ``<localeDisplayNames>`` contributes one entry: its ``type``
    attribute is the key and its text content is the value.

    Elements that carry an ``alt`` attribute (alternate forms of a name)
    never replace the primary, non-``alt`` name for the same ``type``; they
    are used only as a fallback when no primary entry exists. Elements
    without a ``type`` attribute are skipped because there is no key to
    store them under.

    Args:
        file_name: Path of the LDML XML file to parse.

    Returns:
        A dict mapping each ``type`` value to the element text (which may be
        ``None`` for an empty element).

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(file_name).getroot()
    language_map = {}
    # First alternate name seen per code; consulted only when no primary
    # name was found for that code.
    alt_names = {}
    for node in root.iter('localeDisplayNames'):
        for languages in node.iter('languages'):
            for name in languages.iter('language'):
                short_name = name.get('type')
                if short_name is None:
                    continue
                if 'alt' in name.attrib:
                    alt_names.setdefault(short_name, name.text)
                else:
                    language_map[short_name] = name.text
    for short_name, long_name in alt_names.items():
        language_map.setdefault(short_name, long_name)
    return language_map
def main():
    """Print the language map of each base-language LDML file in a directory.

    The directory path is read from the first command-line argument. Each
    file in it whose name ends in ``.xml`` and contains no underscore is
    parsed with ``parse_ldml`` and the resulting dict is printed, one per
    file, in sorted file-name order.

    Exits with a usage message on stderr when no directory argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: {} <cldr-directory>'.format(sys.argv[0]))
    dir_path = sys.argv[1]
    # os.listdir returns entries in arbitrary order; sort so that the
    # output is reproducible from run to run.
    for file_name in sorted(os.listdir(dir_path)):
        # Only parse the language files e.g. "en", "es", "fr".
        # The language-country files such as en_US do not have language
        # information and should be skipped.
        if file_name.endswith('.xml') and '_' not in file_name:
            language_map = parse_ldml(os.path.join(dir_path, file_name))
            print(language_map)
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment