Skip to content

Instantly share code, notes, and snippets.

@ZenulAbidin
Last active April 27, 2023 11:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZenulAbidin/dac7a015bd365e85db70b719076617d1 to your computer and use it in GitHub Desktop.
Save ZenulAbidin/dac7a015bd365e85db70b719076617d1 to your computer and use it in GitHub Desktop.
Scans a CLDR folder hierarchy for language files, and extracts the month and weekday names out of them.
# Copyright 2023 Ali Sherief
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import xml.etree.ElementTree as ET
def _collect_names(scope, group_tag, context_tag, width_tag, item_tag,
                   wide, abbreviated):
    """Copy 'format'-context names found under *scope* into two dicts.

    Walks ``group_tag -> context_tag -> width_tag -> item_tag`` below
    *scope*.  Only contexts whose ``type`` is ``'format'`` are read, and only
    the ``'wide'`` and ``'abbreviated'`` widths are kept; every other context
    or width is ignored.  Each item's ``type`` attribute becomes the key and
    its text the value, written into *wide* or *abbreviated* in place.
    """
    targets = {'wide': wide, 'abbreviated': abbreviated}
    for group in scope.iter(group_tag):
        for context in group.iter(context_tag):
            if context.get('type') != 'format':
                continue
            for width in context.iter(width_tag):
                # Compare against None explicitly: the target dicts start out
                # empty and an empty dict is falsy.
                target = targets.get(width.get('type'))
                if target is None:
                    continue
                for item in width.iter(item_tag):
                    key = item.get('type')
                    if key is not None:
                        target[key] = item.text


def parse_ldml(file_name, calendar='gregorian'):
    """Extract month and weekday names from one LDML (CLDR) locale file.

    Args:
        file_name: Path of the XML file to parse.
        calendar: ``type`` of the ``<calendar>`` element to read names from.
            A locale file may describe several calendars that reuse the same
            month keys, so mixing them lets a later calendar overwrite the
            names of an earlier one.  Pass ``None`` to read every ``months``
            and ``days`` element under ``<dates>`` regardless of calendar.

    Returns:
        A 4-tuple ``(months_long, months_short, weeks_long, weeks_short)`` of
        dicts.  Keys are the ``type`` attribute of each ``<month>`` / ``<day>``
        element; values are the element text.  The "long" dicts come from the
        ``wide`` width and the "short" dicts from the ``abbreviated`` width,
        both taken from the ``format`` context only.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        OSError: If the file cannot be opened.
    """
    root = ET.parse(file_name).getroot()
    months_long = {}
    months_short = {}
    weeks_long = {}
    weeks_short = {}
    for dates_node in root.iter('dates'):
        if calendar is None:
            scopes = [dates_node]
        else:
            scopes = [node for node in dates_node.iter('calendar')
                      if node.get('type') == calendar]
        for scope in scopes:
            _collect_names(scope, 'months', 'monthContext', 'monthWidth',
                           'month', months_long, months_short)
            _collect_names(scope, 'days', 'dayContext', 'dayWidth',
                           'day', weeks_long, weeks_short)
    return months_long, months_short, weeks_long, weeks_short
def main():
    """Print the month and weekday names of every language file in a folder.

    The folder is taken from the first command-line argument.  For each
    selected file the file name is printed, followed by the four dicts
    returned by ``parse_ldml`` under the headings ``LONG MONTHS:``,
    ``SHORT MONTHS:``, ``LONG WEEKS:`` and ``SHORT WEEKS:``.

    Raises:
        SystemExit: With a usage message when no folder argument is given.
    """
    if len(sys.argv) < 2:
        program = sys.argv[0] if sys.argv else 'cldr_names.py'
        sys.exit('usage: {} <cldr-locale-directory>'.format(program))
    dir_path = sys.argv[1]
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for file_name in sorted(os.listdir(dir_path)):
        # Only base-language files (e.g. en.xml, es.xml, fr.xml) are parsed.
        # Files with an underscore in the name, such as en_US.xml, are
        # skipped because they are not expected to carry their own date names.
        if not file_name.endswith('.xml') or '_' in file_name:
            continue
        print(file_name)
        months_long, months_short, weeks_long, weeks_short = parse_ldml(
            os.path.join(dir_path, file_name))
        print('LONG MONTHS:')
        print(months_long)
        print('SHORT MONTHS:')
        print(months_short)
        print('LONG WEEKS:')
        print(weeks_long)
        print('SHORT WEEKS:')
        print(weeks_short)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment