# ZenulAbidin/cldr-gen-locale-data.py

## cldr-gen-locale-data.py
# Copyright 2023 Ali Sherief
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import sys
import xml.etree.ElementTree as ET

# Matches glibc UCS symbolic names: <Uxxxx> or <Uxxxxxxxx> (4 or 8 hex digits).
_UCS_SYMBOL_RE = re.compile(r'<U([0-9A-Fa-f]{8}|[0-9A-Fa-f]{4})>')


def decode_utf8(encoded_string):
    """Expand ``<Uxxxx>`` / ``<Uxxxxxxxx>`` escapes into the characters they name.

    Despite the name, the input is not UTF-8 bytes: it is text in which
    characters are spelled as hexadecimal code points between ``<U`` and
    ``>``.  Everything that is not such an escape is copied unchanged.

    Args:
        encoded_string: Text that may contain UCS escapes.

    Returns:
        The text with every well-formed escape replaced by its character.
        Escapes naming a value above U+10FFFF are left verbatim.
    """
    def _substitute(match):
        code_point = int(match.group(1), 16)
        if code_point > 0x10FFFF:
            # Not a valid Unicode code point; chr() would raise, so keep the text.
            return match.group(0)
        return chr(code_point)

    # A single regex pass replaces the per-character scan and the repeated
    # string concatenation of the previous implementation.
    return _UCS_SYMBOL_RE.sub(_substitute, encoded_string)

def read_locale_file(file_name, language_map, code):
    """Parse a glibc locale source file and store it in ``language_map[code]``.

    The file is treated as a list of sections.  A line without any space
    (for example ``LC_TIME``) opens a section; a line of the form
    ``key value;value;...`` adds values to ``key`` in the current section.
    A value line ending in the escape character ``/`` continues on the next
    line, ``//`` stands for a literal ``/``, and ``<Uxxxx>`` escapes are
    expanded with ``decode_utf8``.  Blank lines, ``END ...`` lines and comment
    lines (starting with ``%``) are ignored.  Keys seen before the first
    section header are stored under the ``""`` section.

    If ``LC_TIME`` contains a ``copy "other"`` directive, the referenced
    locale file is parsed too (and stored under its own code) and its
    ``LC_TIME`` section is shared with ``code``.

    Args:
        file_name: Path of the locale source file.
        language_map: Mapping of locale code to per-locale dict; updated in place.
        code: Key under which the parsed data is stored.

    Returns:
        ``language_map``, with ``language_map[code]["locale_info"]`` set to a
        ``{section: {key: [values]}}`` dict.
    """
    comment_char = '%'
    escape_char = '/'

    def split_values(text):
        # "//" is an escaped "/"; values are ';'-separated and empty items dropped.
        return [item for item in text.replace('//', '/').split(';') if item != '']

    # Decode leniently: stray non-UTF-8 bytes (seen in comments) become
    # replacement characters instead of raising mid-read, which used to make
    # the reader silently drop whatever it had buffered.
    with open(file_name, encoding="utf-8", errors="replace") as f:
        all_lines = f.readlines()

    current_section = ""
    kv_pairs = {"": {}}
    continue_line = False
    key = None

    for raw_line in all_lines:
        line = decode_utf8(raw_line.replace('\t', ' ').strip())
        if line == '' or line.startswith("END"):
            continue

        if continue_line:
            # Continuation of the previous key's value list.  Comment lines
            # are deliberately not skipped here: a continued value may
            # legitimately start with '%'.
            continue_line = line[-1] == escape_char
            if continue_line:
                line = line[:-1]
            kv_pairs[current_section][key] += split_values(line)
            continue

        if line.startswith(comment_char):
            continue

        if ' ' not in line:
            # A bare word is a section header; it starts a fresh section.
            current_section = line
            kv_pairs[current_section] = {}
            continue

        key, _, value = line.partition(' ')
        # The "escape_char /" declaration itself ends in '/', so it must not
        # be mistaken for a continued line.
        if value.endswith(escape_char) and key != "escape_char":
            continue_line = True
            value = value[:-1]
        kv_pairs[current_section].setdefault(key, []).extend(split_values(value.strip()))

    language_map.setdefault(code, {})["locale_info"] = kv_pairs

    # LC_TIME may merely point at another locale; resolve that reference so
    # callers always find the real fields.
    copy_target = kv_pairs.get("LC_TIME", {}).get("copy")
    if copy_target:
        ref_code = copy_target[0].replace('"', '')
        if ref_code != code:  # a self-reference would recurse forever
            # Prefer a file next to the one being read, then the system directory.
            sibling = os.path.join(os.path.dirname(file_name), ref_code)
            if os.path.isfile(sibling):
                ref_path = sibling
            else:
                ref_path = "/usr/share/i18n/locales/{}".format(ref_code)
            # Keep whatever is already known about ref_code (e.g. its name).
            language_map.setdefault(ref_code, {})
            language_map = read_locale_file(ref_path, language_map, ref_code)
            ref_lc_time = language_map[ref_code]["locale_info"].get("LC_TIME")
            if ref_lc_time is not None:
                language_map[code]["locale_info"]["LC_TIME"] = ref_lc_time
    return language_map

def parse_ldml_language_map(file_name):
    """Build the initial language map from an LDML file's language display names.

    Reads every ``localeDisplayNames/languages/language`` element and keeps
    those whose ``type`` has no underscore (plain language codes).

    Args:
        file_name: Path (or file object) of the LDML document, e.g. ``en.xml``.

    Returns:
        ``{code: {"name": UPPER-CASED DISPLAY NAME}}``.
    """
    root = ET.parse(file_name).getroot()
    language_map = {}

    for display_names in root.iter('localeDisplayNames'):
        for languages in display_names.iter('languages'):
            for element in languages.iter('language'):
                short_name = element.get('type')
                long_name = element.text
                # Skip regional variants and elements without a usable code or name.
                if not short_name or "_" in short_name or not long_name:
                    continue
                # An alt="short"/"variant"/... entry must not replace the
                # primary display name recorded earlier for the same code.
                if 'alt' in element.attrib and short_name in language_map:
                    continue
                language_map[short_name] = {"name": long_name.upper()}

    return language_map


def _collect_calendar_names(calendar, group_tag, context_tag, width_tag, item_tag, wide, abbreviated):
    """Copy one calendar's "format"-context names into ``wide`` / ``abbreviated``."""
    targets = {'wide': wide, 'abbreviated': abbreviated}
    for group in calendar.iter(group_tag):
        for context in group.iter(context_tag):
            if context.get('type') != 'format':
                continue
            for width in context.iter(width_tag):
                target = targets.get(width.get('type'))
                if target is None:  # other widths (e.g. narrow) are not collected
                    continue
                for item in width.iter(item_tag):
                    target[item.attrib['type']] = item.text


def parse_ldml_locales(language_map, file_name, iso_code):
    """Extract Gregorian month and weekday names from an LDML locale file.

    Only the ``gregorian`` calendar and the ``format`` context are read; the
    ``wide`` and ``abbreviated`` widths become the long and short tables.

    Args:
        language_map: Mapping of language code to per-language dict; updated in place.
        file_name: Path (or file object) of the LDML document.
        iso_code: Key in ``language_map`` that receives the tables.

    Returns:
        ``language_map``, where ``language_map[iso_code]`` has the keys
        ``months_long``, ``months_short``, ``weeks_long`` and ``weeks_short``,
        each a ``{type attribute: name}`` dict (possibly empty).
    """
    root = ET.parse(file_name).getroot()

    months_long = {}
    months_short = {}
    weeks_long = {}
    weeks_short = {}

    for dates in root.iter('dates'):
        calendars = dates.find('calendars')
        if calendars is None:
            # A <dates> element is not required to contain <calendars>.
            continue
        for calendar in calendars.iter('calendar'):
            if calendar.get('type') != 'gregorian':  # only gregorian is supported for now
                continue
            _collect_calendar_names(calendar, 'months', 'monthContext', 'monthWidth', 'month',
                                    months_long, months_short)
            _collect_calendar_names(calendar, 'days', 'dayContext', 'dayWidth', 'day',
                                    weeks_long, weeks_short)

    # setdefault: the file may describe a language absent from the name map.
    entry = language_map.setdefault(iso_code, {})
    entry["months_long"] = months_long
    entry["months_short"] = months_short
    entry["weeks_long"] = weeks_long
    entry["weeks_short"] = weeks_short
    return language_map

def _print_name_tables(code, language):
    """Print one language's four CLDR name tables as C++ assignments."""
    tables = (
        ("long_months", "months_long"),
        ("short_months", "months_short"),
        ("long_weeks", "weeks_long"),
        ("short_weeks", "weeks_short"),
    )
    for position, (member, source_key) in enumerate(tables):
        if position:
            print("")  # empty line between tables
        for identifier, data in language[source_key].items():
            print("    data.{}[\"{}\"][\"{}\"] = u8\"{}\";".format(member, code, identifier, data))


def print_xdatetime_macros(language_map):
    """Print the CLDR month/weekday tables as preprocessor-guarded C++ code.

    Languages lacking any of the four tables (``months_long``,
    ``months_short``, ``weeks_long``, ``weeks_short``) are skipped.  The
    ``#ifndef X_DATETIME_NO_LOCALES`` branch emits every language; the
    ``#else`` branch wraps each language in
    ``#ifdef X_DATETIME_ONLY_LOCALE_<NAME>``.

    Args:
        language_map: Mapping of language code to per-language dict.
    """
    required = ("months_long", "months_short", "weeks_long", "weeks_short")
    complete = [
        (code, language)
        for code, language in language_map.items()
        if all(language.get(key) for key in required)
    ]
    # ',', ' ' and '-' cannot appear in a macro name.
    to_identifier = str.maketrans(", -", "___")

    print("// You must define an X_DATETIME_ONLY_LOCALE_* macro, which is only read if you don't want locales.")
    print("// English is set by default in the event that locales are disabled - which are also enabled by default.")
    print("// This means if you want to disable the English locale, you must undefine this macro before including this file.")
    print("#define X_DATETIME_ONLY_LOCALE_ENGLISH\n")
    print("#ifndef X_DATETIME_NO_LOCALES")
    for code, language in complete:
        # The abbreviated weekdays used to be written into data.short_months
        # here; both branches now use data.short_weeks.
        _print_name_tables(code, language)
        print("\n")  # two empty lines
    print("#else")
    for code, language in complete:
        name = language["name"].translate(to_identifier)
        print("#ifdef X_DATETIME_ONLY_LOCALE_{}".format(name))
        _print_name_tables(code, language)
        print("#endif\n")
        print("\n")  # two empty lines
    print("#endif\n")


# The collective GNU C library community wisdom regarding abday, day, week, first_weekday, and first_workday states at https://sourceware.org/glibc/wiki/Locales the following:
#
# *  The value of the second week list item specifies the base of the abday and day lists.
#
# *  first_weekday specifies the offset of the first day-of-week in the abday and day lists.
#
# *  For compatibility reasons, all glibc locales should set the value of the second week list item to 19971130 (Sunday) and base the abday and day lists appropriately, and set first_weekday and first_workday to
#    1 or 2, depending on whether the week and work week actually starts on Sunday or Monday for the locale.
def print_xdatetime_macros2(language_map):
    """Print C++ assignments that fill ``data`` from each locale's LC_TIME section.

    Entries without ``locale_info`` or without an ``LC_TIME`` section are
    skipped.  For the others the AM/PM strings, the date/time formats, the
    week parameters and the month/weekday name tables are printed.  When
    ``t_fmt_ampm`` is absent the 24-hour ``t_fmt`` is used for the 12-hour
    format; when ``week`` is absent the defaults are 7 days, reference date
    19971130 and 4 minimum days; ``first_weekday`` defaults to 1.

    Args:
        language_map: Mapping of locale code to a dict that may hold
            ``"locale_info"`` as produced by ``read_locale_file``.

    Raises:
        KeyError, IndexError: If an ``LC_TIME`` section lacks a required field.
    """
    # (glibc key, C++ member) pairs for the index-keyed name tables.
    name_tables = (
        ("mon", "long_months"),
        ("abmon", "short_months"),
        ("day", "long_weekdays"),
        ("abday", "short_weekdays"),
    )

    def unquoted(section, key, index=0):
        # Values keep their surrounding double quotes from the locale file.
        return section[key][index].replace('"', '')

    for code, language in language_map.items():
        locale_info = language.get("locale_info")
        if not locale_info or "LC_TIME" not in locale_info:
            continue
        lc_time = locale_info["LC_TIME"]

        time12_key = "t_fmt_ampm" if "t_fmt_ampm" in lc_time else "t_fmt"
        string_fields = (
            ("am", unquoted(lc_time, "am_pm", 0)),
            ("pm", unquoted(lc_time, "am_pm", 1)),
            ("date_time_format", unquoted(lc_time, "d_t_fmt")),
            ("date_format", unquoted(lc_time, "d_fmt")),
            ("time24_format", unquoted(lc_time, "t_fmt")),
            ("time12_format", unquoted(lc_time, time12_key)),
        )
        for member, text in string_fields:
            print("    data.{}[\"{}\"] = u8\"{}\";".format(member, code, text))

        if "week" in lc_time:
            week = lc_time["week"]
            days_in_week, weekday_ref, min_days = int(week[0]), int(week[1]), int(week[2])
        else:
            # 19971130 is the Sunday that glibc locales use as the reference
            # date (the previous default, 11971130, was a typo).
            days_in_week, weekday_ref, min_days = 7, 19971130, 4
        if "first_weekday" in lc_time:
            first_weekday = int(lc_time["first_weekday"][0])
        else:
            first_weekday = 1
        int_fields = (
            ("days_in_week", days_in_week),
            ("first_weekday_ref", weekday_ref),
            ("first_week_year_min_days", min_days),
            ("first_weekday", first_weekday),
        )
        for member, number in int_fields:
            print("    data.{}[\"{}\"] = {};".format(member, code, number))

        for source_key, member in name_tables:
            for index, text in enumerate(lc_time[source_key]):
                print("    data.{}[\"{}\"][{}] = u8\"{}\";".format(member, code, index, text.replace('"', '')))
            print("")  # empty line after each table
        print("\n")  # two empty lines

def print_autogenerated_code(language_map):
    """Write the generated C++ locale-data header to standard output.

    The header declares ``struct _LocaleData``, an ``InitializeLocaleData()``
    function whose body is produced by ``print_xdatetime_macros2``, and one
    accessor function per data member, all inside ``namespace xDateTime``
    and wrapped in an include guard.

    Args:
        language_map: Mapping of locale code to per-locale dict, passed
            through to ``print_xdatetime_macros2``.
    """
    string_members = ("am", "pm", "date_time_format", "date_format",
                      "time24_format", "time12_format")
    int_members = ("days_in_week", "first_weekday_ref", "first_weekday",
                   "first_week_year_min_days")
    table_members = ("long_months", "short_months", "long_weekdays", "short_weekdays")

    # (return type, function name, struct member, takes an integer key?)
    accessors = (
        ("std::string", "GetLocaleLongMonth", "long_months", True),
        ("std::string", "GetLocaleShortMonth", "short_months", True),
        ("std::string", "GetLocaleLongWeekday", "long_weekdays", True),
        ("std::string", "GetLocaleShortWeekday", "short_weekdays", True),
        ("std::string", "GetLocaleAM", "am", False),
        ("std::string", "GetLocalePM", "pm", False),
        ("std::string", "GetLocaleDateTimeFormat", "date_time_format", False),
        ("std::string", "GetLocaleDateFormat", "date_format", False),
        ("std::string", "GetLocaleTime24Format", "time24_format", False),
        ("std::string", "GetLocaleTime12Format", "time12_format", False),
        ("int", "GetLocaleDaysInWeeks", "days_in_week", False),
        ("int", "GetLocaleFirstWeekdayReference", "first_weekday_ref", False),
        ("int", "GetLocaleFirstWeekOfYearMinDays", "first_week_year_min_days", False),
        ("int", "GetLocaleFirstWeekday", "first_weekday", False),
    )

    # Everything up to the start of the initializer body.
    head = [
        "// Automatically generated by cldr-gen-locale-data.py. DO NOT MODIFY.\n",
        "#ifndef X_DATETIME_LOCALE_DATA_H",
        "#define X_DATETIME_LOCALE_DATA_H",
        "#include <map>",
        "#include <string>\n",
        "namespace xDateTime {",
        "struct _LocaleData {",
    ]
    head += ["    std::map<std::string, std::string> {};".format(m) for m in string_members]
    head += ["    std::map<std::string, int> {};".format(m) for m in int_members]
    head += ["    std::map<std::string, std::map<int, std::string>> {};".format(m)
             for m in table_members]
    head += [
        "};\n",
        "_LocaleData InitializeLocaleData() {",
        "    static _LocaleData data;",
        "    static bool initialized = false;",
        "    if (initialized) return data;\n",
    ]
    for text in head:
        print(text)

    print_xdatetime_macros2(language_map)

    # End of the initializer, then the accessors and the closing guard.
    tail = ["    initialized = true;", "    return data;", "}\n"]
    for return_type, function, member, keyed in accessors:
        if keyed:
            parameters = "(const std::string& locale, int key)"
            lookup = "[locale][key]"
        else:
            parameters = "(const std::string& locale)"
            lookup = "[locale]"
        tail += [
            "{} {}{} {{".format(return_type, function, parameters),
            "    _LocaleData data = InitializeLocaleData();",
            "    return data.{}{};".format(member, lookup),
            "}\n",
        ]
    tail += ["}", "#endif /* X_DATETIME_LOCALE_DATA_H */"]  # "}" closes the namespace
    for text in tail:
        print(text)


import pprint

def print_locale_info(language_map):
    """Debug helper: pretty-print the LC_TIME section of every parsed locale.

    For each entry that has ``locale_info`` with an ``LC_TIME`` section, a
    marker line (the code followed by ``$$$$$$$``) is printed, then the section.
    """
    for locale_code, entry in language_map.items():
        if "locale_info" not in entry:
            continue
        sections = entry["locale_info"]
        if "LC_TIME" not in sections:
            continue
        print(locale_code, "$$$$$$$")
        pprint.pprint(sections["LC_TIME"])

def main():
    """Generate the C++ locale-data header from CLDR and glibc locale data.

    ``sys.argv[1]`` is the directory holding the CLDR ``main`` LDML files
    (``en.xml``, ``fr.xml``, ...).  Language names come from ``en.xml``,
    month/weekday tables from every language-level file, and LC_TIME data
    from the glibc locale sources in ``/usr/share/i18n/locales``.  The
    header is written to standard output.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {} <cldr-main-directory>".format(sys.argv[0]))
    dir_path = sys.argv[1]
    language_map = parse_ldml_language_map(os.path.join(dir_path, 'en.xml'))

    # search the CLDR first...
    # (sorted so that the generated output does not depend on directory order)
    for file_name in sorted(os.listdir(dir_path)):
        # Only parse the language files e.g. "en", "es", "fr".
        # The language-country files such as en_US do not have language information
        # and should be skipped.
        if file_name == "root.xml":
            continue
        if file_name.endswith('.xml') and '_' not in file_name:
            language_map = parse_ldml_locales(
                language_map, os.path.join(dir_path, file_name), file_name[:-4])

    #...now search the OS-specific locales
    locales_folder = "/usr/share/i18n/locales"
    # Snapshot the known codes: read_locale_file may add referenced locales
    # to language_map while the directory is being walked.
    known_codes = set(language_map)
    for file_name2 in sorted(os.listdir(locales_folder)):
        # The OS locales do not have a file extension, so the file name is
        # the locale code.  (This check used to reference an undefined name,
        # `code`, and raised NameError.)
        if file_name2 in known_codes:
            language_map = read_locale_file(locales_folder + '/' + file_name2, language_map, file_name2)

    print_autogenerated_code(language_map)

if __name__ == '__main__':
    main()
	# Copyright 2023 Ali Sherief
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import os
	import re
	import sys
	import xml.etree.ElementTree as ET

	# Matches glibc UCS symbolic names: <Uxxxx> or <Uxxxxxxxx> (4 or 8 hex digits).
	_UCS_SYMBOL_RE = re.compile(r'<U([0-9A-Fa-f]{8}|[0-9A-Fa-f]{4})>')


	def decode_utf8(encoded_string):
	    """Expand ``<Uxxxx>`` / ``<Uxxxxxxxx>`` escapes into the characters they name.

	    Despite the name, the input is not UTF-8 bytes: it is text in which
	    characters are spelled as hexadecimal code points between ``<U`` and
	    ``>``.  Everything that is not such an escape is copied unchanged.

	    Args:
	        encoded_string: Text that may contain UCS escapes.

	    Returns:
	        The text with every well-formed escape replaced by its character.
	        Escapes naming a value above U+10FFFF are left verbatim.
	    """
	    def _substitute(match):
	        code_point = int(match.group(1), 16)
	        if code_point > 0x10FFFF:
	            # Not a valid Unicode code point; chr() would raise, so keep the text.
	            return match.group(0)
	        return chr(code_point)

	    # A single regex pass replaces the per-character scan and the repeated
	    # string concatenation of the previous implementation.
	    return _UCS_SYMBOL_RE.sub(_substitute, encoded_string)

	def read_locale_file(file_name, language_map, code):
	    """Parse a glibc locale source file and store it in ``language_map[code]``.

	    The file is treated as a list of sections.  A line without any space
	    (for example ``LC_TIME``) opens a section; a line of the form
	    ``key value;value;...`` adds values to ``key`` in the current section.
	    A value line ending in the escape character ``/`` continues on the next
	    line, ``//`` stands for a literal ``/``, and ``<Uxxxx>`` escapes are
	    expanded with ``decode_utf8``.  Blank lines, ``END ...`` lines and comment
	    lines (starting with ``%``) are ignored.  Keys seen before the first
	    section header are stored under the ``""`` section.

	    If ``LC_TIME`` contains a ``copy "other"`` directive, the referenced
	    locale file is parsed too (and stored under its own code) and its
	    ``LC_TIME`` section is shared with ``code``.

	    Args:
	        file_name: Path of the locale source file.
	        language_map: Mapping of locale code to per-locale dict; updated in place.
	        code: Key under which the parsed data is stored.

	    Returns:
	        ``language_map``, with ``language_map[code]["locale_info"]`` set to a
	        ``{section: {key: [values]}}`` dict.
	    """
	    comment_char = '%'
	    escape_char = '/'

	    def split_values(text):
	        # "//" is an escaped "/"; values are ';'-separated and empty items dropped.
	        return [item for item in text.replace('//', '/').split(';') if item != '']

	    # Decode leniently: stray non-UTF-8 bytes (seen in comments) become
	    # replacement characters instead of raising mid-read, which used to make
	    # the reader silently drop whatever it had buffered.
	    with open(file_name, encoding="utf-8", errors="replace") as f:
	        all_lines = f.readlines()

	    current_section = ""
	    kv_pairs = {"": {}}
	    continue_line = False
	    key = None

	    for raw_line in all_lines:
	        line = decode_utf8(raw_line.replace('\t', ' ').strip())
	        if line == '' or line.startswith("END"):
	            continue

	        if continue_line:
	            # Continuation of the previous key's value list.  Comment lines
	            # are deliberately not skipped here: a continued value may
	            # legitimately start with '%'.
	            continue_line = line[-1] == escape_char
	            if continue_line:
	                line = line[:-1]
	            kv_pairs[current_section][key] += split_values(line)
	            continue

	        if line.startswith(comment_char):
	            continue

	        if ' ' not in line:
	            # A bare word is a section header; it starts a fresh section.
	            current_section = line
	            kv_pairs[current_section] = {}
	            continue

	        key, _, value = line.partition(' ')
	        # The "escape_char /" declaration itself ends in '/', so it must not
	        # be mistaken for a continued line.
	        if value.endswith(escape_char) and key != "escape_char":
	            continue_line = True
	            value = value[:-1]
	        kv_pairs[current_section].setdefault(key, []).extend(split_values(value.strip()))

	    language_map.setdefault(code, {})["locale_info"] = kv_pairs

	    # LC_TIME may merely point at another locale; resolve that reference so
	    # callers always find the real fields.
	    copy_target = kv_pairs.get("LC_TIME", {}).get("copy")
	    if copy_target:
	        ref_code = copy_target[0].replace('"', '')
	        if ref_code != code:  # a self-reference would recurse forever
	            # Prefer a file next to the one being read, then the system directory.
	            sibling = os.path.join(os.path.dirname(file_name), ref_code)
	            if os.path.isfile(sibling):
	                ref_path = sibling
	            else:
	                ref_path = "/usr/share/i18n/locales/{}".format(ref_code)
	            # Keep whatever is already known about ref_code (e.g. its name).
	            language_map.setdefault(ref_code, {})
	            language_map = read_locale_file(ref_path, language_map, ref_code)
	            ref_lc_time = language_map[ref_code]["locale_info"].get("LC_TIME")
	            if ref_lc_time is not None:
	                language_map[code]["locale_info"]["LC_TIME"] = ref_lc_time
	    return language_map

	def parse_ldml_language_map(file_name):
	    """Build the initial language map from an LDML file's language display names.

	    Reads every ``localeDisplayNames/languages/language`` element and keeps
	    those whose ``type`` has no underscore (plain language codes).

	    Args:
	        file_name: Path (or file object) of the LDML document, e.g. ``en.xml``.

	    Returns:
	        ``{code: {"name": UPPER-CASED DISPLAY NAME}}``.
	    """
	    root = ET.parse(file_name).getroot()
	    language_map = {}

	    for display_names in root.iter('localeDisplayNames'):
	        for languages in display_names.iter('languages'):
	            for element in languages.iter('language'):
	                short_name = element.get('type')
	                long_name = element.text
	                # Skip regional variants and elements without a usable code or name.
	                if not short_name or "_" in short_name or not long_name:
	                    continue
	                # An alt="short"/"variant"/... entry must not replace the
	                # primary display name recorded earlier for the same code.
	                if 'alt' in element.attrib and short_name in language_map:
	                    continue
	                language_map[short_name] = {"name": long_name.upper()}

	    return language_map


	def _collect_calendar_names(calendar, group_tag, context_tag, width_tag, item_tag, wide, abbreviated):
	    """Copy one calendar's "format"-context names into ``wide`` / ``abbreviated``."""
	    targets = {'wide': wide, 'abbreviated': abbreviated}
	    for group in calendar.iter(group_tag):
	        for context in group.iter(context_tag):
	            if context.get('type') != 'format':
	                continue
	            for width in context.iter(width_tag):
	                target = targets.get(width.get('type'))
	                if target is None:  # other widths (e.g. narrow) are not collected
	                    continue
	                for item in width.iter(item_tag):
	                    target[item.attrib['type']] = item.text


	def parse_ldml_locales(language_map, file_name, iso_code):
	    """Extract Gregorian month and weekday names from an LDML locale file.

	    Only the ``gregorian`` calendar and the ``format`` context are read; the
	    ``wide`` and ``abbreviated`` widths become the long and short tables.

	    Args:
	        language_map: Mapping of language code to per-language dict; updated in place.
	        file_name: Path (or file object) of the LDML document.
	        iso_code: Key in ``language_map`` that receives the tables.

	    Returns:
	        ``language_map``, where ``language_map[iso_code]`` has the keys
	        ``months_long``, ``months_short``, ``weeks_long`` and ``weeks_short``,
	        each a ``{type attribute: name}`` dict (possibly empty).
	    """
	    root = ET.parse(file_name).getroot()

	    months_long = {}
	    months_short = {}
	    weeks_long = {}
	    weeks_short = {}

	    for dates in root.iter('dates'):
	        calendars = dates.find('calendars')
	        if calendars is None:
	            # A <dates> element is not required to contain <calendars>.
	            continue
	        for calendar in calendars.iter('calendar'):
	            if calendar.get('type') != 'gregorian':  # only gregorian is supported for now
	                continue
	            _collect_calendar_names(calendar, 'months', 'monthContext', 'monthWidth', 'month',
	                                    months_long, months_short)
	            _collect_calendar_names(calendar, 'days', 'dayContext', 'dayWidth', 'day',
	                                    weeks_long, weeks_short)

	    # setdefault: the file may describe a language absent from the name map.
	    entry = language_map.setdefault(iso_code, {})
	    entry["months_long"] = months_long
	    entry["months_short"] = months_short
	    entry["weeks_long"] = weeks_long
	    entry["weeks_short"] = weeks_short
	    return language_map

	def _print_name_tables(code, language):
	    """Print one language's four CLDR name tables as C++ assignments."""
	    tables = (
	        ("long_months", "months_long"),
	        ("short_months", "months_short"),
	        ("long_weeks", "weeks_long"),
	        ("short_weeks", "weeks_short"),
	    )
	    for position, (member, source_key) in enumerate(tables):
	        if position:
	            print("")  # empty line between tables
	        for identifier, data in language[source_key].items():
	            print(" data.{}[\"{}\"][\"{}\"] = u8\"{}\";".format(member, code, identifier, data))


	def print_xdatetime_macros(language_map):
	    """Print the CLDR month/weekday tables as preprocessor-guarded C++ code.

	    Languages lacking any of the four tables (``months_long``,
	    ``months_short``, ``weeks_long``, ``weeks_short``) are skipped.  The
	    ``#ifndef X_DATETIME_NO_LOCALES`` branch emits every language; the
	    ``#else`` branch wraps each language in
	    ``#ifdef X_DATETIME_ONLY_LOCALE_<NAME>``.

	    Args:
	        language_map: Mapping of language code to per-language dict.
	    """
	    required = ("months_long", "months_short", "weeks_long", "weeks_short")
	    complete = [
	        (code, language)
	        for code, language in language_map.items()
	        if all(language.get(key) for key in required)
	    ]
	    # ',', ' ' and '-' cannot appear in a macro name.
	    to_identifier = str.maketrans(", -", "___")

	    print("// You must define an X_DATETIME_ONLY_LOCALE_* macro, which is only read if you don't want locales.")
	    print("// English is set by default in the event that locales are disabled - which are also enabled by default.")
	    print("// This means if you want to disable the English locale, you must undefine this macro before including this file.")
	    print("#define X_DATETIME_ONLY_LOCALE_ENGLISH\n")
	    print("#ifndef X_DATETIME_NO_LOCALES")
	    for code, language in complete:
	        # The abbreviated weekdays used to be written into data.short_months
	        # here; both branches now use data.short_weeks.
	        _print_name_tables(code, language)
	        print("\n")  # two empty lines
	    print("#else")
	    for code, language in complete:
	        name = language["name"].translate(to_identifier)
	        print("#ifdef X_DATETIME_ONLY_LOCALE_{}".format(name))
	        _print_name_tables(code, language)
	        print("#endif\n")
	        print("\n")  # two empty lines
	    print("#endif\n")


	# The collective GNU C library community wisdom regarding abday, day, week, first_weekday, and first_workday states at https://sourceware.org/glibc/wiki/Locales the following:
	#
	# * The value of the second week list item specifies the base of the abday and day lists.
	#
	# * first_weekday specifies the offset of the first day-of-week in the abday and day lists.
	#
	# * For compatibility reasons, all glibc locales should set the value of the second week list item to 19971130 (Sunday) and base the abday and day lists appropriately, and set first_weekday and first_workday to
	# 1 or 2, depending on whether the week and work week actually starts on Sunday or Monday for the locale.
	def print_xdatetime_macros2(language_map):
	    """Print C++ assignments that fill ``data`` from each locale's LC_TIME section.

	    Entries without ``locale_info`` or without an ``LC_TIME`` section are
	    skipped.  For the others the AM/PM strings, the date/time formats, the
	    week parameters and the month/weekday name tables are printed.  When
	    ``t_fmt_ampm`` is absent the 24-hour ``t_fmt`` is used for the 12-hour
	    format; when ``week`` is absent the defaults are 7 days, reference date
	    19971130 and 4 minimum days; ``first_weekday`` defaults to 1.

	    Args:
	        language_map: Mapping of locale code to a dict that may hold
	            ``"locale_info"`` as produced by ``read_locale_file``.

	    Raises:
	        KeyError, IndexError: If an ``LC_TIME`` section lacks a required field.
	    """
	    # (glibc key, C++ member) pairs for the index-keyed name tables.
	    name_tables = (
	        ("mon", "long_months"),
	        ("abmon", "short_months"),
	        ("day", "long_weekdays"),
	        ("abday", "short_weekdays"),
	    )

	    def unquoted(section, key, index=0):
	        # Values keep their surrounding double quotes from the locale file.
	        return section[key][index].replace('"', '')

	    for code, language in language_map.items():
	        locale_info = language.get("locale_info")
	        if not locale_info or "LC_TIME" not in locale_info:
	            continue
	        lc_time = locale_info["LC_TIME"]

	        time12_key = "t_fmt_ampm" if "t_fmt_ampm" in lc_time else "t_fmt"
	        string_fields = (
	            ("am", unquoted(lc_time, "am_pm", 0)),
	            ("pm", unquoted(lc_time, "am_pm", 1)),
	            ("date_time_format", unquoted(lc_time, "d_t_fmt")),
	            ("date_format", unquoted(lc_time, "d_fmt")),
	            ("time24_format", unquoted(lc_time, "t_fmt")),
	            ("time12_format", unquoted(lc_time, time12_key)),
	        )
	        for member, text in string_fields:
	            print(" data.{}[\"{}\"] = u8\"{}\";".format(member, code, text))

	        if "week" in lc_time:
	            week = lc_time["week"]
	            days_in_week, weekday_ref, min_days = int(week[0]), int(week[1]), int(week[2])
	        else:
	            # 19971130 is the Sunday that glibc locales use as the reference
	            # date (the previous default, 11971130, was a typo).
	            days_in_week, weekday_ref, min_days = 7, 19971130, 4
	        if "first_weekday" in lc_time:
	            first_weekday = int(lc_time["first_weekday"][0])
	        else:
	            first_weekday = 1
	        int_fields = (
	            ("days_in_week", days_in_week),
	            ("first_weekday_ref", weekday_ref),
	            ("first_week_year_min_days", min_days),
	            ("first_weekday", first_weekday),
	        )
	        for member, number in int_fields:
	            print(" data.{}[\"{}\"] = {};".format(member, code, number))

	        for source_key, member in name_tables:
	            for index, text in enumerate(lc_time[source_key]):
	                print(" data.{}[\"{}\"][{}] = u8\"{}\";".format(member, code, index, text.replace('"', '')))
	            print("")  # empty line after each table
	        print("\n")  # two empty lines

	def print_autogenerated_code(language_map):
	    """Write the whole generated C++ header to standard output.

	    The output consists of a fixed preamble (include guard, the
	    ``_LocaleData`` struct and the opening of ``InitializeLocaleData``),
	    the per-locale assignments produced by ``print_xdatetime_macros2``,
	    and a fixed trailer (the accessor functions, the namespace close and
	    the end of the include guard).

	    Args:
	        language_map: Passed unchanged to ``print_xdatetime_macros2``.
	    """
	    # Lines ending in "\n" yield an extra blank line once printed.
	    preamble = (
	        "// Automatically generated by cldr-gen-locale-data.py. DO NOT MODIFY.\n",
	        "#ifndef X_DATETIME_LOCALE_DATA_H",
	        "#define X_DATETIME_LOCALE_DATA_H",
	        "#include <map>",
	        "#include <string>\n",
	        "namespace xDateTime {",
	        "struct _LocaleData {",
	        " std::map<std::string, std::string> am;",
	        " std::map<std::string, std::string> pm;",
	        " std::map<std::string, std::string> date_time_format;",
	        " std::map<std::string, std::string> date_format;",
	        " std::map<std::string, std::string> time24_format;",
	        " std::map<std::string, std::string> time12_format;",
	        " std::map<std::string, int> days_in_week;",
	        " std::map<std::string, int> first_weekday_ref;",
	        " std::map<std::string, int> first_weekday;",
	        " std::map<std::string, int> first_week_year_min_days;",
	        " std::map<std::string, std::map<int, std::string>> long_months;",
	        " std::map<std::string, std::map<int, std::string>> short_months;",
	        " std::map<std::string, std::map<int, std::string>> long_weekdays;",
	        " std::map<std::string, std::map<int, std::string>> short_weekdays;",
	        "};\n",
	        "_LocaleData InitializeLocaleData() {",
	        " static _LocaleData data;",
	        " static bool initialized = false;",
	        " if (initialized) return data;\n",
	    )
	    trailer = (
	        " initialized = true;",
	        " return data;",
	        "}\n",
	        "std::string GetLocaleLongMonth(const std::string& locale, int key) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.long_months[locale][key];",
	        "}\n",
	        "std::string GetLocaleShortMonth(const std::string& locale, int key) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.short_months[locale][key];",
	        "}\n",
	        "std::string GetLocaleLongWeekday(const std::string& locale, int key) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.long_weekdays[locale][key];",
	        "}\n",
	        "std::string GetLocaleShortWeekday(const std::string& locale, int key) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.short_weekdays[locale][key];",
	        "}\n",
	        "std::string GetLocaleAM(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.am[locale];",
	        "}\n",
	        "std::string GetLocalePM(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.pm[locale];",
	        "}\n",
	        "std::string GetLocaleDateTimeFormat(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.date_time_format[locale];",
	        "}\n",
	        "std::string GetLocaleDateFormat(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.date_format[locale];",
	        "}\n",
	        "std::string GetLocaleTime24Format(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.time24_format[locale];",
	        "}\n",
	        "std::string GetLocaleTime12Format(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.time12_format[locale];",
	        "}\n",
	        "int GetLocaleDaysInWeeks(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.days_in_week[locale];",
	        "}\n",
	        "int GetLocaleFirstWeekdayReference(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.first_weekday_ref[locale];",
	        "}\n",
	        "int GetLocaleFirstWeekOfYearMinDays(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.first_week_year_min_days[locale];",
	        "}\n",
	        "int GetLocaleFirstWeekday(const std::string& locale) {",
	        " _LocaleData data = InitializeLocaleData();",
	        " return data.first_weekday[locale];",
	        "}\n",
	        "}",  # closes namespace xDateTime
	        "#endif /* X_DATETIME_LOCALE_DATA_H */",
	    )

	    print(*preamble, sep="\n")
	    # The per-locale assignments form the body of InitializeLocaleData().
	    print_xdatetime_macros2(language_map)
	    print(*trailer, sep="\n")


	import pprint

	def print_locale_info(language_map):
	    """Dump the LC_TIME section of every locale that has locale information.

	    For each entry of ``language_map`` containing a ``"locale_info"``
	    mapping with an ``"LC_TIME"`` key, prints the locale key followed by
	    ``$$$$$$$`` and then pretty-prints the LC_TIME value.

	    Args:
	        language_map: Mapping of locale key to a dict that may hold a
	            ``"locale_info"`` dict.
	    """
	    for locale_key, entry in language_map.items():
	        if "locale_info" not in entry:
	            continue
	        for category, contents in entry["locale_info"].items():
	            if category != "LC_TIME":
	                continue
	            print(locale_key, "$$$$$$$")
	            pprint.pprint(contents)

	def main():
	    """Build the locale map from CLDR and OS locale files and print the header.

	    The first command-line argument is the directory holding the CLDR XML
	    files. The language map is seeded from ``en.xml``, extended with every
	    language-only XML file in that directory, then enriched with the OS
	    locale definitions from ``/usr/share/i18n/locales`` whose file name is
	    already a key of the map. The generated C++ header is written to
	    standard output.
	    """
	    if len(sys.argv) < 2:
	        sys.exit("usage: {} CLDR_XML_DIR".format(sys.argv[0]))
	    dir_path = sys.argv[1]
	    language_map = parse_ldml_language_map(os.path.join(dir_path, 'en.xml'))

	    # Search the CLDR first. Listings are sorted so that the generated
	    # output does not depend on the file system's enumeration order.
	    for file_name in sorted(os.listdir(dir_path)):
	        # Only parse the language files e.g. "en", "es", "fr".
	        # The language-country files such as en_US do not have language
	        # information and should be skipped.
	        if file_name == "root.xml":
	            continue
	        if file_name.endswith('.xml') and '_' not in file_name:
	            language_map = parse_ldml_locales(
	                language_map, os.path.join(dir_path, file_name), file_name[:-4])

	    # ...now search the OS-specific locales.
	    locales_folder = "/usr/share/i18n/locales"
	    for file_name2 in sorted(os.listdir(locales_folder)):
	        # The OS locales do not have a file extension, so the file name is
	        # the locale code. The original tested an undefined name `code`
	        # here, which raised NameError on the first file.
	        # NOTE(review): only names that are already keys of language_map are
	        # read; confirm that names such as "en_US" are meant to be skipped.
	        if file_name2 in language_map:
	            language_map = read_locale_file(
	                os.path.join(locales_folder, file_name2), language_map, file_name2)

	    print_autogenerated_code(language_map)


	if __name__ == '__main__':
	    main()