Skip to content

Instantly share code, notes, and snippets.

@flodolo
Created July 31, 2018 12:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flodolo/23844c17dc349542627e1367d061b7ee to your computer and use it in GitHub Desktop.
Save flodolo/23844c17dc349542627e1367d061b7ee to your computer and use it in GitHub Desktop.
Extract language names from CLDR for bug 1476781
#! /usr/bin/env python3
# Needs clones of these repositories in the same path as the script
# https://github.com/unicode-cldr/cldr-misc-full/
# https://github.com/unicode-cldr/cldr-localenames-full
import json
import os
from collections import OrderedDict
from urllib.request import urlopen
# Mozilla locale code -> CLDR locale code, for the locales whose codes differ
# between the two (e.g. Mozilla's 'es-ES' is looked up as 'es' in CLDR).
# Codes not listed here are used unchanged.
locale_mapping = {
    'bn-BD': 'bn',
    'en-US': 'en',
    'es-ES': 'es',
    'fy-NL': 'fy',
    'ga-IE': 'ga',
    'gu-IN': 'gu',
    'hi-IN': 'hi',
    'hy-AM': 'hy',
    'ja-JP-mac': 'ja',
    'nb-NO': 'nb',
    'ne-NP': 'ne',
    'nn-NO': 'nn',
    'pa-IN': 'pa',
    'pt-BR': 'pt',
    'sv-SE': 'sv',
    'zh-CN': 'zh-Hans',
    'zh-TW': 'zh-Hant',
}
def getShippingLocales(shipping_locales, locales_urls=None):
    """Add the locales shipping in Firefox to *shipping_locales*, in place.

    Every URL is expected to serve a plain-text list with one locale code
    per line. Once all URLs have been processed, the list is de-duplicated
    and sorted in place, so the caller's list object holds the final result.

    Args:
        shipping_locales: List of locale codes to extend; mutated in place.
        locales_urls: Optional iterable of URLs to read. When omitted, the
            ``browser`` and ``mobile/android`` ``all-locales`` files from
            mozilla-central are used.

    Returns:
        None. The result is delivered through *shipping_locales*.
    """
    if locales_urls is None:
        locales_urls = [
            'https://hg.mozilla.org/mozilla-central/raw-file/default/browser/locales/all-locales',
            'https://hg.mozilla.org/mozilla-central/raw-file/default/mobile/android/locales/all-locales',
        ]

    for locales_url in locales_urls:
        # Best effort: a source that cannot be read is reported and skipped,
        # the remaining sources are still processed.
        try:
            with urlopen(locales_url) as response:
                for line in response.readlines():
                    locale = line.decode().strip()
                    # Blank lines would otherwise end up as an empty
                    # locale code.
                    if locale:
                        shipping_locales.append(locale)
        except Exception as e:
            print(e)

    # Slice assignment updates the caller's list object; rebinding the local
    # name would silently discard the de-duplicated, sorted result.
    shipping_locales[:] = sorted(set(shipping_locales))
def _load_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at *path*."""
    with open(path, encoding='utf-8') as data_file:
        return json.load(data_file)


def _read_context_transform(transform_file, cldr_locale):
    """Return the 'uiListOrMenu' language transform, or None if not defined."""
    json_data = _load_json(transform_file)
    try:
        return json_data['main'][cldr_locale]['contextTransforms']['languages']['uiListOrMenu']
    except (KeyError, TypeError):
        return None


def _read_language_name(language_file, cldr_locale):
    """Return the locale's name for its own language, or None if not defined."""
    json_data = _load_json(language_file)
    try:
        return json_data['main'][cldr_locale]['localeDisplayNames']['languages'][cldr_locale]
    except (KeyError, TypeError):
        return None


def main():
    """Build output.json, mapping each shipping locale to its native name.

    For every shipping locale the language name is read from the
    'cldr-localenames-full' clone and the 'uiListOrMenu' context transform
    from the 'cldr-misc-full' clone, both expected next to this script.
    Locales without CLDR data, or without a language name, are stored as
    'N/A'. A summary of the problems found is printed, and the result is
    written to 'output.json' in the current working directory.
    """
    # Path to this script
    script_folder = os.path.abspath(os.path.dirname(__file__))

    shipping_locales = []
    getShippingLocales(shipping_locales)

    log = {
        'no-cldr': [],
        'missing-transform': [],
        'missing-name': [],
        'capitalized': [],
    }

    languages = OrderedDict()
    # De-duplicate and sort so that every locale is processed (and logged)
    # exactly once, whatever the order of the fetched list.
    for locale in sorted(set(shipping_locales)):
        cldr_locale = locale_mapping.get(locale, locale)
        cldr_path_names = os.path.join(
            script_folder, 'cldr-localenames-full', 'main', cldr_locale)
        cldr_path_transforms = os.path.join(
            script_folder, 'cldr-misc-full', 'main', cldr_locale)

        # Check if folder exists in CLDR
        if not os.path.isdir(cldr_path_names):
            log['no-cldr'].append(cldr_locale)
            languages[locale] = 'N/A'
            continue

        # Read transform. Possible values are
        # 'titlecase-firstword': title case
        # 'no-change': no change from the language name
        text_transformation = 'no-change'
        transform_file = os.path.join(
            cldr_path_transforms, 'contextTransforms.json')
        if os.path.isfile(transform_file):
            transform = _read_context_transform(transform_file, cldr_locale)
            if transform is None:
                log['missing-transform'].append(locale)
            else:
                text_transformation = transform

        # Read language name. It is reset for every locale, so a missing
        # file can neither raise NameError nor reuse the previous name.
        language_name = None
        language_file = os.path.join(cldr_path_names, 'languages.json')
        if os.path.isfile(language_file):
            language_name = _read_language_name(language_file, cldr_locale)
        if language_name is None:
            log['missing-name'].append(locale)
            language_name = 'N/A'

        # Apply text transform
        # NOTE(review): str.capitalize() also lowercases everything after
        # the first character - confirm that is the intended result for
        # names that contain inner capitals.
        if text_transformation == 'titlecase-firstword' and language_name != 'N/A':
            print('Language name capitalized for {}.\nOriginal: {}\nUpdated {}'.format(
                cldr_locale, language_name, language_name.capitalize()
            ))
            language_name = language_name.capitalize()
            log['capitalized'].append(locale)

        languages[locale] = language_name

    for locales in log.values():
        locales.sort()

    print('Locales not available in CLDR: {}'.format(', '.join(log['no-cldr'])))
    print('Locales missing language name: {}'.format(', '.join(log['missing-name'])))
    print('Locales missing context transform: {}'.format(', '.join(log['missing-transform'])))
    print('Locales capitalized: {}'.format(', '.join(log['capitalized'])))

    # ensure_ascii=False emits non-ASCII characters as-is, so the encoding
    # must be explicit rather than left to the platform default.
    with open('output.json', 'w', encoding='utf-8') as f:
        json.dump(languages, f, ensure_ascii=False, indent=2, sort_keys=True)
    print('JSON saved as output.json')
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment