Skip to content

Instantly share code, notes, and snippets.

@cmkilger
Created April 13, 2022 20:32
Show Gist options
  • Save cmkilger/efe01546e6bf0e71eefc2dae1e633f81 to your computer and use it in GitHub Desktop.
Save cmkilger/efe01546e6bf0e71eefc2dae1e633f81 to your computer and use it in GitHub Desktop.
Pick a preferred language from a set of languages based on a list of preferences
/**
 * Finds the best locale from the available locales based on the user's preferences.
 *
 * Preferences are tried in order. For each preference, the available locales with the
 * same language and a compatible script (equal, or missing on either side) are ranked:
 * an exact script match first, then the closest M.49 region that contains (or equals)
 * the preferred region, then the broadest region. The first preference that has any
 * candidate wins.
 *
 * Identifiers that cannot be parsed as BCP 47 tags are ignored rather than throwing.
 *
 * @param {string[]} preferences - BCP 47 locale identifiers, most preferred first ('-' or '_' separated).
 * @param {string[]} availableLocales - BCP 47 locale identifiers that can be served.
 * @param {*} defaultLocale - Value returned unchanged when no preference can be satisfied.
 * @returns {*} The matching entry of `availableLocales` exactly as it was passed in, or `defaultLocale`.
 */
function preferredLocale(preferences, availableLocales, defaultLocale) {
  // The lookup tables and the pattern are built once per call and shared by the helpers below.

  // Map grandfathered language identifiers to ISO-639 (keys are lower case; lookups are case-insensitive)
  const grandfatheredMap = {
    'art-lojban': 'jbo', 'i-ami': 'ami', 'i-bnn': 'bnn', 'i-hak': 'hak', 'i-klingon': 'tlh', 'i-lux': 'lb', 'i-navajo': 'nv',
    'i-pwn': 'pwn', 'i-tao': 'tao', 'i-tay': 'tay', 'i-tsu': 'tsu', 'no-bok': 'nb', 'no-nyn': 'nn', 'sgn-be-fr': 'sfb',
    'sgn-be-nl': 'vgt', 'sgn-ch-de': 'sgg', 'zh-guoyu': 'zh', 'zh-hakka': 'hak', 'zh-min-nan': 'nan', 'zh-xiang': 'hsn'
  };

  // BCP 47 regex. The extension singleton is any alphanumeric except 'x' (which introduces private use).
  const pattern = /^(?:(?:(?<language>(?:(?:[a-z]{2,3})(?:(?:-|_)(?<extlang>[a-z]{3})){0,3})|(?:[a-z]{4})|(?:[a-z]{5,8}))(?:(?:-|_)(?<script>[a-z]{4}))?(?:(?:-|_)(?<region>[a-z]{2}|[0-9]{3}))?(?:(?:-|_)(?<variant>[a-z0-9]{5,8}|[0-9][a-z0-9]{3}))*(?:(?:-|_)(?<extensions>[0-9a-wy-z](?:-[a-z0-9]{2,8})+))*(?:-x(?:(?:-|_)(?<privateuse1>[a-z0-9]{1,8}))+)?)|(?:x(?:(?:-|_)(?<privateuse>[a-z0-9]{1,8}))+)|(?<grandfathered>(?<irregular>en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(?<regular>art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$/i;

  // Map ISO-639 parts 1, 2b, & 2t to part 3 codes
  const languageMap = {
    'aa': 'aar', 'ab': 'abk', 'ae': 'ave', 'af': 'afr', 'ak': 'aka', 'alb': 'sqi', 'am': 'amh', 'an': 'arg', 'ar': 'ara',
    'arm': 'hye', 'as': 'asm', 'av': 'ava', 'ay': 'aym', 'az': 'aze', 'ba': 'bak', 'baq': 'eus', 'be': 'bel', 'bg': 'bul',
    'bi': 'bis', 'bm': 'bam', 'bn': 'ben', 'bo': 'bod', 'br': 'bre', 'bs': 'bos', 'bur': 'mya', 'ca': 'cat', 'ce': 'che',
    'ch': 'cha', 'chi': 'zho', 'co': 'cos', 'cr': 'cre', 'cs': 'ces', 'cu': 'chu', 'cv': 'chv', 'cy': 'cym', 'cze': 'ces',
    'da': 'dan', 'de': 'deu', 'dut': 'nld', 'dv': 'div', 'dz': 'dzo', 'ee': 'ewe', 'el': 'ell', 'en': 'eng', 'eo': 'epo',
    'es': 'spa', 'et': 'est', 'eu': 'eus', 'fa': 'fas', 'ff': 'ful', 'fi': 'fin', 'fj': 'fij', 'fo': 'fao', 'fr': 'fra',
    'fre': 'fra', 'fy': 'fry', 'ga': 'gle', 'gd': 'gla', 'geo': 'kat', 'ger': 'deu', 'gl': 'glg', 'gn': 'grn', 'gre': 'ell',
    'gu': 'guj', 'gv': 'glv', 'ha': 'hau', 'he': 'heb', 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', 'ht': 'hat', 'hu': 'hun',
    'hy': 'hye', 'hz': 'her', 'ia': 'ina', 'ice': 'isl', 'id': 'ind', 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', 'ik': 'ipk',
    'io': 'ido', 'is': 'isl', 'it': 'ita', 'iu': 'iku', 'ja': 'jpn', 'jv': 'jav', 'ka': 'kat', 'kg': 'kon', 'ki': 'kik',
    'kj': 'kua', 'kk': 'kaz', 'kl': 'kal', 'km': 'khm', 'kn': 'kan', 'ko': 'kor', 'kr': 'kau', 'ks': 'kas', 'ku': 'kur',
    'kv': 'kom', 'kw': 'cor', 'ky': 'kir', 'la': 'lat', 'lb': 'ltz', 'lg': 'lug', 'li': 'lim', 'ln': 'lin', 'lo': 'lao',
    'lt': 'lit', 'lu': 'lub', 'lv': 'lav', 'mac': 'mkd', 'mao': 'mri', 'may': 'msa', 'mg': 'mlg', 'mh': 'mah', 'mi': 'mri',
    'mk': 'mkd', 'ml': 'mal', 'mn': 'mon', 'mr': 'mar', 'ms': 'msa', 'mt': 'mlt', 'my': 'mya', 'na': 'nau', 'nb': 'nob',
    'nd': 'nde', 'ne': 'nep', 'ng': 'ndo', 'nl': 'nld', 'nn': 'nno', 'no': 'nor', 'nr': 'nbl', 'nv': 'nav', 'ny': 'nya',
    'oc': 'oci', 'oj': 'oji', 'om': 'orm', 'or': 'ori', 'os': 'oss', 'pa': 'pan', 'per': 'fas', 'pi': 'pli', 'pl': 'pol',
    'ps': 'pus', 'pt': 'por', 'qu': 'que', 'rm': 'roh', 'rn': 'run', 'ro': 'ron', 'ru': 'rus', 'rum': 'ron', 'rw': 'kin',
    'sa': 'san', 'sc': 'srd', 'sd': 'snd', 'se': 'sme', 'sg': 'sag', 'sh': 'hbs', 'si': 'sin', 'sk': 'slk', 'sl': 'slv',
    'slo': 'slk', 'sm': 'smo', 'sn': 'sna', 'so': 'som', 'sq': 'sqi', 'sr': 'srp', 'ss': 'ssw', 'st': 'sot', 'su': 'sun',
    'sv': 'swe', 'sw': 'swa', 'ta': 'tam', 'te': 'tel', 'tg': 'tgk', 'th': 'tha', 'ti': 'tir', 'tib': 'bod', 'tk': 'tuk',
    'tl': 'tgl', 'tn': 'tsn', 'to': 'ton', 'tr': 'tur', 'ts': 'tso', 'tt': 'tat', 'tw': 'twi', 'ty': 'tah', 'ug': 'uig',
    'uk': 'ukr', 'ur': 'urd', 'uz': 'uzb', 've': 'ven', 'vi': 'vie', 'vo': 'vol', 'wa': 'wln', 'wel': 'cym', 'wo': 'wol',
    'xh': 'xho', 'yi': 'yid', 'yo': 'yor', 'za': 'zha', 'zh': 'zho', 'zu': 'zul'
  };

  // Map ISO 3166-1 to UN M.49
  const regionMap = {
    'af': '004', 'al': '008', 'dz': '012', 'as': '016', 'ad': '020', 'ao': '024', 'ai': '660', 'aq': '010', 'ag': '028',
    'ar': '032', 'am': '051', 'aw': '533', 'au': '036', 'at': '040', 'az': '031', 'bs': '044', 'bh': '048', 'bd': '050',
    'bb': '052', 'by': '112', 'be': '056', 'bz': '084', 'bj': '204', 'bm': '060', 'bt': '064', 'bo': '068', 'bq': '535',
    'ba': '070', 'bw': '072', 'bv': '074', 'br': '076', 'io': '086', 'bn': '096', 'bg': '100', 'bf': '854', 'bi': '108',
    'cv': '132', 'kh': '116', 'cm': '120', 'ca': '124', 'ky': '136', 'cf': '140', 'td': '148', 'cl': '152', 'cn': '156',
    'cx': '162', 'cc': '166', 'co': '170', 'km': '174', 'cd': '180', 'cg': '178', 'ck': '184', 'cr': '188', 'hr': '191',
    'cu': '192', 'cw': '531', 'cy': '196', 'cz': '203', 'ci': '384', 'dk': '208', 'dj': '262', 'dm': '212', 'do': '214',
    'ec': '218', 'eg': '818', 'sv': '222', 'gq': '226', 'er': '232', 'ee': '233', 'sz': '748', 'et': '231', 'fk': '238',
    'fo': '234', 'fj': '242', 'fi': '246', 'fr': '250', 'gf': '254', 'pf': '258', 'tf': '260', 'ga': '266', 'gm': '270',
    'ge': '268', 'de': '276', 'gh': '288', 'gi': '292', 'gr': '300', 'gl': '304', 'gd': '308', 'gp': '312', 'gu': '316',
    'gt': '320', 'gg': '831', 'gn': '324', 'gw': '624', 'gy': '328', 'ht': '332', 'hm': '334', 'va': '336', 'hn': '340',
    'hk': '344', 'hu': '348', 'is': '352', 'in': '356', 'id': '360', 'ir': '364', 'iq': '368', 'ie': '372', 'im': '833',
    'il': '376', 'it': '380', 'jm': '388', 'jp': '392', 'je': '832', 'jo': '400', 'kz': '398', 'ke': '404', 'ki': '296',
    'kp': '408', 'kr': '410', 'kw': '414', 'kg': '417', 'la': '418', 'lv': '428', 'lb': '422', 'ls': '426', 'lr': '430',
    'ly': '434', 'li': '438', 'lt': '440', 'lu': '442', 'mo': '446', 'mg': '450', 'mw': '454', 'my': '458', 'mv': '462',
    'ml': '466', 'mt': '470', 'mh': '584', 'mq': '474', 'mr': '478', 'mu': '480', 'yt': '175', 'mx': '484', 'fm': '583',
    'md': '498', 'mc': '492', 'mn': '496', 'me': '499', 'ms': '500', 'ma': '504', 'mz': '508', 'mm': '104', 'na': '516',
    'nr': '520', 'np': '524', 'nl': '528', 'nc': '540', 'nz': '554', 'ni': '558', 'ne': '562', 'ng': '566', 'nu': '570',
    'nf': '574', 'mp': '580', 'no': '578', 'om': '512', 'pk': '586', 'pw': '585', 'ps': '275', 'pa': '591', 'pg': '598',
    'py': '600', 'pe': '604', 'ph': '608', 'pn': '612', 'pl': '616', 'pt': '620', 'pr': '630', 'qa': '634', 'mk': '807',
    'ro': '642', 'ru': '643', 'rw': '646', 're': '638', 'bl': '652', 'sh': '654', 'kn': '659', 'lc': '662', 'mf': '663',
    'pm': '666', 'vc': '670', 'ws': '882', 'sm': '674', 'st': '678', 'sa': '682', 'sn': '686', 'rs': '688', 'sc': '690',
    'sl': '694', 'sg': '702', 'sx': '534', 'sk': '703', 'si': '705', 'sb': '090', 'so': '706', 'za': '710', 'gs': '239',
    'ss': '728', 'es': '724', 'lk': '144', 'sd': '729', 'sr': '740', 'sj': '744', 'se': '752', 'ch': '756', 'sy': '760',
    'tw': '158', 'tj': '762', 'tz': '834', 'th': '764', 'tl': '626', 'tg': '768', 'tk': '772', 'to': '776', 'tt': '780',
    'tn': '788', 'tr': '792', 'tm': '795', 'tc': '796', 'tv': '798', 'ug': '800', 'ua': '804', 'ae': '784', 'gb': '826',
    'um': '581', 'us': '840', 'uy': '858', 'uz': '860', 'vu': '548', 've': '862', 'vn': '704', 'vg': '092', 'vi': '850',
    'wf': '876', 'eh': '732', 'ye': '887', 'zm': '894', 'zw': '716', 'ax': '248', 'afg': '004', 'alb': '008', 'dza': '012',
    'asm': '016', 'and': '020', 'ago': '024', 'aia': '660', 'ata': '010', 'atg': '028', 'arg': '032', 'arm': '051', 'abw': '533',
    'aus': '036', 'aut': '040', 'aze': '031', 'bhs': '044', 'bhr': '048', 'bgd': '050', 'brb': '052', 'blr': '112', 'bel': '056',
    'blz': '084', 'ben': '204', 'bmu': '060', 'btn': '064', 'bol': '068', 'bes': '535', 'bih': '070', 'bwa': '072', 'bvt': '074',
    'bra': '076', 'iot': '086', 'brn': '096', 'bgr': '100', 'bfa': '854', 'bdi': '108', 'cpv': '132', 'khm': '116', 'cmr': '120',
    'can': '124', 'cym': '136', 'caf': '140', 'tcd': '148', 'chl': '152', 'chn': '156', 'cxr': '162', 'cck': '166', 'col': '170',
    'com': '174', 'cod': '180', 'cog': '178', 'cok': '184', 'cri': '188', 'hrv': '191', 'cub': '192', 'cuw': '531', 'cyp': '196',
    'cze': '203', 'civ': '384', 'dnk': '208', 'dji': '262', 'dma': '212', 'dom': '214', 'ecu': '218', 'egy': '818', 'slv': '222',
    'gnq': '226', 'eri': '232', 'est': '233', 'swz': '748', 'eth': '231', 'flk': '238', 'fro': '234', 'fji': '242', 'fin': '246',
    'fra': '250', 'guf': '254', 'pyf': '258', 'atf': '260', 'gab': '266', 'gmb': '270', 'geo': '268', 'deu': '276', 'gha': '288',
    'gib': '292', 'grc': '300', 'grl': '304', 'grd': '308', 'glp': '312', 'gum': '316', 'gtm': '320', 'ggy': '831', 'gin': '324',
    'gnb': '624', 'guy': '328', 'hti': '332', 'hmd': '334', 'vat': '336', 'hnd': '340', 'hkg': '344', 'hun': '348', 'isl': '352',
    'ind': '356', 'idn': '360', 'irn': '364', 'irq': '368', 'irl': '372', 'imn': '833', 'isr': '376', 'ita': '380', 'jam': '388',
    'jpn': '392', 'jey': '832', 'jor': '400', 'kaz': '398', 'ken': '404', 'kir': '296', 'prk': '408', 'kor': '410', 'kwt': '414',
    'kgz': '417', 'lao': '418', 'lva': '428', 'lbn': '422', 'lso': '426', 'lbr': '430', 'lby': '434', 'lie': '438', 'ltu': '440',
    'lux': '442', 'mac': '446', 'mdg': '450', 'mwi': '454', 'mys': '458', 'mdv': '462', 'mli': '466', 'mlt': '470', 'mhl': '584',
    'mtq': '474', 'mrt': '478', 'mus': '480', 'myt': '175', 'mex': '484', 'fsm': '583', 'mda': '498', 'mco': '492', 'mng': '496',
    'mne': '499', 'msr': '500', 'mar': '504', 'moz': '508', 'mmr': '104', 'nam': '516', 'nru': '520', 'npl': '524', 'nld': '528',
    'ncl': '540', 'nzl': '554', 'nic': '558', 'ner': '562', 'nga': '566', 'niu': '570', 'nfk': '574', 'mnp': '580', 'nor': '578',
    'omn': '512', 'pak': '586', 'plw': '585', 'pse': '275', 'pan': '591', 'png': '598', 'pry': '600', 'per': '604', 'phl': '608',
    'pcn': '612', 'pol': '616', 'prt': '620', 'pri': '630', 'qat': '634', 'mkd': '807', 'rou': '642', 'rus': '643', 'rwa': '646',
    'reu': '638', 'blm': '652', 'shn': '654', 'kna': '659', 'lca': '662', 'maf': '663', 'spm': '666', 'vct': '670', 'wsm': '882',
    'smr': '674', 'stp': '678', 'sau': '682', 'sen': '686', 'srb': '688', 'syc': '690', 'sle': '694', 'sgp': '702', 'sxm': '534',
    'svk': '703', 'svn': '705', 'slb': '090', 'som': '706', 'zaf': '710', 'sgs': '239', 'ssd': '728', 'esp': '724', 'lka': '144',
    'sdn': '729', 'sur': '740', 'sjm': '744', 'swe': '752', 'che': '756', 'syr': '760', 'twn': '158', 'tjk': '762', 'tza': '834',
    'tha': '764', 'tls': '626', 'tgo': '768', 'tkl': '772', 'ton': '776', 'tto': '780', 'tun': '788', 'tur': '792', 'tkm': '795',
    'tca': '796', 'tuv': '798', 'uga': '800', 'ukr': '804', 'are': '784', 'gbr': '826', 'umi': '581', 'usa': '840', 'ury': '858',
    'uzb': '860', 'vut': '548', 'ven': '862', 'vnm': '704', 'vgb': '092', 'vir': '850', 'wlf': '876', 'esh': '732', 'yem': '887',
    'zmb': '894', 'zwe': '716', 'ala': '248'
  };

  // Map M.49 children to parents
  const regionParents = {
    '090': '054', '212': '029', '231': '014', '652': '029', '604': '005', '798': '061', '574': '053', '268': '145', '498': '151',
    '430': '011', '426': '018', '316': '057', '703': '151', '112': '151', '296': '057', '262': '014', '009': '001', '070': '039',
    '234': '154', '060': '021', '015': '002', '807': '039', '780': '029', '818': '015', '740': '005', '540': '054', '499': '039',
    '586': '034', '151': '150', '462': '034', '188': '013', '418': '035', '148': '017', '116': '035', '061': '009', '728': '014',
    '792': '145', '056': '155', '028': '029', '666': '021', '144': '034', '570': '061', '524': '034', '531': '029', '334': '053',
    '270': '011', '832': '830', '414': '145', '292': '039', '528': '155', '202': '002', '581': '057', '584': '057', '654': '011',
    '120': '017', '155': '150', '620': '039', '694': '011', '239': '005', '036': '053', '150': '001', '054': '009', '716': '014',
    '064': '034', '764': '035', '233': '154', '466': '011', '180': '017', '704': '035', '882': '061', '140': '017', '048': '145',
    '024': '017', '340': '013', '682': '145', '092': '029', '035': '142', '744': '154', '005': '419', '004': '034', '356': '034',
    '422': '145', '008': '039', '830': '154', '500': '029', '057': '009', '324': '011', '214': '029', '336': '039', '312': '029',
    '050': '034', '408': '030', '072': '018', '039': '150', '663': '029', '548': '054', '670': '029', '376': '145', '678': '017',
    '204': '011', '021': '019', '029': '419', '608': '035', '470': '039', '478': '011', '136': '029', '192': '029', '512': '145',
    '388': '029', '196': '145', '016': '061', '084': '013', '583': '057', '585': '057', '706': '014', '124': '021', '308': '029',
    '686': '011', '002': '001', '100': '151', '154': '150', '578': '154', '690': '014', '248': '154', '800': '014', '300': '039',
    '156': '030', '533': '029', '226': '017', '854': '011', '051': '145', '104': '035', '534': '029', '208': '154', '152': '005',
    '634': '145', '702': '035', '068': '005', '558': '013', '276': '155', '566': '011', '762': '143', '626': '035', '170': '005',
    '364': '034', '368': '145', '674': '039', '440': '154', '535': '029', '804': '151', '484': '013', '840': '021', '019': '001',
    '850': '029', '031': '145', '562': '011', '630': '029', '752': '154', '020': '039', '894': '014', '012': '015', '030': '142',
    '710': '018', '504': '015', '360': '035', '646': '014', '428': '154', '238': '005', '616': '151', '492': '155', '887': '145',
    '705': '039', '434': '015', '258': '061', '458': '035', '010': '001', '218': '005', '480': '014', '660': '029', '288': '011',
    '174': '014', '348': '151', '328': '005', '142': '001', '496': '030', '772': '061', '729': '015', '380': '039', '417': '143',
    '834': '014', '724': '039', '260': '014', '474': '029', '040': '155', '662': '029', '858': '005', '011': '202', '013': '419',
    '242': '054', '304': '021', '145': '142', '598': '054', '826': '154', '132': '011', '398': '143', '017': '202', '600': '005',
    '254': '005', '352': '154', '203': '151', '410': '030', '344': '030', '014': '202', '108': '014', '796': '029', '320': '013',
    '756': '155', '096': '035', '018': '202', '788': '015', '246': '154', '052': '029', '250': '155', '143': '142', '044': '029',
    '688': '039', '184': '061', '053': '009', '454': '014', '419': '019', '191': '039', '438': '155', '748': '018', '516': '018',
    '232': '014', '175': '014', '520': '057', '178': '017', '795': '143', '332': '029', '222': '013', '384': '011', '624': '011',
    '086': '014', '776': '061', '032': '005', '831': '830', '275': '145', '400': '145', '860': '143', '508': '014', '442': '155',
    '612': '061', '266': '017', '162': '053', '446': '030', '862': '005', '760': '145', '580': '057', '643': '151', '591': '013',
    '076': '005', '768': '011', '074': '005', '166': '053', '450': '014', '638': '014', '642': '151', '732': '015', '784': '145',
    '392': '030', '554': '053', '372': '154', '659': '029', '680': '830', '876': '061', '034': '142', '404': '014', '833': '154'
  };

  // Own-property lookup so that keys such as 'constructor' never resolve to Object.prototype members.
  function lookup(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
  }

  // Parses the BCP 47 locale into an ISO-639-3 language, ISO-15924 script, and M.49 region.
  // Returns { locale, language, script?, region }, where `locale` is the identifier exactly as given,
  // or null when the identifier is not a string or does not match the BCP 47 pattern.
  function parseLocale(locale) {
    if (typeof locale !== 'string') {
      return null;
    }
    // Replace grandfathered identifiers (tags are case-insensitive) for parsing only;
    // the caller's identifier is kept so the value handed back is one the caller supplied.
    const replacement = lookup(grandfatheredMap, locale.toLowerCase());
    const tag = (replacement === undefined ? locale : replacement);
    const match = pattern.exec(tag);
    if (match === null) {
      return null; // Not a BCP 47 tag
    }
    const groups = match.groups;
    // Make result
    const result = { locale: locale };
    // Set language
    if (groups.language) {
      const lowercase = groups.language.toLowerCase();
      const mapped = lookup(languageMap, lowercase);
      result.language = (mapped ? mapped : lowercase);
    } else {
      // Private-use and unmapped grandfathered tags have no language subtag. Use the whole
      // tag so that such a locale only matches itself, not every other language-less tag.
      result.language = tag.toLowerCase();
    }
    // Set script
    if (groups.script) {
      result.script = groups.script.toLowerCase();
    }
    // Set region
    if (groups.region) {
      const lowercase = groups.region.toLowerCase();
      const mapped = lookup(regionMap, lowercase);
      result.region = (mapped ? mapped : lowercase);
    } else {
      result.region = '001'; // Default to Earth
    }
    // Handle Chinese that uses regions and not scripts, e.g. zh_CN should use Hans
    if (result.language === 'zho' && result.script === undefined) {
      if (['156', '702'].includes(result.region)) {
        result.script = 'hans'; // China & Singapore
      } else if (['158', '344', '446'].includes(result.region)) {
        result.script = 'hant'; // Taiwan, Hong Kong, & Macao
      }
    }
    return result;
  }

  // Returns the distance of parent/child relationships of M.49 region codes, e.g. Argentina (032) to Latin America (419) is 2.
  // 0 will be returned if they are the same, null if there is no direct relationship.
  function regionRelationshipDistance(child, parent) {
    if (child === parent) {
      return 0; // Same region
    }
    // Walk up the containment chain until the parent is found or the chain ends.
    let distance = 0;
    let code = lookup(regionParents, child);
    while (code !== undefined) {
      distance += 1;
      if (code === parent) {
        return distance;
      }
      code = lookup(regionParents, code);
    }
    return null; // No relationship
  }

  // Parse available locales, dropping the ones that cannot be parsed
  const parsedAvailableLocales = (Array.isArray(availableLocales) ? availableLocales : [])
    .map(function (locale) {
      return parseLocale(locale);
    })
    .filter(function (parsed) {
      return parsed !== null;
    });

  // Attempt to find a match for each preference until one is found
  const preferenceList = (Array.isArray(preferences) ? preferences : []);
  for (const rawPreference of preferenceList) {
    const preference = parseLocale(rawPreference);
    if (preference === null) {
      continue; // Unparseable preference; try the next one
    }
    // Languages must match and script must match or be undefined
    const languageLocales = parsedAvailableLocales.filter(function (locale) {
      return locale.language === preference.language
        && (locale.script === undefined || preference.script === undefined || locale.script === preference.script);
    });
    // Sort results (filter returned a fresh array, so sorting in place is safe)
    languageLocales.sort(function (a, b) {
      // Prefer matching script over none
      const script = preference.script;
      if (a.script === script && b.script !== script) {
        return -1;
      } else if (a.script !== script && b.script === script) {
        return 1;
      }
      // Prefer closer region relationships
      const distanceA = regionRelationshipDistance(preference.region, a.region);
      const distanceB = regionRelationshipDistance(preference.region, b.region);
      if (distanceA !== null && distanceB !== null) {
        return distanceA - distanceB;
      } else if (distanceA !== null) {
        return -1;
      } else if (distanceB !== null) {
        return 1;
      }
      // Prefer higher level regions. A region with no known path to the world (001)
      // has a null level, which counts as 0 in this comparison.
      const regionLevelA = regionRelationshipDistance(a.region, '001');
      const regionLevelB = regionRelationshipDistance(b.region, '001');
      return (regionLevelA === null ? 0 : regionLevelA) - (regionLevelB === null ? 0 : regionLevelB);
    });
    // Return the best matched region
    if (languageLocales.length > 0) {
      return languageLocales[0].locale;
    }
  }
  return defaultLocale; // No matches
}
// Demo runs: each row is [preferences, availableLocales, defaultLocale]; the result is
// printed in row order and the expected output is noted at the end of the row.
{
  const demoRuns = [
    [['es-419', 'en'], ['es', 'en'], 'en'], // 'es'
    [['pt-Latn-PT', 'en', 'es', 'zh-Hans'], ['es', 'en', 'pt-Cyrl-PT', 'pt-PT', 'pt-Latn-BR'], 'en'], // 'pt-Latn-BR'
    [['zh-SG', 'pt-Latn-BR', 'en', 'es'], ['es', 'en', 'pt-PT', 'pt-Cyrl-BR', 'pt-Latn-PT', 'zh-Hant', 'zh-Hans'], 'en'], // 'zh-Hans'
    [['pt-Latn-BR', 'en', 'es'], ['es', 'en', 'pt-Cyrl-BR', 'zh-Hant', 'zh-Hans'], 'es'], // 'en'
    [['es_Latn_MX'], ['es_MX', 'es'], 'en'], // 'es_MX'
    [['i-navajo', 'en-US'], ['en-US', 'en-UK', 'es-419', 'nv'], 'en'] // 'nv'
  ];
  for (const [wanted, offered, fallback] of demoRuns) {
    console.log(preferredLocale(wanted, offered, fallback));
  }
}
<?php
class LocalePicker
{
// Parses the BCP 47 locale into an ISO-639-3 language, ISO-15924 script, and M.49 region.
//
// $locale   - BCP 47 identifier ('-' or '_' separated); grandfathered tags are matched case-insensitively.
// $language - (out) lower-case language code, mapped to ISO-639-3 when the code is in the table;
//             null when the identifier has no language subtag or cannot be parsed.
// $script   - (out) lower-case script code, or null when absent. For Chinese without a script it is
//             derived from the region (hans for China & Singapore, hant for Taiwan, Hong Kong, & Macao).
// $region   - (out) M.49 region code when the region is in the table, otherwise the lower-case
//             region subtag; '001' when no region is present.
//
// NOTE(review): the outputs are reset on entry so values from an earlier call cannot leak through
// the by-reference arguments; confirm no caller relies on pre-seeded defaults.
private static function parseLocale($locale, &$language, &$script, &$region)
{
    // Reset the outputs so stale caller values are never reported as parsed data
    $language = null;
    $script = null;
    $region = null;
    // Map grandfathered language identifiers to ISO-639 (keys are lower case; lookups are case-insensitive)
    $grandfathered_map = array(
        'art-lojban' => 'jbo', 'i-ami' => 'ami', 'i-bnn' => 'bnn', 'i-hak' => 'hak', 'i-klingon' => 'tlh', 'i-lux' => 'lb', 'i-navajo' => 'nv',
        'i-pwn' => 'pwn', 'i-tao' => 'tao', 'i-tay' => 'tay', 'i-tsu' => 'tsu', 'no-bok' => 'nb', 'no-nyn' => 'nn', 'sgn-be-fr' => 'sfb',
        'sgn-be-nl' => 'vgt', 'sgn-ch-de' => 'sgg', 'zh-guoyu' => 'zh', 'zh-hakka' => 'hak', 'zh-min-nan' => 'nan', 'zh-xiang' => 'hsn'
    );
    // Replace grandfathered identifiers
    $locale = (string) $locale;
    $grandfathered_key = strtolower($locale);
    if (isset($grandfathered_map[$grandfathered_key])) {
        $locale = $grandfathered_map[$grandfathered_key];
    }
    // BCP 47 regex. The extension singleton is any alphanumeric except 'x' (which introduces private use).
    $pattern = "/^(((?'language'(([a-z]{2,3})((-|_)(?'extlang'[a-z]{3})){0,3})|([a-z]{4})|([a-z]{5,8}))((-|_)(?'script'[a-z]{4}))?((-|_)(?'region'[a-z]{2}|[0-9]{3}))?((-|_)(?'variant'[a-z0-9]{5,8}|[0-9][a-z0-9]{3}))*((-|_)(?'extensions'[0-9a-wy-z](-[a-z0-9]{2,8})+))*(-x((-|_)(?'privateuse1'[a-z0-9]{1,8}))+)?)|(x((-|_)(?'privateuse'[a-z0-9]{1,8}))+)|(?'grandfathered'(?'irregular'en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(?'regular'art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$/i";
    $match = array();
    preg_match($pattern, $locale, $match);
    // Map ISO-639 parts 1, 2b, & 2t to part 3 codes
    $language_map = array(
        'aa' => 'aar', 'ab' => 'abk', 'ae' => 'ave', 'af' => 'afr', 'ak' => 'aka', 'alb' => 'sqi', 'am' => 'amh', 'an' => 'arg', 'ar' => 'ara',
        'arm' => 'hye', 'as' => 'asm', 'av' => 'ava', 'ay' => 'aym', 'az' => 'aze', 'ba' => 'bak', 'baq' => 'eus', 'be' => 'bel', 'bg' => 'bul',
        'bi' => 'bis', 'bm' => 'bam', 'bn' => 'ben', 'bo' => 'bod', 'br' => 'bre', 'bs' => 'bos', 'bur' => 'mya', 'ca' => 'cat', 'ce' => 'che',
        'ch' => 'cha', 'chi' => 'zho', 'co' => 'cos', 'cr' => 'cre', 'cs' => 'ces', 'cu' => 'chu', 'cv' => 'chv', 'cy' => 'cym', 'cze' => 'ces',
        'da' => 'dan', 'de' => 'deu', 'dut' => 'nld', 'dv' => 'div', 'dz' => 'dzo', 'ee' => 'ewe', 'el' => 'ell', 'en' => 'eng', 'eo' => 'epo',
        'es' => 'spa', 'et' => 'est', 'eu' => 'eus', 'fa' => 'fas', 'ff' => 'ful', 'fi' => 'fin', 'fj' => 'fij', 'fo' => 'fao', 'fr' => 'fra',
        'fre' => 'fra', 'fy' => 'fry', 'ga' => 'gle', 'gd' => 'gla', 'geo' => 'kat', 'ger' => 'deu', 'gl' => 'glg', 'gn' => 'grn', 'gre' => 'ell',
        'gu' => 'guj', 'gv' => 'glv', 'ha' => 'hau', 'he' => 'heb', 'hi' => 'hin', 'ho' => 'hmo', 'hr' => 'hrv', 'ht' => 'hat', 'hu' => 'hun',
        'hy' => 'hye', 'hz' => 'her', 'ia' => 'ina', 'ice' => 'isl', 'id' => 'ind', 'ie' => 'ile', 'ig' => 'ibo', 'ii' => 'iii', 'ik' => 'ipk',
        'io' => 'ido', 'is' => 'isl', 'it' => 'ita', 'iu' => 'iku', 'ja' => 'jpn', 'jv' => 'jav', 'ka' => 'kat', 'kg' => 'kon', 'ki' => 'kik',
        'kj' => 'kua', 'kk' => 'kaz', 'kl' => 'kal', 'km' => 'khm', 'kn' => 'kan', 'ko' => 'kor', 'kr' => 'kau', 'ks' => 'kas', 'ku' => 'kur',
        'kv' => 'kom', 'kw' => 'cor', 'ky' => 'kir', 'la' => 'lat', 'lb' => 'ltz', 'lg' => 'lug', 'li' => 'lim', 'ln' => 'lin', 'lo' => 'lao',
        'lt' => 'lit', 'lu' => 'lub', 'lv' => 'lav', 'mac' => 'mkd', 'mao' => 'mri', 'may' => 'msa', 'mg' => 'mlg', 'mh' => 'mah', 'mi' => 'mri',
        'mk' => 'mkd', 'ml' => 'mal', 'mn' => 'mon', 'mr' => 'mar', 'ms' => 'msa', 'mt' => 'mlt', 'my' => 'mya', 'na' => 'nau', 'nb' => 'nob',
        'nd' => 'nde', 'ne' => 'nep', 'ng' => 'ndo', 'nl' => 'nld', 'nn' => 'nno', 'no' => 'nor', 'nr' => 'nbl', 'nv' => 'nav', 'ny' => 'nya',
        'oc' => 'oci', 'oj' => 'oji', 'om' => 'orm', 'or' => 'ori', 'os' => 'oss', 'pa' => 'pan', 'per' => 'fas', 'pi' => 'pli', 'pl' => 'pol',
        'ps' => 'pus', 'pt' => 'por', 'qu' => 'que', 'rm' => 'roh', 'rn' => 'run', 'ro' => 'ron', 'ru' => 'rus', 'rum' => 'ron', 'rw' => 'kin',
        'sa' => 'san', 'sc' => 'srd', 'sd' => 'snd', 'se' => 'sme', 'sg' => 'sag', 'sh' => 'hbs', 'si' => 'sin', 'sk' => 'slk', 'sl' => 'slv',
        'slo' => 'slk', 'sm' => 'smo', 'sn' => 'sna', 'so' => 'som', 'sq' => 'sqi', 'sr' => 'srp', 'ss' => 'ssw', 'st' => 'sot', 'su' => 'sun',
        'sv' => 'swe', 'sw' => 'swa', 'ta' => 'tam', 'te' => 'tel', 'tg' => 'tgk', 'th' => 'tha', 'ti' => 'tir', 'tib' => 'bod', 'tk' => 'tuk',
        'tl' => 'tgl', 'tn' => 'tsn', 'to' => 'ton', 'tr' => 'tur', 'ts' => 'tso', 'tt' => 'tat', 'tw' => 'twi', 'ty' => 'tah', 'ug' => 'uig',
        'uk' => 'ukr', 'ur' => 'urd', 'uz' => 'uzb', 've' => 'ven', 'vi' => 'vie', 'vo' => 'vol', 'wa' => 'wln', 'wel' => 'cym', 'wo' => 'wol',
        'xh' => 'xho', 'yi' => 'yid', 'yo' => 'yor', 'za' => 'zha', 'zh' => 'zho', 'zu' => 'zul'
    );
    // Map ISO 3166-1 to UN M.49
    $region_map = array(
        'af' => '004', 'al' => '008', 'dz' => '012', 'as' => '016', 'ad' => '020', 'ao' => '024', 'ai' => '660', 'aq' => '010', 'ag' => '028',
        'ar' => '032', 'am' => '051', 'aw' => '533', 'au' => '036', 'at' => '040', 'az' => '031', 'bs' => '044', 'bh' => '048', 'bd' => '050',
        'bb' => '052', 'by' => '112', 'be' => '056', 'bz' => '084', 'bj' => '204', 'bm' => '060', 'bt' => '064', 'bo' => '068', 'bq' => '535',
        'ba' => '070', 'bw' => '072', 'bv' => '074', 'br' => '076', 'io' => '086', 'bn' => '096', 'bg' => '100', 'bf' => '854', 'bi' => '108',
        'cv' => '132', 'kh' => '116', 'cm' => '120', 'ca' => '124', 'ky' => '136', 'cf' => '140', 'td' => '148', 'cl' => '152', 'cn' => '156',
        'cx' => '162', 'cc' => '166', 'co' => '170', 'km' => '174', 'cd' => '180', 'cg' => '178', 'ck' => '184', 'cr' => '188', 'hr' => '191',
        'cu' => '192', 'cw' => '531', 'cy' => '196', 'cz' => '203', 'ci' => '384', 'dk' => '208', 'dj' => '262', 'dm' => '212', 'do' => '214',
        'ec' => '218', 'eg' => '818', 'sv' => '222', 'gq' => '226', 'er' => '232', 'ee' => '233', 'sz' => '748', 'et' => '231', 'fk' => '238',
        'fo' => '234', 'fj' => '242', 'fi' => '246', 'fr' => '250', 'gf' => '254', 'pf' => '258', 'tf' => '260', 'ga' => '266', 'gm' => '270',
        'ge' => '268', 'de' => '276', 'gh' => '288', 'gi' => '292', 'gr' => '300', 'gl' => '304', 'gd' => '308', 'gp' => '312', 'gu' => '316',
        'gt' => '320', 'gg' => '831', 'gn' => '324', 'gw' => '624', 'gy' => '328', 'ht' => '332', 'hm' => '334', 'va' => '336', 'hn' => '340',
        'hk' => '344', 'hu' => '348', 'is' => '352', 'in' => '356', 'id' => '360', 'ir' => '364', 'iq' => '368', 'ie' => '372', 'im' => '833',
        'il' => '376', 'it' => '380', 'jm' => '388', 'jp' => '392', 'je' => '832', 'jo' => '400', 'kz' => '398', 'ke' => '404', 'ki' => '296',
        'kp' => '408', 'kr' => '410', 'kw' => '414', 'kg' => '417', 'la' => '418', 'lv' => '428', 'lb' => '422', 'ls' => '426', 'lr' => '430',
        'ly' => '434', 'li' => '438', 'lt' => '440', 'lu' => '442', 'mo' => '446', 'mg' => '450', 'mw' => '454', 'my' => '458', 'mv' => '462',
        'ml' => '466', 'mt' => '470', 'mh' => '584', 'mq' => '474', 'mr' => '478', 'mu' => '480', 'yt' => '175', 'mx' => '484', 'fm' => '583',
        'md' => '498', 'mc' => '492', 'mn' => '496', 'me' => '499', 'ms' => '500', 'ma' => '504', 'mz' => '508', 'mm' => '104', 'na' => '516',
        'nr' => '520', 'np' => '524', 'nl' => '528', 'nc' => '540', 'nz' => '554', 'ni' => '558', 'ne' => '562', 'ng' => '566', 'nu' => '570',
        'nf' => '574', 'mp' => '580', 'no' => '578', 'om' => '512', 'pk' => '586', 'pw' => '585', 'ps' => '275', 'pa' => '591', 'pg' => '598',
        'py' => '600', 'pe' => '604', 'ph' => '608', 'pn' => '612', 'pl' => '616', 'pt' => '620', 'pr' => '630', 'qa' => '634', 'mk' => '807',
        'ro' => '642', 'ru' => '643', 'rw' => '646', 're' => '638', 'bl' => '652', 'sh' => '654', 'kn' => '659', 'lc' => '662', 'mf' => '663',
        'pm' => '666', 'vc' => '670', 'ws' => '882', 'sm' => '674', 'st' => '678', 'sa' => '682', 'sn' => '686', 'rs' => '688', 'sc' => '690',
        'sl' => '694', 'sg' => '702', 'sx' => '534', 'sk' => '703', 'si' => '705', 'sb' => '090', 'so' => '706', 'za' => '710', 'gs' => '239',
        'ss' => '728', 'es' => '724', 'lk' => '144', 'sd' => '729', 'sr' => '740', 'sj' => '744', 'se' => '752', 'ch' => '756', 'sy' => '760',
        'tw' => '158', 'tj' => '762', 'tz' => '834', 'th' => '764', 'tl' => '626', 'tg' => '768', 'tk' => '772', 'to' => '776', 'tt' => '780',
        'tn' => '788', 'tr' => '792', 'tm' => '795', 'tc' => '796', 'tv' => '798', 'ug' => '800', 'ua' => '804', 'ae' => '784', 'gb' => '826',
        'um' => '581', 'us' => '840', 'uy' => '858', 'uz' => '860', 'vu' => '548', 've' => '862', 'vn' => '704', 'vg' => '092', 'vi' => '850',
        'wf' => '876', 'eh' => '732', 'ye' => '887', 'zm' => '894', 'zw' => '716', 'ax' => '248', 'afg' => '004', 'alb' => '008', 'dza' => '012',
        'asm' => '016', 'and' => '020', 'ago' => '024', 'aia' => '660', 'ata' => '010', 'atg' => '028', 'arg' => '032', 'arm' => '051', 'abw' => '533',
        'aus' => '036', 'aut' => '040', 'aze' => '031', 'bhs' => '044', 'bhr' => '048', 'bgd' => '050', 'brb' => '052', 'blr' => '112', 'bel' => '056',
        'blz' => '084', 'ben' => '204', 'bmu' => '060', 'btn' => '064', 'bol' => '068', 'bes' => '535', 'bih' => '070', 'bwa' => '072', 'bvt' => '074',
        'bra' => '076', 'iot' => '086', 'brn' => '096', 'bgr' => '100', 'bfa' => '854', 'bdi' => '108', 'cpv' => '132', 'khm' => '116', 'cmr' => '120',
        'can' => '124', 'cym' => '136', 'caf' => '140', 'tcd' => '148', 'chl' => '152', 'chn' => '156', 'cxr' => '162', 'cck' => '166', 'col' => '170',
        'com' => '174', 'cod' => '180', 'cog' => '178', 'cok' => '184', 'cri' => '188', 'hrv' => '191', 'cub' => '192', 'cuw' => '531', 'cyp' => '196',
        'cze' => '203', 'civ' => '384', 'dnk' => '208', 'dji' => '262', 'dma' => '212', 'dom' => '214', 'ecu' => '218', 'egy' => '818', 'slv' => '222',
        'gnq' => '226', 'eri' => '232', 'est' => '233', 'swz' => '748', 'eth' => '231', 'flk' => '238', 'fro' => '234', 'fji' => '242', 'fin' => '246',
        'fra' => '250', 'guf' => '254', 'pyf' => '258', 'atf' => '260', 'gab' => '266', 'gmb' => '270', 'geo' => '268', 'deu' => '276', 'gha' => '288',
        'gib' => '292', 'grc' => '300', 'grl' => '304', 'grd' => '308', 'glp' => '312', 'gum' => '316', 'gtm' => '320', 'ggy' => '831', 'gin' => '324',
        'gnb' => '624', 'guy' => '328', 'hti' => '332', 'hmd' => '334', 'vat' => '336', 'hnd' => '340', 'hkg' => '344', 'hun' => '348', 'isl' => '352',
        'ind' => '356', 'idn' => '360', 'irn' => '364', 'irq' => '368', 'irl' => '372', 'imn' => '833', 'isr' => '376', 'ita' => '380', 'jam' => '388',
        'jpn' => '392', 'jey' => '832', 'jor' => '400', 'kaz' => '398', 'ken' => '404', 'kir' => '296', 'prk' => '408', 'kor' => '410', 'kwt' => '414',
        'kgz' => '417', 'lao' => '418', 'lva' => '428', 'lbn' => '422', 'lso' => '426', 'lbr' => '430', 'lby' => '434', 'lie' => '438', 'ltu' => '440',
        'lux' => '442', 'mac' => '446', 'mdg' => '450', 'mwi' => '454', 'mys' => '458', 'mdv' => '462', 'mli' => '466', 'mlt' => '470', 'mhl' => '584',
        'mtq' => '474', 'mrt' => '478', 'mus' => '480', 'myt' => '175', 'mex' => '484', 'fsm' => '583', 'mda' => '498', 'mco' => '492', 'mng' => '496',
        'mne' => '499', 'msr' => '500', 'mar' => '504', 'moz' => '508', 'mmr' => '104', 'nam' => '516', 'nru' => '520', 'npl' => '524', 'nld' => '528',
        'ncl' => '540', 'nzl' => '554', 'nic' => '558', 'ner' => '562', 'nga' => '566', 'niu' => '570', 'nfk' => '574', 'mnp' => '580', 'nor' => '578',
        'omn' => '512', 'pak' => '586', 'plw' => '585', 'pse' => '275', 'pan' => '591', 'png' => '598', 'pry' => '600', 'per' => '604', 'phl' => '608',
        'pcn' => '612', 'pol' => '616', 'prt' => '620', 'pri' => '630', 'qat' => '634', 'mkd' => '807', 'rou' => '642', 'rus' => '643', 'rwa' => '646',
        'reu' => '638', 'blm' => '652', 'shn' => '654', 'kna' => '659', 'lca' => '662', 'maf' => '663', 'spm' => '666', 'vct' => '670', 'wsm' => '882',
        'smr' => '674', 'stp' => '678', 'sau' => '682', 'sen' => '686', 'srb' => '688', 'syc' => '690', 'sle' => '694', 'sgp' => '702', 'sxm' => '534',
        'svk' => '703', 'svn' => '705', 'slb' => '090', 'som' => '706', 'zaf' => '710', 'sgs' => '239', 'ssd' => '728', 'esp' => '724', 'lka' => '144',
        'sdn' => '729', 'sur' => '740', 'sjm' => '744', 'swe' => '752', 'che' => '756', 'syr' => '760', 'twn' => '158', 'tjk' => '762', 'tza' => '834',
        'tha' => '764', 'tls' => '626', 'tgo' => '768', 'tkl' => '772', 'ton' => '776', 'tto' => '780', 'tun' => '788', 'tur' => '792', 'tkm' => '795',
        'tca' => '796', 'tuv' => '798', 'uga' => '800', 'ukr' => '804', 'are' => '784', 'gbr' => '826', 'umi' => '581', 'usa' => '840', 'ury' => '858',
        'uzb' => '860', 'vut' => '548', 'ven' => '862', 'vnm' => '704', 'vgb' => '092', 'vir' => '850', 'wlf' => '876', 'esh' => '732', 'yem' => '887',
        'zmb' => '894', 'zwe' => '716', 'ala' => '248'
    );
    // Set variables. Codes missing from a table are passed through unchanged; isset() avoids
    // the undefined-key warning a direct read would raise for them.
    if (isset($match['language'][0])) {
        $lowercase = strtolower($match['language']);
        $language = (isset($language_map[$lowercase]) ? $language_map[$lowercase] : $lowercase);
    }
    if (isset($match['script'][0])) {
        $script = strtolower($match['script']);
    }
    if (isset($match['region'][0])) {
        $lowercase = strtolower($match['region']);
        $region = (isset($region_map[$lowercase]) ? $region_map[$lowercase] : $lowercase);
    } else {
        $region = '001'; // Default to Earth
    }
    // Handle Chinese that uses regions and not scripts, e.g. zh_CN should use Hans
    if ($language === 'zho' && $script === null) {
        if (in_array($region, array('156', '702'), true)) {
            $script = 'hans'; // China & Singapore
        } else if (in_array($region, array('158', '344', '446'), true)) {
            $script = 'hant'; // Taiwan, Hong Kong, & Macao
        }
    }
}
/**
 * Returns the number of parent steps between two UN M.49 region codes,
 * e.g. Argentina (032) to Latin America (419) is 2.
 *
 * @param string $child  Region code to start from (walked upwards).
 * @param string $parent Region code expected to be the same as, or an ancestor of, $child.
 * @return int|null 0 if both codes are the same, the number of steps up to
 *                  $parent if it is an ancestor of $child, null otherwise.
 */
private static function regionRelationshipDistance($child, $parent)
{
    if ($child === $parent) {
        return 0; // Same region
    }
    // Map of region codes to their direct parents. Kept static so the table is
    // built once instead of on every call (this runs inside a usort comparator).
    static $region_parents = array(
        '090' => '054', '212' => '029', '231' => '014', '652' => '029', '604' => '005', '798' => '061', '574' => '053', '268' => '145', '498' => '151',
        '430' => '011', '426' => '018', '316' => '057', '703' => '151', '112' => '151', '296' => '057', '262' => '014', '009' => '001', '070' => '039',
        '234' => '154', '060' => '021', '015' => '002', '807' => '039', '780' => '029', '818' => '015', '740' => '005', '540' => '054', '499' => '039',
        '586' => '034', '151' => '150', '462' => '034', '188' => '013', '418' => '035', '148' => '017', '116' => '035', '061' => '009', '728' => '014',
        '792' => '145', '056' => '155', '028' => '029', '666' => '021', '144' => '034', '570' => '061', '524' => '034', '531' => '029', '334' => '053',
        '270' => '011', '832' => '830', '414' => '145', '292' => '039', '528' => '155', '202' => '002', '581' => '057', '584' => '057', '654' => '011',
        '120' => '017', '155' => '150', '620' => '039', '694' => '011', '239' => '005', '036' => '053', '150' => '001', '054' => '009', '716' => '014',
        '064' => '034', '764' => '035', '233' => '154', '466' => '011', '180' => '017', '704' => '035', '882' => '061', '140' => '017', '048' => '145',
        '024' => '017', '340' => '013', '682' => '145', '092' => '029', '035' => '142', '744' => '154', '005' => '419', '004' => '034', '356' => '034',
        '422' => '145', '008' => '039', '830' => '154', '500' => '029', '057' => '009', '324' => '011', '214' => '029', '336' => '039', '312' => '029',
        '050' => '034', '408' => '030', '072' => '018', '039' => '150', '663' => '029', '548' => '054', '670' => '029', '376' => '145', '678' => '017',
        '204' => '011', '021' => '019', '029' => '419', '608' => '035', '470' => '039', '478' => '011', '136' => '029', '192' => '029', '512' => '145',
        '388' => '029', '196' => '145', '016' => '061', '084' => '013', '583' => '057', '585' => '057', '706' => '014', '124' => '021', '308' => '029',
        '686' => '011', '002' => '001', '100' => '151', '154' => '150', '578' => '154', '690' => '014', '248' => '154', '800' => '014', '300' => '039',
        '156' => '030', '533' => '029', '226' => '017', '854' => '011', '051' => '145', '104' => '035', '534' => '029', '208' => '154', '152' => '005',
        '634' => '145', '702' => '035', '068' => '005', '558' => '013', '276' => '155', '566' => '011', '762' => '143', '626' => '035', '170' => '005',
        '364' => '034', '368' => '145', '674' => '039', '440' => '154', '535' => '029', '804' => '151', '484' => '013', '840' => '021', '019' => '001',
        '850' => '029', '031' => '145', '562' => '011', '630' => '029', '752' => '154', '020' => '039', '894' => '014', '012' => '015', '030' => '142',
        '710' => '018', '504' => '015', '360' => '035', '646' => '014', '428' => '154', '238' => '005', '616' => '151', '492' => '155', '887' => '145',
        '705' => '039', '434' => '015', '258' => '061', '458' => '035', '010' => '001', '218' => '005', '480' => '014', '660' => '029', '288' => '011',
        '174' => '014', '348' => '151', '328' => '005', '142' => '001', '496' => '030', '772' => '061', '729' => '015', '380' => '039', '417' => '143',
        '834' => '014', '724' => '039', '260' => '014', '474' => '029', '040' => '155', '662' => '029', '858' => '005', '011' => '202', '013' => '419',
        '242' => '054', '304' => '021', '145' => '142', '598' => '054', '826' => '154', '132' => '011', '398' => '143', '017' => '202', '600' => '005',
        '254' => '005', '352' => '154', '203' => '151', '410' => '030', '344' => '030', '014' => '202', '108' => '014', '796' => '029', '320' => '013',
        '756' => '155', '096' => '035', '018' => '202', '788' => '015', '246' => '154', '052' => '029', '250' => '155', '143' => '142', '044' => '029',
        '688' => '039', '184' => '061', '053' => '009', '454' => '014', '419' => '019', '191' => '039', '438' => '155', '748' => '018', '516' => '018',
        '232' => '014', '175' => '014', '520' => '057', '178' => '017', '795' => '143', '332' => '029', '222' => '013', '384' => '011', '624' => '011',
        '086' => '014', '776' => '061', '032' => '005', '831' => '830', '275' => '145', '400' => '145', '860' => '143', '508' => '014', '442' => '155',
        '612' => '061', '266' => '017', '162' => '053', '446' => '030', '862' => '005', '760' => '145', '580' => '057', '643' => '151', '591' => '013',
        '076' => '005', '768' => '011', '074' => '005', '166' => '053', '450' => '014', '638' => '014', '642' => '151', '732' => '015', '784' => '145',
        '392' => '030', '554' => '053', '372' => '154', '659' => '029', '680' => '830', '876' => '061', '034' => '142', '404' => '014', '833' => '154'
    );
    // Traverse up the region tree until the parent is found. isset() ends the
    // walk at a root ('001') or an unknown code without reading an undefined key.
    $distance = 0;
    $code = $child;
    while (isset($region_parents[$code])) {
        $code = $region_parents[$code];
        $distance += 1;
        if ($code === $parent) {
            return $distance;
        }
    }
    return null; // No relationship
}
/**
 * Picks the available locale that best satisfies the user's ordered preferences.
 *
 * Preferences are tried in order; the first one that has at least one
 * compatible available locale decides the result.
 *
 * @param array  $preferences       Locale identifiers, most preferred first.
 * @param array  $available_locales Locale identifiers that can be served.
 * @param string $default_locale    Returned when no preference can be satisfied.
 * @return string The chosen identifier from $available_locales, or $default_locale.
 */
public static function preferredLocale($preferences, $available_locales, $default_locale)
{
    // Expands an identifier into array(identifier, language, script, region)
    $expand = function ($identifier) {
        LocalePicker::parseLocale($identifier, $language, $script, $region);
        return array($identifier, $language, $script, $region);
    };
    $candidates = array_map($expand, $available_locales);
    $wanted_list = array_map($expand, $preferences);
    foreach ($wanted_list as $wanted) {
        // Keep candidates with the same language whose script is compatible
        // (identical, or unspecified on either side)
        $matches = array();
        foreach ($candidates as $candidate) {
            if ($candidate[1] !== $wanted[1]) {
                continue;
            }
            if ($candidate[2] === null || $wanted[2] === null || $candidate[2] === $wanted[2]) {
                $matches[] = $candidate;
            }
        }
        if (!$matches) {
            continue; // Nothing usable for this preference, try the next one
        }
        usort($matches, function ($left, $right) use ($wanted) {
            // 1. A candidate whose script equals the preferred script goes first
            $left_script_ok = ($left[2] === $wanted[2]);
            $right_script_ok = ($right[2] === $wanted[2]);
            if ($left_script_ok !== $right_script_ok) {
                return $left_script_ok ? -1 : 1;
            }
            // 2. Then the candidate whose region is the nearest ancestor of the preferred region
            $left_distance = LocalePicker::regionRelationshipDistance($wanted[3], $left[3]);
            $right_distance = LocalePicker::regionRelationshipDistance($wanted[3], $right[3]);
            if ($left_distance !== null && $right_distance !== null) {
                return $left_distance - $right_distance;
            }
            if ($left_distance !== null) {
                return -1;
            }
            if ($right_distance !== null) {
                return 1;
            }
            // 3. Otherwise the candidate whose region sits closer to the world (001)
            $left_level = LocalePicker::regionRelationshipDistance($left[3], '001');
            $right_level = LocalePicker::regionRelationshipDistance($right[3], '001');
            return $left_level - $right_level;
        });
        // The best match is now first
        return $matches[0][0];
    }
    return $default_locale; // No matches
}
}
// Usage examples: each row is (preferences, available locales, default locale),
// with the expected output noted on the right. One result is printed per line.
call_user_func(function () {
    $examples = array(
        array(array('es-419', 'en'), array('es', 'en'), 'en'), // 'es'
        array(array('pt-Latn-PT', 'en', 'es', 'zh-Hans'), array('es', 'en', 'pt-Cyrl-PT', 'pt-PT', 'pt-Latn-BR'), 'en'), // 'pt-Latn-BR'
        array(array('zh-SG', 'pt-Latn-BR', 'en', 'es'), array('es', 'en', 'pt-PT', 'pt-Cyrl-BR', 'pt-Latn-PT', 'zh-Hant', 'zh-Hans'), 'en'), // 'zh-Hans'
        array(array('pt-Latn-BR', 'en', 'es'), array('es', 'en', 'pt-Cyrl-BR', 'zh-Hant', 'zh-Hans'), 'es'), // 'en'
        array(array('es_Latn_MX'), array('es_MX', 'es'), 'en'), // 'es_MX'
        array(array('i-navajo', 'en-US'), array('en-US', 'en-UK', 'es-419', 'nv'), 'en'), // 'nv'
    );
    foreach ($examples as $example) {
        list($wanted, $offered, $fallback) = $example;
        echo LocalePicker::preferredLocale($wanted, $offered, $fallback) ."\n";
    }
});
import re
class LocalePicker:
    """Picks the best available locale for an ordered list of user preferences.

    Locale identifiers are parsed as IETF BCP 47 tags ('-' or '_' separated) and
    normalised to an ISO-639-3 language, an ISO-15924 script and a UN M.49 region
    before they are compared.
    """

    # BCP 47 regex. The extension singleton is any alphanumeric except 'x'
    # (which introduces private use), written as the explicit class [0-9a-wy-z].
    REGEX = re.compile("^(((?P<language>(([a-z]{2,3})((-|_)(?P<extlang>[a-z]{3})){0,3})|([a-z]{4})|([a-z]{5,8}))((-|_)(?P<script>[a-z]{4}))?((-|_)(?P<region>[a-z]{2}|[0-9]{3}))?((-|_)(?P<variant>[a-z0-9]{5,8}|[0-9][a-z0-9]{3}))*((-|_)(?P<extensions>[0-9a-wy-z](-[a-z0-9]{2,8})+))*(-x((-|_)(?P<privateuse1>[a-z0-9]{1,8}))+)?)|(x((-|_)(?P<privateuse>[a-z0-9]{1,8}))+)|(?P<grandfathered>(?P<irregular>en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(?P<regular>art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$", re.IGNORECASE)

    # The lookup tables below are class-level constants so they are built once,
    # not on every parse / distance call.

    # Map grandfathered language identifiers to ISO-639
    _GRANDFATHERED_MAP = {
        'art-lojban': 'jbo', 'i-ami': 'ami', 'i-bnn': 'bnn', 'i-hak': 'hak', 'i-klingon': 'tlh', 'i-lux': 'lb', 'i-navajo': 'nv',
        'i-pwn': 'pwn', 'i-tao': 'tao', 'i-tay': 'tay', 'i-tsu': 'tsu', 'no-bok': 'nb', 'no-nyn': 'nn', 'sgn-BE-FR': 'sfb',
        'sgn-BE-NL': 'vgt', 'sgn-CH-DE': 'sgg', 'zh-guoyu': 'zh', 'zh-hakka': 'hak', 'zh-min-nan': 'nan', 'zh-xiang': 'hsn'
    }

    # Map ISO-639 parts 1, 2b, & 2t to part 3 codes
    _LANGUAGE_MAP = {
        'aa': 'aar', 'ab': 'abk', 'ae': 'ave', 'af': 'afr', 'ak': 'aka', 'alb': 'sqi', 'am': 'amh', 'an': 'arg', 'ar': 'ara',
        'arm': 'hye', 'as': 'asm', 'av': 'ava', 'ay': 'aym', 'az': 'aze', 'ba': 'bak', 'baq': 'eus', 'be': 'bel', 'bg': 'bul',
        'bi': 'bis', 'bm': 'bam', 'bn': 'ben', 'bo': 'bod', 'br': 'bre', 'bs': 'bos', 'bur': 'mya', 'ca': 'cat', 'ce': 'che',
        'ch': 'cha', 'chi': 'zho', 'co': 'cos', 'cr': 'cre', 'cs': 'ces', 'cu': 'chu', 'cv': 'chv', 'cy': 'cym', 'cze': 'ces',
        'da': 'dan', 'de': 'deu', 'dut': 'nld', 'dv': 'div', 'dz': 'dzo', 'ee': 'ewe', 'el': 'ell', 'en': 'eng', 'eo': 'epo',
        'es': 'spa', 'et': 'est', 'eu': 'eus', 'fa': 'fas', 'ff': 'ful', 'fi': 'fin', 'fj': 'fij', 'fo': 'fao', 'fr': 'fra',
        'fre': 'fra', 'fy': 'fry', 'ga': 'gle', 'gd': 'gla', 'geo': 'kat', 'ger': 'deu', 'gl': 'glg', 'gn': 'grn', 'gre': 'ell',
        'gu': 'guj', 'gv': 'glv', 'ha': 'hau', 'he': 'heb', 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', 'ht': 'hat', 'hu': 'hun',
        'hy': 'hye', 'hz': 'her', 'ia': 'ina', 'ice': 'isl', 'id': 'ind', 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', 'ik': 'ipk',
        'io': 'ido', 'is': 'isl', 'it': 'ita', 'iu': 'iku', 'ja': 'jpn', 'jv': 'jav', 'ka': 'kat', 'kg': 'kon', 'ki': 'kik',
        'kj': 'kua', 'kk': 'kaz', 'kl': 'kal', 'km': 'khm', 'kn': 'kan', 'ko': 'kor', 'kr': 'kau', 'ks': 'kas', 'ku': 'kur',
        'kv': 'kom', 'kw': 'cor', 'ky': 'kir', 'la': 'lat', 'lb': 'ltz', 'lg': 'lug', 'li': 'lim', 'ln': 'lin', 'lo': 'lao',
        'lt': 'lit', 'lu': 'lub', 'lv': 'lav', 'mac': 'mkd', 'mao': 'mri', 'may': 'msa', 'mg': 'mlg', 'mh': 'mah', 'mi': 'mri',
        'mk': 'mkd', 'ml': 'mal', 'mn': 'mon', 'mr': 'mar', 'ms': 'msa', 'mt': 'mlt', 'my': 'mya', 'na': 'nau', 'nb': 'nob',
        'nd': 'nde', 'ne': 'nep', 'ng': 'ndo', 'nl': 'nld', 'nn': 'nno', 'no': 'nor', 'nr': 'nbl', 'nv': 'nav', 'ny': 'nya',
        'oc': 'oci', 'oj': 'oji', 'om': 'orm', 'or': 'ori', 'os': 'oss', 'pa': 'pan', 'per': 'fas', 'pi': 'pli', 'pl': 'pol',
        'ps': 'pus', 'pt': 'por', 'qu': 'que', 'rm': 'roh', 'rn': 'run', 'ro': 'ron', 'ru': 'rus', 'rum': 'ron', 'rw': 'kin',
        'sa': 'san', 'sc': 'srd', 'sd': 'snd', 'se': 'sme', 'sg': 'sag', 'sh': 'hbs', 'si': 'sin', 'sk': 'slk', 'sl': 'slv',
        'slo': 'slk', 'sm': 'smo', 'sn': 'sna', 'so': 'som', 'sq': 'sqi', 'sr': 'srp', 'ss': 'ssw', 'st': 'sot', 'su': 'sun',
        'sv': 'swe', 'sw': 'swa', 'ta': 'tam', 'te': 'tel', 'tg': 'tgk', 'th': 'tha', 'ti': 'tir', 'tib': 'bod', 'tk': 'tuk',
        'tl': 'tgl', 'tn': 'tsn', 'to': 'ton', 'tr': 'tur', 'ts': 'tso', 'tt': 'tat', 'tw': 'twi', 'ty': 'tah', 'ug': 'uig',
        'uk': 'ukr', 'ur': 'urd', 'uz': 'uzb', 've': 'ven', 'vi': 'vie', 'vo': 'vol', 'wa': 'wln', 'wel': 'cym', 'wo': 'wol',
        'xh': 'xho', 'yi': 'yid', 'yo': 'yor', 'za': 'zha', 'zh': 'zho', 'zu': 'zul'
    }

    # Map ISO 3166-1 to UN M.49
    _REGION_MAP = {
        'af': '004', 'al': '008', 'dz': '012', 'as': '016', 'ad': '020', 'ao': '024', 'ai': '660', 'aq': '010', 'ag': '028',
        'ar': '032', 'am': '051', 'aw': '533', 'au': '036', 'at': '040', 'az': '031', 'bs': '044', 'bh': '048', 'bd': '050',
        'bb': '052', 'by': '112', 'be': '056', 'bz': '084', 'bj': '204', 'bm': '060', 'bt': '064', 'bo': '068', 'bq': '535',
        'ba': '070', 'bw': '072', 'bv': '074', 'br': '076', 'io': '086', 'bn': '096', 'bg': '100', 'bf': '854', 'bi': '108',
        'cv': '132', 'kh': '116', 'cm': '120', 'ca': '124', 'ky': '136', 'cf': '140', 'td': '148', 'cl': '152', 'cn': '156',
        'cx': '162', 'cc': '166', 'co': '170', 'km': '174', 'cd': '180', 'cg': '178', 'ck': '184', 'cr': '188', 'hr': '191',
        'cu': '192', 'cw': '531', 'cy': '196', 'cz': '203', 'ci': '384', 'dk': '208', 'dj': '262', 'dm': '212', 'do': '214',
        'ec': '218', 'eg': '818', 'sv': '222', 'gq': '226', 'er': '232', 'ee': '233', 'sz': '748', 'et': '231', 'fk': '238',
        'fo': '234', 'fj': '242', 'fi': '246', 'fr': '250', 'gf': '254', 'pf': '258', 'tf': '260', 'ga': '266', 'gm': '270',
        'ge': '268', 'de': '276', 'gh': '288', 'gi': '292', 'gr': '300', 'gl': '304', 'gd': '308', 'gp': '312', 'gu': '316',
        'gt': '320', 'gg': '831', 'gn': '324', 'gw': '624', 'gy': '328', 'ht': '332', 'hm': '334', 'va': '336', 'hn': '340',
        'hk': '344', 'hu': '348', 'is': '352', 'in': '356', 'id': '360', 'ir': '364', 'iq': '368', 'ie': '372', 'im': '833',
        'il': '376', 'it': '380', 'jm': '388', 'jp': '392', 'je': '832', 'jo': '400', 'kz': '398', 'ke': '404', 'ki': '296',
        'kp': '408', 'kr': '410', 'kw': '414', 'kg': '417', 'la': '418', 'lv': '428', 'lb': '422', 'ls': '426', 'lr': '430',
        'ly': '434', 'li': '438', 'lt': '440', 'lu': '442', 'mo': '446', 'mg': '450', 'mw': '454', 'my': '458', 'mv': '462',
        'ml': '466', 'mt': '470', 'mh': '584', 'mq': '474', 'mr': '478', 'mu': '480', 'yt': '175', 'mx': '484', 'fm': '583',
        'md': '498', 'mc': '492', 'mn': '496', 'me': '499', 'ms': '500', 'ma': '504', 'mz': '508', 'mm': '104', 'na': '516',
        'nr': '520', 'np': '524', 'nl': '528', 'nc': '540', 'nz': '554', 'ni': '558', 'ne': '562', 'ng': '566', 'nu': '570',
        'nf': '574', 'mp': '580', 'no': '578', 'om': '512', 'pk': '586', 'pw': '585', 'ps': '275', 'pa': '591', 'pg': '598',
        'py': '600', 'pe': '604', 'ph': '608', 'pn': '612', 'pl': '616', 'pt': '620', 'pr': '630', 'qa': '634', 'mk': '807',
        'ro': '642', 'ru': '643', 'rw': '646', 're': '638', 'bl': '652', 'sh': '654', 'kn': '659', 'lc': '662', 'mf': '663',
        'pm': '666', 'vc': '670', 'ws': '882', 'sm': '674', 'st': '678', 'sa': '682', 'sn': '686', 'rs': '688', 'sc': '690',
        'sl': '694', 'sg': '702', 'sx': '534', 'sk': '703', 'si': '705', 'sb': '090', 'so': '706', 'za': '710', 'gs': '239',
        'ss': '728', 'es': '724', 'lk': '144', 'sd': '729', 'sr': '740', 'sj': '744', 'se': '752', 'ch': '756', 'sy': '760',
        'tw': '158', 'tj': '762', 'tz': '834', 'th': '764', 'tl': '626', 'tg': '768', 'tk': '772', 'to': '776', 'tt': '780',
        'tn': '788', 'tr': '792', 'tm': '795', 'tc': '796', 'tv': '798', 'ug': '800', 'ua': '804', 'ae': '784', 'gb': '826',
        'um': '581', 'us': '840', 'uy': '858', 'uz': '860', 'vu': '548', 've': '862', 'vn': '704', 'vg': '092', 'vi': '850',
        'wf': '876', 'eh': '732', 'ye': '887', 'zm': '894', 'zw': '716', 'ax': '248', 'afg': '004', 'alb': '008', 'dza': '012',
        'asm': '016', 'and': '020', 'ago': '024', 'aia': '660', 'ata': '010', 'atg': '028', 'arg': '032', 'arm': '051', 'abw': '533',
        'aus': '036', 'aut': '040', 'aze': '031', 'bhs': '044', 'bhr': '048', 'bgd': '050', 'brb': '052', 'blr': '112', 'bel': '056',
        'blz': '084', 'ben': '204', 'bmu': '060', 'btn': '064', 'bol': '068', 'bes': '535', 'bih': '070', 'bwa': '072', 'bvt': '074',
        'bra': '076', 'iot': '086', 'brn': '096', 'bgr': '100', 'bfa': '854', 'bdi': '108', 'cpv': '132', 'khm': '116', 'cmr': '120',
        'can': '124', 'cym': '136', 'caf': '140', 'tcd': '148', 'chl': '152', 'chn': '156', 'cxr': '162', 'cck': '166', 'col': '170',
        'com': '174', 'cod': '180', 'cog': '178', 'cok': '184', 'cri': '188', 'hrv': '191', 'cub': '192', 'cuw': '531', 'cyp': '196',
        'cze': '203', 'civ': '384', 'dnk': '208', 'dji': '262', 'dma': '212', 'dom': '214', 'ecu': '218', 'egy': '818', 'slv': '222',
        'gnq': '226', 'eri': '232', 'est': '233', 'swz': '748', 'eth': '231', 'flk': '238', 'fro': '234', 'fji': '242', 'fin': '246',
        'fra': '250', 'guf': '254', 'pyf': '258', 'atf': '260', 'gab': '266', 'gmb': '270', 'geo': '268', 'deu': '276', 'gha': '288',
        'gib': '292', 'grc': '300', 'grl': '304', 'grd': '308', 'glp': '312', 'gum': '316', 'gtm': '320', 'ggy': '831', 'gin': '324',
        'gnb': '624', 'guy': '328', 'hti': '332', 'hmd': '334', 'vat': '336', 'hnd': '340', 'hkg': '344', 'hun': '348', 'isl': '352',
        'ind': '356', 'idn': '360', 'irn': '364', 'irq': '368', 'irl': '372', 'imn': '833', 'isr': '376', 'ita': '380', 'jam': '388',
        'jpn': '392', 'jey': '832', 'jor': '400', 'kaz': '398', 'ken': '404', 'kir': '296', 'prk': '408', 'kor': '410', 'kwt': '414',
        'kgz': '417', 'lao': '418', 'lva': '428', 'lbn': '422', 'lso': '426', 'lbr': '430', 'lby': '434', 'lie': '438', 'ltu': '440',
        'lux': '442', 'mac': '446', 'mdg': '450', 'mwi': '454', 'mys': '458', 'mdv': '462', 'mli': '466', 'mlt': '470', 'mhl': '584',
        'mtq': '474', 'mrt': '478', 'mus': '480', 'myt': '175', 'mex': '484', 'fsm': '583', 'mda': '498', 'mco': '492', 'mng': '496',
        'mne': '499', 'msr': '500', 'mar': '504', 'moz': '508', 'mmr': '104', 'nam': '516', 'nru': '520', 'npl': '524', 'nld': '528',
        'ncl': '540', 'nzl': '554', 'nic': '558', 'ner': '562', 'nga': '566', 'niu': '570', 'nfk': '574', 'mnp': '580', 'nor': '578',
        'omn': '512', 'pak': '586', 'plw': '585', 'pse': '275', 'pan': '591', 'png': '598', 'pry': '600', 'per': '604', 'phl': '608',
        'pcn': '612', 'pol': '616', 'prt': '620', 'pri': '630', 'qat': '634', 'mkd': '807', 'rou': '642', 'rus': '643', 'rwa': '646',
        'reu': '638', 'blm': '652', 'shn': '654', 'kna': '659', 'lca': '662', 'maf': '663', 'spm': '666', 'vct': '670', 'wsm': '882',
        'smr': '674', 'stp': '678', 'sau': '682', 'sen': '686', 'srb': '688', 'syc': '690', 'sle': '694', 'sgp': '702', 'sxm': '534',
        'svk': '703', 'svn': '705', 'slb': '090', 'som': '706', 'zaf': '710', 'sgs': '239', 'ssd': '728', 'esp': '724', 'lka': '144',
        'sdn': '729', 'sur': '740', 'sjm': '744', 'swe': '752', 'che': '756', 'syr': '760', 'twn': '158', 'tjk': '762', 'tza': '834',
        'tha': '764', 'tls': '626', 'tgo': '768', 'tkl': '772', 'ton': '776', 'tto': '780', 'tun': '788', 'tur': '792', 'tkm': '795',
        'tca': '796', 'tuv': '798', 'uga': '800', 'ukr': '804', 'are': '784', 'gbr': '826', 'umi': '581', 'usa': '840', 'ury': '858',
        'uzb': '860', 'vut': '548', 'ven': '862', 'vnm': '704', 'vgb': '092', 'vir': '850', 'wlf': '876', 'esh': '732', 'yem': '887',
        'zmb': '894', 'zwe': '716', 'ala': '248'
    }

    # Map of UN M.49 region codes to their parents
    _REGION_PARENTS = {
        '090': '054', '212': '029', '231': '014', '652': '029', '604': '005', '798': '061', '574': '053', '268': '145', '498': '151',
        '430': '011', '426': '018', '316': '057', '703': '151', '112': '151', '296': '057', '262': '014', '009': '001', '070': '039',
        '234': '154', '060': '021', '015': '002', '807': '039', '780': '029', '818': '015', '740': '005', '540': '054', '499': '039',
        '586': '034', '151': '150', '462': '034', '188': '013', '418': '035', '148': '017', '116': '035', '061': '009', '728': '014',
        '792': '145', '056': '155', '028': '029', '666': '021', '144': '034', '570': '061', '524': '034', '531': '029', '334': '053',
        '270': '011', '832': '830', '414': '145', '292': '039', '528': '155', '202': '002', '581': '057', '584': '057', '654': '011',
        '120': '017', '155': '150', '620': '039', '694': '011', '239': '005', '036': '053', '150': '001', '054': '009', '716': '014',
        '064': '034', '764': '035', '233': '154', '466': '011', '180': '017', '704': '035', '882': '061', '140': '017', '048': '145',
        '024': '017', '340': '013', '682': '145', '092': '029', '035': '142', '744': '154', '005': '419', '004': '034', '356': '034',
        '422': '145', '008': '039', '830': '154', '500': '029', '057': '009', '324': '011', '214': '029', '336': '039', '312': '029',
        '050': '034', '408': '030', '072': '018', '039': '150', '663': '029', '548': '054', '670': '029', '376': '145', '678': '017',
        '204': '011', '021': '019', '029': '419', '608': '035', '470': '039', '478': '011', '136': '029', '192': '029', '512': '145',
        '388': '029', '196': '145', '016': '061', '084': '013', '583': '057', '585': '057', '706': '014', '124': '021', '308': '029',
        '686': '011', '002': '001', '100': '151', '154': '150', '578': '154', '690': '014', '248': '154', '800': '014', '300': '039',
        '156': '030', '533': '029', '226': '017', '854': '011', '051': '145', '104': '035', '534': '029', '208': '154', '152': '005',
        '634': '145', '702': '035', '068': '005', '558': '013', '276': '155', '566': '011', '762': '143', '626': '035', '170': '005',
        '364': '034', '368': '145', '674': '039', '440': '154', '535': '029', '804': '151', '484': '013', '840': '021', '019': '001',
        '850': '029', '031': '145', '562': '011', '630': '029', '752': '154', '020': '039', '894': '014', '012': '015', '030': '142',
        '710': '018', '504': '015', '360': '035', '646': '014', '428': '154', '238': '005', '616': '151', '492': '155', '887': '145',
        '705': '039', '434': '015', '258': '061', '458': '035', '010': '001', '218': '005', '480': '014', '660': '029', '288': '011',
        '174': '014', '348': '151', '328': '005', '142': '001', '496': '030', '772': '061', '729': '015', '380': '039', '417': '143',
        '834': '014', '724': '039', '260': '014', '474': '029', '040': '155', '662': '029', '858': '005', '011': '202', '013': '419',
        '242': '054', '304': '021', '145': '142', '598': '054', '826': '154', '132': '011', '398': '143', '017': '202', '600': '005',
        '254': '005', '352': '154', '203': '151', '410': '030', '344': '030', '014': '202', '108': '014', '796': '029', '320': '013',
        '756': '155', '096': '035', '018': '202', '788': '015', '246': '154', '052': '029', '250': '155', '143': '142', '044': '029',
        '688': '039', '184': '061', '053': '009', '454': '014', '419': '019', '191': '039', '438': '155', '748': '018', '516': '018',
        '232': '014', '175': '014', '520': '057', '178': '017', '795': '143', '332': '029', '222': '013', '384': '011', '624': '011',
        '086': '014', '776': '061', '032': '005', '831': '830', '275': '145', '400': '145', '860': '143', '508': '014', '442': '155',
        '612': '061', '266': '017', '162': '053', '446': '030', '862': '005', '760': '145', '580': '057', '643': '151', '591': '013',
        '076': '005', '768': '011', '074': '005', '166': '053', '450': '014', '638': '014', '642': '151', '732': '015', '784': '145',
        '392': '030', '554': '053', '372': '154', '659': '029', '680': '830', '876': '061', '034': '142', '404': '014', '833': '154'
    }

    @staticmethod
    def __parse_locale(locale):
        """Parses an IETF BCP 47 locale into language, script and region.

        Returns a tuple ``(locale, language, script, region)`` where ``locale`` is
        the identifier exactly as passed in, ``language`` is lower-cased and mapped
        to ISO-639-3 when known, ``script`` is lower-cased (or None) and ``region``
        is mapped to UN M.49 when known, defaulting to '001' (World).
        Returns None when the identifier is not a string or does not match REGEX.
        """
        if not isinstance(locale, str):
            return None
        # Parse the modern replacement of a grandfathered identifier, but keep the
        # caller's original string so the value handed back is one they supplied.
        tag = LocalePicker._GRANDFATHERED_MAP.get(locale, locale)
        match = LocalePicker.REGEX.match(tag)
        if match is None:
            return None
        # Set language
        language = match.group('language')
        if language:
            language = language.lower()
            language = LocalePicker._LANGUAGE_MAP.get(language, language)
        # Set script
        script = match.group('script')
        if script:
            script = script.lower()
        # Set region
        region = match.group('region')
        if region:
            region = region.lower()
            region = LocalePicker._REGION_MAP.get(region, region)
        else:
            region = '001'  # Default to Earth
        # Handle Chinese that uses regions and not scripts, e.g. zh_CN should use Hans
        if language == 'zho' and script is None:
            if region in ('156', '702'):
                script = 'hans'  # China & Singapore
            elif region in ('158', '344', '446'):
                script = 'hant'  # Taiwan, Hong Kong, & Macao
        return (locale, language, script, region)

    @staticmethod
    def __region_relationship_distance(child, parent):
        """Returns the number of parent steps from ``child`` up to ``parent``.

        Both arguments are UN M.49 region codes, e.g. Argentina (032) to Latin
        America (419) is 2. Returns 0 if they are the same and None if ``parent``
        is not an ancestor of ``child``.
        """
        if child == parent:
            return 0  # Same region
        # Traverse up the region tree until the parent is found
        distance = 0
        code = child
        while code in LocalePicker._REGION_PARENTS:
            code = LocalePicker._REGION_PARENTS[code]
            distance += 1
            if code == parent:
                return distance
        return None  # No relationship

    @staticmethod
    def preferred_locale(preferences, available_locales, default_locale):
        """Finds the best available locale for the user's ordered preferences.

        Preferences are tried in order. For the first preference with at least one
        candidate (same language; script equal or unspecified on either side) the
        candidates are ranked by, in order:
          1. script equal to the preferred script,
          2. region equal to, or the nearest ancestor of, the preferred region,
          3. region closest to the world level (001).
        Identifiers that cannot be parsed are ignored.

        Returns the chosen entry of ``available_locales`` exactly as supplied, or
        ``default_locale`` when no preference can be satisfied.
        """
        parse = LocalePicker.__parse_locale
        distance = LocalePicker.__region_relationship_distance
        # Parse available locales and preferences, dropping unparseable entries
        candidates = [parsed for parsed in map(parse, available_locales) if parsed is not None]
        wanted = [parsed for parsed in map(parse, preferences) if parsed is not None]
        # Attempt to find a match for each preference until one is found
        for _, language, script, region in wanted:
            # Languages must match and script must match or be None
            matches = [
                candidate for candidate in candidates
                if candidate[1] == language
                and (candidate[2] is None or script is None or candidate[2] == script)
            ]
            if not matches:
                continue

            def rank(candidate):
                # Compare distances with "is None", never truthiness: a distance
                # of 0 is the best possible value, not a missing one.
                script_rank = 0 if candidate[2] == script else 1
                steps = distance(region, candidate[3])
                region_rank = (1, 0) if steps is None else (0, steps)
                level = distance(candidate[3], '001')
                level_rank = 99 if level is None else level
                return (script_rank, region_rank, level_rank)

            # min() keeps the first of equally ranked candidates, like a stable sort
            return min(matches, key=rank)[0]
        # No matches
        return default_locale
# Usage examples: each row is (preferences, available locales, default locale),
# with the expected output noted on the right. One result is printed per line.
for wanted, offered, fallback in (
    (['es-419', 'en'], ['es', 'en'], 'en'),  # 'es'
    (['pt-Latn-PT', 'en', 'es', 'zh-Hans'], ['es', 'en', 'pt-Cyrl-PT', 'pt-PT', 'pt-Latn-BR'], 'en'),  # 'pt-Latn-BR'
    (['zh-SG', 'pt-Latn-BR', 'en', 'es'], ['es', 'en', 'pt-PT', 'pt-Cyrl-BR', 'pt-Latn-PT', 'zh-Hant', 'zh-Hans'], 'en'),  # 'zh-Hans'
    (['pt-Latn-BR', 'en', 'es'], ['es', 'en', 'pt-Cyrl-BR', 'zh-Hant', 'zh-Hans'], 'es'),  # 'en'
    (['es_Latn_MX'], ['es_MX', 'es'], 'en'),  # 'es_MX'
    (['i-navajo', 'en-US'], ['en-US', 'en-UK', 'es-419', 'nv'], 'en'),  # 'nv'
):
    print(LocalePicker.preferred_locale(wanted, offered, fallback))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment