Created
July 19, 2023 14:08
-
-
Save altilunium/ba6fb98f694c02a985e9e25858c4370c to your computer and use it in GitHub Desktop.
MediaWiki Fallback Languages https://phabricator.wikimedia.org/T258492#9028038
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maps a MediaWiki language code to the display name printed for it when
# rendering fallback chains.  Some codes are legacy aliases of another code
# and intentionally share (or nearly share) that code's name, e.g.
# 'rup' / 'roa-rup', 'sgs' / 'bat-smg', 'yue' / 'zh-yue'.
# NOTE(review): 'crh-ro' ('tatarşa') and 'fiu-vro' ('voro') look like
# autonyms rather than English names -- confirm whether that is intended.
lang_dict = {
    'sma': 'Southern Sami',
    'ace': 'Achinese',
    'ang': 'Old English',
    'agq': 'Aghem',
    'af': 'Afrikaans',
    'ak': 'Akan',
    'gsw': 'Swiss German',
    'als': 'Alemannisch',
    'en-us': 'American English',
    'ase': 'American Sign Language',
    'smn': 'Inari Sami',
    'an': 'Aragonese',
    'rup': 'Aromanian',
    'roa-rup': 'Aromanian',
    'frp': 'Arpitan',
    'ast': 'Asturian',
    'atj': 'Atikamekw',
    'gn': 'Guarani',
    'ay': 'Aymara',
    'az': 'Azerbaijani',
    'ksf': 'Bafia',
    'bfd': 'Bafut',
    'abs': 'Ambonese Malay',
    'bkc': 'Baka',
    'gor': 'Gorontalo',
    'id': 'Indonesian',
    'ms': 'Malay',
    'bm': 'Bambara',
    'bkh': 'Bakako',
    'bax': 'Bamun',
    'nan': 'Min Nan Chinese',
    'zh-min-nan': 'Chinese (Min Nan)',
    'bjn': 'Banjar',
    'ban': 'Balinese',
    'map-bms': 'Basa Banyumasan',
    'bas': 'Basaa',
    'btm': 'Batak Mandailing',
    'bbc': 'Batak Toba',
    'bbc-latn': 'Batak Toba (Latin script)',
    'bew': 'Betawi',
    'sje': 'Pite Sami',
    'bcl': 'Central Bikol',
    'bi': 'Bislama',
    'bar': 'Bavarian',
    'bs': 'Bosnian',
    'brh': 'Brahui',
    'en-gb': 'British English',
    'br': 'Breton',
    'en-ca': 'Canadian English',
    'cps': 'Capiznon',
    'ca': 'Catalan',
    'ceb': 'Cebuano',
    'cs': 'Czech',
    'cho': 'Choctaw',
    'ch': 'Chamorro',
    'cbk-zam': 'Chavacano',
    'ny': 'Nyanja',
    'sn': 'Shona',
    'tum': 'Tumbuka',
    'sei': 'Seri',
    'co': 'Corsican',
    'cy': 'Welsh',
    'dga': 'Dagaare',
    'dag': 'Dagbani',
    'se-no': 'Northern Sami (Norway)',
    'da': 'Danish',
    'se': 'Northern Sami',
    'se-se': 'Northern Sami (Sweden)',
    'se-fi': 'Northern Sami (Finland)',
    'pdc': 'Pennsylvania German',
    'de': 'German',
    'de-formal': 'German (formal address)',
    'nv': 'Navajo',
    'dsb': 'Lower Sorbian',
    'na': 'Nauru',
    'dua': 'Duala',
    'etu': 'Ejagham',
    'dtp': 'Central Dusun',
    'mh': 'Marshallese',
    'et': 'Estonian',
    'vmw': 'Makhuwa',
    'egl': 'Emilian',
    'eml': 'Emiliano-Romagnolo',
    'en': 'English',
    'es': 'Spanish',
    'es-formal': 'Spanish (formal address)',
    'es-419': 'Latin American Spanish',
    'eo': 'Esperanto',
    'ext': 'Extremaduran',
    'eto': 'Eton',
    'eu': 'Basque',
    'ee': 'Ewe',
    'ewo': 'Ewondo',
    'wls': 'Wallisian',
    'gur': 'Frafra',
    'fmp': 'Fe\'Fe\'',
    'hif': 'Fiji Hindi',
    'hif-latn': 'Fiji Hindi (Latin script)',
    'fon': 'Fon',
    'fo': 'Faroese',
    'fr': 'French',
    'frc': 'Cajun French',
    'fy': 'Western Frisian',
    'ff': 'Fulah',
    'fur': 'Friulian',
    'gaa': 'Ga',
    'ga': 'Irish',
    'gv': 'Manx',
    'sm': 'Samoan',
    'gag': 'Gagauz',
    'gd': 'Scottish Gaelic',
    'gl': 'Galician',
    'aln': 'Gheg Albanian',
    'gya': 'Gbaya',
    'gpe': 'Ghanaian Pidgin',
    'bbj': 'Ghomala',
    'ki': 'Kikuyu',
    'gom-latn': 'Goan Konkani (Latin script)',
    'guw': 'Gun',
    'cnh': 'Hakha-Chin',
    'ha': 'Hausa',
    'haw': 'Hawaiian',
    'ho': 'Hiri Motu',
    'hsb': 'Upper Sorbian',
    'hr': 'Croatian',
    'hrx': 'Hunsrik',
    'io': 'Ido',
    'igl': 'Igala',
    'ig': 'Igbo',
    'rw': 'Kinyarwanda',
    'ilo': 'Iloko',
    'rn': 'Rundi',
    'hil': 'Hiligaynon',
    'ia': 'Interlingua',
    'ie': 'Interlingue',
    'ike-latn': 'Eastern Canadian (Latin script)',
    'ik': 'Inupiaq',
    'bto': 'Iriga Bicolano',
    'xh': 'Xhosa',
    'zu': 'Zulu',
    'is': 'Icelandic',
    'isu': 'Isu',
    'it': 'Italian',
    'jv': 'Javanese',
    'smj': 'Lule Sami',
    'jut': 'Jutish',
    'rmf': 'Finnish Kalo',
    'kea': 'Kabuverdianu',
    'kbp': 'Kabiye',
    'kl': 'Kalaallisut',
    'kr': 'Kanuri',
    'pam': 'Pampanga',
    'cak': 'Kaqchikel',
    'krl': 'Karelian',
    'csb': 'Kashubian',
    'ker': 'Kera',
    'kw': 'Cornish',
    'krj': 'Kinaray-a',
    'kiu': 'Kirmanjki',
    'sw': 'Swahili',
    'bkm': 'Kom',
    'kg': 'Kongo',
    'avk': 'Kotava',
    'ses': 'Koyraboro Senni',
    'ht': 'Haitian Creole',
    'kri': 'Krio',
    'gcr': 'Guianan Creole',
    'ku': 'Kurdish',
    'ku-latn': 'Kurdish (Latin script)',
    'kus': 'Kʋsaal',
    'fkv': 'Kvensk',
    'kj': 'Kuanyama',
    'nmg': 'Kwasio',
    'jbo': 'Lojban',
    'lld': 'Ladin',
    'lad': 'Ladino',
    'lns': 'Lamnso\'',
    'ltg': 'Latgalian',
    'la': 'Latin',
    'lv': 'Latvian',
    'lzz': 'Laz',
    'to': 'Tongan',
    'lb': 'Luxembourgish',
    'nia': 'Nias',
    'lt': 'Lithuanian',
    'lij': 'Ligurian',
    'li': 'Limburgish',
    'ln': 'Lingala',
    'lfn': 'Lingua Franca Nova',
    'liv': 'Livonian',
    'olo': 'Livvi-Karelian',
    'lmo': 'Lombard',
    'lg': 'Ganda',
    'mad': 'Madurese',
    'hu': 'Hungarian',
    'hu-formal': 'Hungarian (formal address)',
    'vmf': 'Main-Franconian',
    'mcp': 'Maka',
    'mg': 'Malagasy',
    'mt': 'Maltese',
    'mi': 'Māori',
    'mrh': 'Mara',
    'arn': 'Mapuche',
    'srq': 'Sirionó',
    'fit': 'Tornedalen Finnish',
    'byv': 'Medumba',
    'fat': 'Fanti',
    'min': 'Minangkabau',
    'cdo': 'Min Dong Chinese',
    'mwl': 'Mirandese',
    'lus': 'Mizo',
    'bqz': 'Mka\'a',
    'mos': 'Mossi',
    'mua': 'Mundang',
    'mus': 'Muscogee',
    'fj': 'Fijian',
    'nah': 'Nahuatl',
    'pcm': 'Nigerian Pidgin',
    'nap': 'Neapolitan',
    'nmz': 'Nawdm',
    'nnz': 'Nda\'Nda\'',
    'nl': 'Dutch',
    'nl-informal': 'Dutch (informal address)',
    'nds-nl': 'Low Saxon',
    'cr': 'Cree',
    'nnh': 'Ngiemboon',
    'nla': 'Ngombala',
    'nge': 'Ngémba',
    'yrl': 'Nheengatu',
    'niu': 'Niuean',
    'lem': 'Nomaande',
    'frr': 'Northern Frisian',
    'pih': 'Norfuk / Pitkern',
    'no': 'Norwegian',
    'nb': 'Norwegian Bokmal',
    'nn': 'Norwegian Nynorsk',
    'nrm': 'Norman',
    'nov': 'Novial',
    'yas': 'Nugunu',
    'sms': 'Skolt Sami',
    'nys': 'Nyungar',
    'uz-latn': 'Uzbek (Latin script)',
    'uz': 'Uzbek',
    'ann': 'Obolo',
    'oc': 'Occitan',
    'ojb': 'Northwestern Ojibwa',
    'om': 'Oromo',
    'ng': 'Ndonga',
    'de-at': 'Austrian German',
    'hz': 'Herero',
    'pfl': 'Palatine German',
    'pag': 'Pangasinan',
    'ami': 'Amis',
    'pap': 'Papiamento',
    'pap-aw': 'Papiamento (Aruba)',
    'pcd': 'Picard',
    'jam': 'Jamaican Creole English',
    'wes': 'Pidgin (Cameroon)',
    'pms': 'Piedmontese',
    'pwn': 'Paiwan',
    'nds': 'Low German',
    'pdt': 'Plautdietsch',
    'pt-br': 'Brazilian Portuguese',
    'pl': 'Polish',
    'pt': 'Portuguese',
    'prg': 'Prussian',
    'aa': 'Afar',
    'kaa': 'Kara-Kalpak',
    'quc': 'Kʼicheʼ',
    'kk-latn': 'Kazakh (Latin script)',
    'kk-tr': 'Kazakh (Turkey)',
    'crh': 'Crimean Tatar',
    'crh-latn': 'Crimean Tatar (Latin script)',
    'ty': 'Tahitian',
    'rgn': 'Romagnol',
    'ksh': 'Colognian',
    'ro': 'Romanian',
    'rmc': 'Carpathian Romani',
    'rmy': 'Vlax Romani',
    'rm': 'Romansh',
    'qug': 'Chimborazo Highland Quichua',
    'qu': 'Quechua',
    'nyn': 'Nyankole',
    'xsy': 'Saisiyat',
    'szy': 'Sakizaya',
    'sg': 'Sango',
    'sc': 'Sardinian',
    'sro': 'Campidanese Sardinian',
    'sdc': 'Sassarese Sardinian',
    'sli': 'Lower Silesian',
    'de-ch': 'Swiss High German',
    'sco': 'Scots',
    'trv': 'Taroko',
    'stq': 'Saterland Frisian',
    'st': 'Southern Sotho',
    'nso': 'Northern Sotho',
    'tn': 'Tswana',
    'sq': 'Albanian',
    'scn': 'Sicilian',
    'loz': 'Lozi',
    'simple': 'Simple English',
    'ss': 'Swati',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'szl': 'Silesian',
    'so': 'Somali',
    'srn': 'Sranan Tongo',
    'sr-el': 'Serbian (Latin script)',
    'sh': 'Serbo-Croatian',
    'sh-latn': 'Serbo-Croatian (Latin script)',
    'su': 'Sundanese',
    'fi': 'Finnish',
    'sv': 'Swedish',
    'kab': 'Kabyle',
    'shy': 'Shawiya',
    'shy-latn': 'Shawiya (Latin script)',
    'tl': 'Tagalog',
    'roa-tara': 'Tarantino',
    'rif': 'Riffian',
    'shi': 'Tachelhit',
    'shi-latn': 'Tachelhit (Latin script)',
    'tt-latn': 'Tatar (Latin script)',
    'crh-ro': 'tatarşa',
    'tay': 'Tayal',
    'tet': 'Tetum',
    'din': 'Dinka',
    'tg-latn': 'Tajik (Latin script)',
    'vi': 'Vietnamese',
    'tpi': 'Tok Pisin',
    'tok': 'Toki Pona',
    'tly': 'Talysh',
    'chy': 'Cheyenne',
    've': 'Venda',
    'bag': 'Tuki',
    'tvu': 'Tunen',
    'aeb-latn': 'Tunisian Arabic (Latin script)',
    'tr': 'Turkish',
    'tk': 'Turkmen',
    'tru': 'Turoyo',
    'tw': 'Twi',
    'kcg': 'Tyap',
    'ug-latn': 'Uyghur (Latin script)',
    'sju': 'Ume Sami',
    'vot': 'Votic',
    'za': 'Zhuang',
    'vec': 'Venetian',
    'vep': 'Veps',
    'ruq': 'Megleno-Romanian',
    'ruq-latn': 'Megleno-Romanian (Latin script)',
    'vo': 'Volapük',
    'vro': 'Voro',
    'fiu-vro': 'voro',
    'mcn': 'Massa',
    'vut': 'Vute',
    'wa': 'Walloon',
    'bci': 'Baoulé',
    'guc': 'Wayuu',
    'osa-latn': 'Osage (Latin script)',
    'vls': 'West Flemish',
    'wal': 'Wolaytta',
    'wo': 'Wolof',
    'war': 'Waray',
    'wya': 'Wyandot',
    'ts': 'Tsonga',
    'yat': 'Yambeta',
    'ybb': 'Yemba',
    'yav': 'Yangben',
    'yo': 'Yoruba',
    'diq': 'Zazaki',
    'zea': 'Zeelandic',
    'sgs': 'Samogitian',
    'bat-smg': 'Samogitian',
    'grc': 'Ancient Greek',
    'el': 'Greek',
    'pnt': 'Pontic',
    'av': 'Avaric',
    'ady': 'Adyghe',
    'ady-cyrl': 'Adyghe (Cyrillic script)',
    'kbd': 'Kabardian',
    'kbd-cyrl': 'Kabardian (Cyrillic script)',
    'ab': 'Abkhazian',
    'alt': 'Southern Altai',
    'ba': 'Bashkir',
    'be': 'Belarusian',
    'be-tarask': 'Belarusian (Taraskievica orthography)',
    'be-x-old': 'Belarusian (Taraskievica orthography)',
    'bxr': 'Russia Buriat',
    'bg': 'Bulgarian',
    'ruq-cyrl': 'Megleno-Romanian (Cyrillic script)',
    'os': 'Ossetic',
    'inh': 'Ingush',
    'sjd': 'Kildin Sami',
    'kv': 'Komi',
    'krc': 'Karachay-Balkar',
    'kum': 'Kumyk',
    'crh-cyrl': 'Crimean Tatar (Cyrillic script)',
    'ky': 'Kyrgyz',
    'mrj': 'Western Mari',
    'kk': 'Kazakh',
    'lbe': 'Lak',
    'kk-cyrl': 'Kazakh (Cyrillic script)',
    'kk-kz': 'Kazakh (Kazakhstan)',
    'lez': 'Lezghian',
    'mk': 'Macedonian',
    'mdf': 'Moksha',
    'mn': 'Mongolian',
    'mo': 'Moldovan',
    'gld': 'Nanai',
    'nog': 'Nogai',
    'ce': 'Chechen',
    'mhr': 'Eastern Mari',
    'koi': 'Komi-Permyak',
    'rue': 'Rusyn',
    'rsk': 'Pannonian Rusyn',
    'ru': 'Russian',
    'sah': 'Yakut',
    'sty': 'Siberian Tatar',
    'cu': 'Church Slavic',
    'sr-ec': 'Serbian (Cyrillic script)',
    'sr': 'Serbian',
    'sh-cyrl': 'Serbo-Croatian (Cyrillic script)',
    'tt-cyrl': 'Tatar (Cyrillic script)',
    'tt': 'Tatar',
    'tly-cyrl': 'Talysh (Cyrillic script)',
    'tg': 'Tajik',
    'tg-cyrl': 'Tajik (Cyrillic script)',
    'tyv': 'Tuvinian',
    'udm': 'Udmurt',
    'uz-cyrl': 'Uzbek (Cyrillic script)',
    'uk': 'Ukrainian',
    'kjh': 'Khakas',
    'xal': 'Kalmyk',
    'cv': 'Chuvash',
    'myv': 'Erzya',
    'xmf': 'Mingrelian',
    'ka': 'Georgian',
    'hyw': 'Western Armenian',
    'hy': 'Armenian',
    'anp': 'Angika',
    'awa': 'Awadhi',
    'ks-deva': 'Kashmiri (Devanagari script)',
    'ks': 'Kashmiri',
    'gom-deva': 'Goan Konkani (Devanagari script)',
    'gom': 'Goan Konkani',
    'dty': 'Doteli',
    'bho': 'Bhojpuri',
    'new': 'Newari',
    'ne': 'Nepali',
    'pi': 'Pali',
    'bh': 'Bhojpuri',
    'mag': 'Magahi',
    'mr': 'Marathi',
    'rwr': 'Marwari (India)',
    'mai': 'Maithili',
    'sa': 'Sanskrit',
    'hi': 'Hindi',
    'as': 'Assamese',
    'bn': 'Bangla',
    'bpy': 'Bishnupriya',
    'pa': 'Punjabi',
    'syl': 'Sylheti',
    'gu': 'Gujarati',
    'or': 'Odia',
    'ta': 'Tamil',
    'te': 'Telugu',
    'kn': 'Kannada',
    'tcy': 'Tulu',
    'ml': 'Malayalam',
    'si': 'Sinhala',
    'dz': 'Dzongkha',
    'bo': 'Tibetan',
    'mni': 'Manipuri',
    'ksw': 'S\'gaw Karen',
    'blk': 'Pa\'O',
    'kjp': 'Eastern Pwo',
    'shn': 'Shan',
    'my': 'Burmese',
    'mnw': 'Mon',
    'rki': 'Arakanese',
    'km': 'Khmer',
    'lo': 'Lao',
    'th': 'Thai',
    'tdd': 'Tai Nuea',
    'nod': 'Northern Thai',
    'bug': 'Buginese',
    'ban-bali': 'Balinese (Balinese script)',
    'sat': 'Santali',
    'chr': 'Cherokee',
    'ike-cans': 'Eastern Canadian (Aboriginal syllabics)',
    'iu': 'Inuktitut',
    'got': 'Gothic',
    'tzm': 'Central Atlas Tamazight',
    'zgh': 'Standard Moroccan Tamazight',
    'shi-tfng': 'Tachelhit (Tifinagh script)',
    'ti': 'Tigrinya',
    'am': 'Amharic',
    'ii': 'Sichuan Yi',
    'ko-kp': 'Korean (North Korea)',
    'ko': 'Korean',
    'ja': 'Japanese',
    'ryu': 'Okinawan',
    'zh': 'Chinese',
    'zh-cn': 'Chinese (China)',
    'zh-tw': 'Chinese (Taiwan)',
    'zh-sg': 'Chinese (Singapore)',
    'zh-mo': 'Chinese (Macau)',
    'zh-hant': 'Traditional Chinese',
    'zh-hans': 'Simplified Chinese',
    'zh-hk': 'Chinese (Hong Kong)',
    'zh-my': 'Chinese (Malaysia)',
    'wuu-hant': 'Wu Chinese (Traditional)',
    'wuu': 'Wu Chinese',
    'wuu-hans': 'Wu Chinese (Simplified)',
    'hak': 'Hakka Chinese',
    'lzh': 'Literary Chinese',
    'zh-classical': 'Classical Chinese',
    'hsn': 'Xiang Chinese',
    'yue': 'Cantonese',
    'zh-yue': 'Cantonese',
    'yue-hant': 'Cantonese (Traditional)',
    'yue-hans': 'Cantonese (Simplified)',
    'gan': 'Gan Chinese',
    'gan-hant': 'Gan (Traditional)',
    'gan-hans': 'Gan (Simplified)',
    'nan-hani': 'Min Nan (Hanji)',
    'yi': 'Yiddish',
    'he': 'Hebrew',
    'ur': 'Urdu',
    'ary': 'Moroccan Arabic',
    'ar': 'Arabic',
    'acm': 'Iraqi Arabic',
    'bqi': 'Bakhtiari',
    'ms-arab': 'Malay (Jawi script)',
    'ps': 'Pashto',
    'pnb': 'Western Punjabi',
    'aeb-arab': 'Tunisian Arabic (Arabic script)',
    'aeb': 'Tunisian Arabic',
    'azb': 'South Azerbaijani',
    'arq': 'Algerian Arabic',
    'bcc': 'Southern Balochi',
    'bgn': 'Western Balochi',
    'skr': 'Saraiki',
    'fa': 'Persian',
    'skr-arab': 'Saraiki (Arabic script)',
    'sd': 'Sindhi',
    'kk-arab': 'Kazakh (Arabic script)',
    'kk-cn': 'Kazakh (China)',
    'ku-arab': 'Kurdish (Arabic script)',
    'ks-arab': 'Kashmiri (Arabic script)',
    'khw': 'Khowar',
    'ckb': 'Central Kurdish',
    'sdh': 'Southern Kurdish',
    'glk': 'Gilaki',
    'ota': 'Ottoman Turkish',
    'lrc': 'Northern Luri',
    'luz': 'Southern Luri',
    'lki': 'Laki',
    'mzn': 'Mazanderani',
    'arz': 'Egyptian Arabic',
    'hno': 'Northern Hindko',
    'ug-arab': 'Uyghur (Arabic script)',
    'ug': 'Uyghur',
    'arc': 'Aramaic',
    'dv': 'Divehi',
    'nqo': 'N\'Ko',
    'zam': 'Zam',
    'cbk': 'Chavacano',
}
def parse_fallback_line(line):
    """Parse one ``code: fallback1, fallback2`` line into a list of codes.

    The first element is the language code before the first colon; the rest
    are the comma-separated fallback codes found between the first and the
    second colon (anything after a second colon is ignored).  Surrounding
    whitespace is stripped from every code and empty codes are dropped, so a
    blank line yields ``[]`` and ``"en:"`` yields ``["en"]``.
    """
    parts = line.strip().split(':')
    # A line without any colon is treated as a code with no fallbacks
    # instead of raising IndexError.
    fallbacks = parts[1] if len(parts) > 1 else ''
    codes = [parts[0]] + fallbacks.split(',')
    return [code.strip() for code in codes if code.strip()]


def load_chains(lines):
    """Return the fallback chains parsed from *lines*, longest chain first.

    Lines that contain no codes at all are skipped.  Chains of equal length
    keep their input order because ``sorted`` is stable.
    """
    chains = [parse_fallback_line(line) for line in lines]
    return sorted((chain for chain in chains if chain), key=len, reverse=True)


def format_chain(codes, names):
    """Render *codes* as display names joined by ``" -> "``.

    *names* maps a language code to its display name.  A code missing from
    *names* is shown as the raw code, so one unknown entry does not abort
    the whole listing.
    """
    return " -> ".join(names.get(code, code) for code in codes)


def main(path='lang.txt', names=None):
    """Print every fallback chain listed in *path*, longest chain first.

    *names* defaults to the module-level ``lang_dict``.
    """
    if names is None:
        names = lang_dict
    # The encoding is given explicitly so the result does not depend on the
    # platform's locale default.
    with open(path, 'r', encoding='utf-8') as file:
        chains = load_chains(file)
    for chain in chains:
        print(format_chain(chain, names))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment