# Gist altilunium/ba6fb98f694c02a985e9e25858c4370c by @altilunium, created July 19, 2023 14:08.
# Lookup table from a language code to the display name printed for it.
# Keys are lowercase codes, some carrying a script/region/variant suffix
# (e.g. "kk-latn", "zh-tw", "de-formal"); a few alias pairs map to the same
# name (e.g. "rup"/"roa-rup", "sgs"/"bat-smg", "yue"/"zh-yue").
# NOTE(review): the code set looks like MediaWiki's language list - confirm.
# Entry order is kept exactly as in the original listing.
lang_dict = {
    "sma": "Southern Sami",
    "ace": "Achinese",
    "ang": "Old English",
    "agq": "Aghem",
    "af": "Afrikaans",
    "ak": "Akan",
    "gsw": "Swiss German",
    "als": "Alemannisch",
    "en-us": "American English",
    "ase": "American Sign Language",
    "smn": "Inari Sami",
    "an": "Aragonese",
    "rup": "Aromanian",
    "roa-rup": "Aromanian",
    "frp": "Arpitan",
    "ast": "Asturian",
    "atj": "Atikamekw",
    "gn": "Guarani",
    "ay": "Aymara",
    "az": "Azerbaijani",
    "ksf": "Bafia",
    "bfd": "Bafut",
    "abs": "Ambonese Malay",
    "bkc": "Baka",
    "gor": "Gorontalo",
    "id": "Indonesian",
    "ms": "Malay",
    "bm": "Bambara",
    "bkh": "Bakako",
    "bax": "Bamun",
    "nan": "Min Nan Chinese",
    "zh-min-nan": "Chinese (Min Nan)",
    "bjn": "Banjar",
    "ban": "Balinese",
    "map-bms": "Basa Banyumasan",
    "bas": "Basaa",
    "btm": "Batak Mandailing",
    "bbc": "Batak Toba",
    "bbc-latn": "Batak Toba (Latin script)",
    "bew": "Betawi",
    "sje": "Pite Sami",
    "bcl": "Central Bikol",
    "bi": "Bislama",
    "bar": "Bavarian",
    "bs": "Bosnian",
    "brh": "Brahui",
    "en-gb": "British English",
    "br": "Breton",
    "en-ca": "Canadian English",
    "cps": "Capiznon",
    "ca": "Catalan",
    "ceb": "Cebuano",
    "cs": "Czech",
    "cho": "Choctaw",
    "ch": "Chamorro",
    "cbk-zam": "Chavacano",
    "ny": "Nyanja",
    "sn": "Shona",
    "tum": "Tumbuka",
    "sei": "Seri",
    "co": "Corsican",
    "cy": "Welsh",
    "dga": "Dagaare",
    "dag": "Dagbani",
    "se-no": "Northern Sami (Norway)",
    "da": "Danish",
    "se": "Northern Sami",
    "se-se": "Northern Sami (Sweden)",
    "se-fi": "Northern Sami (Finland)",
    "pdc": "Pennsylvania German",
    "de": "German",
    "de-formal": "German (formal address)",
    "nv": "Navajo",
    "dsb": "Lower Sorbian",
    "na": "Nauru",
    "dua": "Duala",
    "etu": "Ejagham",
    "dtp": "Central Dusun",
    "mh": "Marshallese",
    "et": "Estonian",
    "vmw": "Makhuwa",
    "egl": "Emilian",
    "eml": "Emiliano-Romagnolo",
    "en": "English",
    "es": "Spanish",
    "es-formal": "Spanish (formal address)",
    "es-419": "Latin American Spanish",
    "eo": "Esperanto",
    "ext": "Extremaduran",
    "eto": "Eton",
    "eu": "Basque",
    "ee": "Ewe",
    "ewo": "Ewondo",
    "wls": "Wallisian",
    "gur": "Frafra",
    "fmp": "Fe'Fe'",
    "hif": "Fiji Hindi",
    "hif-latn": "Fiji Hindi (Latin script)",
    "fon": "Fon",
    "fo": "Faroese",
    "fr": "French",
    "frc": "Cajun French",
    "fy": "Western Frisian",
    "ff": "Fulah",
    "fur": "Friulian",
    "gaa": "Ga",
    "ga": "Irish",
    "gv": "Manx",
    "sm": "Samoan",
    "gag": "Gagauz",
    "gd": "Scottish Gaelic",
    "gl": "Galician",
    "aln": "Gheg Albanian",
    "gya": "Gbaya",
    "gpe": "Ghanaian Pidgin",
    "bbj": "Ghomala",
    "ki": "Kikuyu",
    "gom-latn": "Goan Konkani (Latin script)",
    "guw": "Gun",
    "cnh": "Hakha-Chin",
    "ha": "Hausa",
    "haw": "Hawaiian",
    "ho": "Hiri Motu",
    "hsb": "Upper Sorbian",
    "hr": "Croatian",
    "hrx": "Hunsrik",
    "io": "Ido",
    "igl": "Igala",
    "ig": "Igbo",
    "rw": "Kinyarwanda",
    "ilo": "Iloko",
    "rn": "Rundi",
    "hil": "Hiligaynon",
    "ia": "Interlingua",
    "ie": "Interlingue",
    "ike-latn": "Eastern Canadian (Latin script)",
    "ik": "Inupiaq",
    "bto": "Iriga Bicolano",
    "xh": "Xhosa",
    "zu": "Zulu",
    "is": "Icelandic",
    "isu": "Isu",
    "it": "Italian",
    "jv": "Javanese",
    "smj": "Lule Sami",
    "jut": "Jutish",
    "rmf": "Finnish Kalo",
    "kea": "Kabuverdianu",
    "kbp": "Kabiye",
    "kl": "Kalaallisut",
    "kr": "Kanuri",
    "pam": "Pampanga",
    "cak": "Kaqchikel",
    "krl": "Karelian",
    "csb": "Kashubian",
    "ker": "Kera",
    "kw": "Cornish",
    "krj": "Kinaray-a",
    "kiu": "Kirmanjki",
    "sw": "Swahili",
    "bkm": "Kom",
    "kg": "Kongo",
    "avk": "Kotava",
    "ses": "Koyraboro Senni",
    "ht": "Haitian Creole",
    "kri": "Krio",
    "gcr": "Guianan Creole",
    "ku": "Kurdish",
    "ku-latn": "Kurdish (Latin script)",
    "kus": "Kʋsaal",
    "fkv": "Kvensk",
    "kj": "Kuanyama",
    "nmg": "Kwasio",
    "jbo": "Lojban",
    "lld": "Ladin",
    "lad": "Ladino",
    "lns": "Lamnso'",
    "ltg": "Latgalian",
    "la": "Latin",
    "lv": "Latvian",
    "lzz": "Laz",
    "to": "Tongan",
    "lb": "Luxembourgish",
    "nia": "Nias",
    "lt": "Lithuanian",
    "lij": "Ligurian",
    "li": "Limburgish",
    "ln": "Lingala",
    "lfn": "Lingua Franca Nova",
    "liv": "Livonian",
    "olo": "Livvi-Karelian",
    "lmo": "Lombard",
    "lg": "Ganda",
    "mad": "Madurese",
    "hu": "Hungarian",
    "hu-formal": "Hungarian (formal address)",
    "vmf": "Main-Franconian",
    "mcp": "Maka",
    "mg": "Malagasy",
    "mt": "Maltese",
    "mi": "Māori",
    "mrh": "Mara",
    "arn": "Mapuche",
    "srq": "Sirionó",
    "fit": "Tornedalen Finnish",
    "byv": "Medumba",
    "fat": "Fanti",
    "min": "Minangkabau",
    "cdo": "Min Dong Chinese",
    "mwl": "Mirandese",
    "lus": "Mizo",
    "bqz": "Mka'a",
    "mos": "Mossi",
    "mua": "Mundang",
    "mus": "Muscogee",
    "fj": "Fijian",
    "nah": "Nahuatl",
    "pcm": "Nigerian Pidgin",
    "nap": "Neapolitan",
    "nmz": "Nawdm",
    "nnz": "Nda'Nda'",
    "nl": "Dutch",
    "nl-informal": "Dutch (informal address)",
    "nds-nl": "Low Saxon",
    "cr": "Cree",
    "nnh": "Ngiemboon",
    "nla": "Ngombala",
    "nge": "Ngémba",
    "yrl": "Nheengatu",
    "niu": "Niuean",
    "lem": "Nomaande",
    "frr": "Northern Frisian",
    "pih": "Norfuk / Pitkern",
    "no": "Norwegian",
    "nb": "Norwegian Bokmal",
    "nn": "Norwegian Nynorsk",
    "nrm": "Norman",
    "nov": "Novial",
    "yas": "Nugunu",
    "sms": "Skolt Sami",
    "nys": "Nyungar",
    "uz-latn": "Uzbek (Latin script)",
    "uz": "Uzbek",
    "ann": "Obolo",
    "oc": "Occitan",
    "ojb": "Northwestern Ojibwa",
    "om": "Oromo",
    "ng": "Ndonga",
    "de-at": "Austrian German",
    "hz": "Herero",
    "pfl": "Palatine German",
    "pag": "Pangasinan",
    "ami": "Amis",
    "pap": "Papiamento",
    "pap-aw": "Papiamento (Aruba)",
    "pcd": "Picard",
    "jam": "Jamaican Creole English",
    "wes": "Pidgin (Cameroon)",
    "pms": "Piedmontese",
    "pwn": "Paiwan",
    "nds": "Low German",
    "pdt": "Plautdietsch",
    "pt-br": "Brazilian Portuguese",
    "pl": "Polish",
    "pt": "Portuguese",
    "prg": "Prussian",
    "aa": "Afar",
    "kaa": "Kara-Kalpak",
    "quc": "Kʼicheʼ",
    "kk-latn": "Kazakh (Latin script)",
    "kk-tr": "Kazakh (Turkey)",
    "crh": "Crimean Tatar",
    "crh-latn": "Crimean Tatar (Latin script)",
    "ty": "Tahitian",
    "rgn": "Romagnol",
    "ksh": "Colognian",
    "ro": "Romanian",
    "rmc": "Carpathian Romani",
    "rmy": "Vlax Romani",
    "rm": "Romansh",
    "qug": "Chimborazo Highland Quichua",
    "qu": "Quechua",
    "nyn": "Nyankole",
    "xsy": "Saisiyat",
    "szy": "Sakizaya",
    "sg": "Sango",
    "sc": "Sardinian",
    "sro": "Campidanese Sardinian",
    "sdc": "Sassarese Sardinian",
    "sli": "Lower Silesian",
    "de-ch": "Swiss High German",
    "sco": "Scots",
    "trv": "Taroko",
    "stq": "Saterland Frisian",
    "st": "Southern Sotho",
    "nso": "Northern Sotho",
    "tn": "Tswana",
    "sq": "Albanian",
    "scn": "Sicilian",
    "loz": "Lozi",
    "simple": "Simple English",
    "ss": "Swati",
    "sk": "Slovak",
    "sl": "Slovenian",
    "szl": "Silesian",
    "so": "Somali",
    "srn": "Sranan Tongo",
    "sr-el": "Serbian (Latin script)",
    "sh": "Serbo-Croatian",
    "sh-latn": "Serbo-Croatian (Latin script)",
    "su": "Sundanese",
    "fi": "Finnish",
    "sv": "Swedish",
    "kab": "Kabyle",
    "shy": "Shawiya",
    "shy-latn": "Shawiya (Latin script)",
    "tl": "Tagalog",
    "roa-tara": "Tarantino",
    "rif": "Riffian",
    "shi": "Tachelhit",
    "shi-latn": "Tachelhit (Latin script)",
    "tt-latn": "Tatar (Latin script)",
    "crh-ro": "tatarşa",
    "tay": "Tayal",
    "tet": "Tetum",
    "din": "Dinka",
    "tg-latn": "Tajik (Latin script)",
    "vi": "Vietnamese",
    "tpi": "Tok Pisin",
    "tok": "Toki Pona",
    "tly": "Talysh",
    "chy": "Cheyenne",
    "ve": "Venda",
    "bag": "Tuki",
    "tvu": "Tunen",
    "aeb-latn": "Tunisian Arabic (Latin script)",
    "tr": "Turkish",
    "tk": "Turkmen",
    "tru": "Turoyo",
    "tw": "Twi",
    "kcg": "Tyap",
    "ug-latn": "Uyghur (Latin script)",
    "sju": "Ume Sami",
    "vot": "Votic",
    "za": "Zhuang",
    "vec": "Venetian",
    "vep": "Veps",
    "ruq": "Megleno-Romanian",
    "ruq-latn": "Megleno-Romanian (Latin script)",
    "vo": "Volapük",
    "vro": "Voro",
    "fiu-vro": "voro",
    "mcn": "Massa",
    "vut": "Vute",
    "wa": "Walloon",
    "bci": "Baoulé",
    "guc": "Wayuu",
    "osa-latn": "Osage (Latin script)",
    "vls": "West Flemish",
    "wal": "Wolaytta",
    "wo": "Wolof",
    "war": "Waray",
    "wya": "Wyandot",
    "ts": "Tsonga",
    "yat": "Yambeta",
    "ybb": "Yemba",
    "yav": "Yangben",
    "yo": "Yoruba",
    "diq": "Zazaki",
    "zea": "Zeelandic",
    "sgs": "Samogitian",
    "bat-smg": "Samogitian",
    "grc": "Ancient Greek",
    "el": "Greek",
    "pnt": "Pontic",
    "av": "Avaric",
    "ady": "Adyghe",
    "ady-cyrl": "Adyghe (Cyrillic script)",
    "kbd": "Kabardian",
    "kbd-cyrl": "Kabardian (Cyrillic script)",
    "ab": "Abkhazian",
    "alt": "Southern Altai",
    "ba": "Bashkir",
    "be": "Belarusian",
    "be-tarask": "Belarusian (Taraskievica orthography)",
    "be-x-old": "Belarusian (Taraskievica orthography)",
    "bxr": "Russia Buriat",
    "bg": "Bulgarian",
    "ruq-cyrl": "Megleno-Romanian (Cyrillic script)",
    "os": "Ossetic",
    "inh": "Ingush",
    "sjd": "Kildin Sami",
    "kv": "Komi",
    "krc": "Karachay-Balkar",
    "kum": "Kumyk",
    "crh-cyrl": "Crimean Tatar (Cyrillic script)",
    "ky": "Kyrgyz",
    "mrj": "Western Mari",
    "kk": "Kazakh",
    "lbe": "Lak",
    "kk-cyrl": "Kazakh (Cyrillic script)",
    "kk-kz": "Kazakh (Kazakhstan)",
    "lez": "Lezghian",
    "mk": "Macedonian",
    "mdf": "Moksha",
    "mn": "Mongolian",
    "mo": "Moldovan",
    "gld": "Nanai",
    "nog": "Nogai",
    "ce": "Chechen",
    "mhr": "Eastern Mari",
    "koi": "Komi-Permyak",
    "rue": "Rusyn",
    "rsk": "Pannonian Rusyn",
    "ru": "Russian",
    "sah": "Yakut",
    "sty": "Siberian Tatar",
    "cu": "Church Slavic",
    "sr-ec": "Serbian (Cyrillic script)",
    "sr": "Serbian",
    "sh-cyrl": "Serbo-Croatian (Cyrillic script)",
    "tt-cyrl": "Tatar (Cyrillic script)",
    "tt": "Tatar",
    "tly-cyrl": "Talysh (Cyrillic script)",
    "tg": "Tajik",
    "tg-cyrl": "Tajik (Cyrillic script)",
    "tyv": "Tuvinian",
    "udm": "Udmurt",
    "uz-cyrl": "Uzbek (Cyrillic script)",
    "uk": "Ukrainian",
    "kjh": "Khakas",
    "xal": "Kalmyk",
    "cv": "Chuvash",
    "myv": "Erzya",
    "xmf": "Mingrelian",
    "ka": "Georgian",
    "hyw": "Western Armenian",
    "hy": "Armenian",
    "anp": "Angika",
    "awa": "Awadhi",
    "ks-deva": "Kashmiri (Devanagari script)",
    "ks": "Kashmiri",
    "gom-deva": "Goan Konkani (Devanagari script)",
    "gom": "Goan Konkani",
    "dty": "Doteli",
    "bho": "Bhojpuri",
    "new": "Newari",
    "ne": "Nepali",
    "pi": "Pali",
    "bh": "Bhojpuri",
    "mag": "Magahi",
    "mr": "Marathi",
    "rwr": "Marwari (India)",
    "mai": "Maithili",
    "sa": "Sanskrit",
    "hi": "Hindi",
    "as": "Assamese",
    "bn": "Bangla",
    "bpy": "Bishnupriya",
    "pa": "Punjabi",
    "syl": "Sylheti",
    "gu": "Gujarati",
    "or": "Odia",
    "ta": "Tamil",
    "te": "Telugu",
    "kn": "Kannada",
    "tcy": "Tulu",
    "ml": "Malayalam",
    "si": "Sinhala",
    "dz": "Dzongkha",
    "bo": "Tibetan",
    "mni": "Manipuri",
    "ksw": "S'gaw Karen",
    "blk": "Pa'O",
    "kjp": "Eastern Pwo",
    "shn": "Shan",
    "my": "Burmese",
    "mnw": "Mon",
    "rki": "Arakanese",
    "km": "Khmer",
    "lo": "Lao",
    "th": "Thai",
    "tdd": "Tai Nuea",
    "nod": "Northern Thai",
    "bug": "Buginese",
    "ban-bali": "Balinese (Balinese script)",
    "sat": "Santali",
    "chr": "Cherokee",
    "ike-cans": "Eastern Canadian (Aboriginal syllabics)",
    "iu": "Inuktitut",
    "got": "Gothic",
    "tzm": "Central Atlas Tamazight",
    "zgh": "Standard Moroccan Tamazight",
    "shi-tfng": "Tachelhit (Tifinagh script)",
    "ti": "Tigrinya",
    "am": "Amharic",
    "ii": "Sichuan Yi",
    "ko-kp": "Korean (North Korea)",
    "ko": "Korean",
    "ja": "Japanese",
    "ryu": "Okinawan",
    "zh": "Chinese",
    "zh-cn": "Chinese (China)",
    "zh-tw": "Chinese (Taiwan)",
    "zh-sg": "Chinese (Singapore)",
    "zh-mo": "Chinese (Macau)",
    "zh-hant": "Traditional Chinese",
    "zh-hans": "Simplified Chinese",
    "zh-hk": "Chinese (Hong Kong)",
    "zh-my": "Chinese (Malaysia)",
    "wuu-hant": "Wu Chinese (Traditional)",
    "wuu": "Wu Chinese",
    "wuu-hans": "Wu Chinese (Simplified)",
    "hak": "Hakka Chinese",
    "lzh": "Literary Chinese",
    "zh-classical": "Classical Chinese",
    "hsn": "Xiang Chinese",
    "yue": "Cantonese",
    "zh-yue": "Cantonese",
    "yue-hant": "Cantonese (Traditional)",
    "yue-hans": "Cantonese (Simplified)",
    "gan": "Gan Chinese",
    "gan-hant": "Gan (Traditional)",
    "gan-hans": "Gan (Simplified)",
    "nan-hani": "Min Nan (Hanji)",
    "yi": "Yiddish",
    "he": "Hebrew",
    "ur": "Urdu",
    "ary": "Moroccan Arabic",
    "ar": "Arabic",
    "acm": "Iraqi Arabic",
    "bqi": "Bakhtiari",
    "ms-arab": "Malay (Jawi script)",
    "ps": "Pashto",
    "pnb": "Western Punjabi",
    "aeb-arab": "Tunisian Arabic (Arabic script)",
    "aeb": "Tunisian Arabic",
    "azb": "South Azerbaijani",
    "arq": "Algerian Arabic",
    "bcc": "Southern Balochi",
    "bgn": "Western Balochi",
    "skr": "Saraiki",
    "fa": "Persian",
    "skr-arab": "Saraiki (Arabic script)",
    "sd": "Sindhi",
    "kk-arab": "Kazakh (Arabic script)",
    "kk-cn": "Kazakh (China)",
    "ku-arab": "Kurdish (Arabic script)",
    "ks-arab": "Kashmiri (Arabic script)",
    "khw": "Khowar",
    "ckb": "Central Kurdish",
    "sdh": "Southern Kurdish",
    "glk": "Gilaki",
    "ota": "Ottoman Turkish",
    "lrc": "Northern Luri",
    "luz": "Southern Luri",
    "lki": "Laki",
    "mzn": "Mazanderani",
    "arz": "Egyptian Arabic",
    "hno": "Northern Hindko",
    "ug-arab": "Uyghur (Arabic script)",
    "ug": "Uyghur",
    "arc": "Aramaic",
    "dv": "Divehi",
    "nqo": "N'Ko",
    "zam": "Zam",
    "cbk": "Chavacano",
}
def _parse_code_line(raw_line):
    """Turn one ``code: other, other, ...`` line into a list of codes.

    The code before the first ``:`` comes first, followed by the
    comma-separated codes after it, each stripped of whitespace.

    Returns ``None`` for a blank line so the caller can skip it. A line with
    no ``:`` or with nothing after it yields a single-element list, and empty
    items (e.g. from a trailing comma) are dropped instead of producing an
    empty-string code.
    """
    text = raw_line.strip()
    if not text:
        return None
    head, _, tail = text.partition(':')
    codes = [head.strip()]
    codes.extend(item.strip() for item in tail.split(',') if item.strip())
    return codes


def _render_chain(codes):
    """Return the display names for *codes* joined by ``" -> "``.

    A code that is missing from ``lang_dict`` is shown as the raw code, so one
    unknown entry does not abort the whole listing with a ``KeyError``.
    """
    return " -> ".join(lang_dict.get(code, code) for code in codes)


# Read everything up front so the file is closed before sorting and printing.
# The encoding is explicit so decoding does not depend on the platform locale.
with open('lang.txt', 'r', encoding='utf-8') as file:
    bag = [codes for codes in map(_parse_code_line, file) if codes is not None]

# Longest chains first; sorted() is stable, so equal-length chains keep the
# order they had in lang.txt.
bag_sorted = sorted(bag, key=len, reverse=True)
for chain in bag_sorted:
    print(_render_chain(chain))
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment