Last active
November 14, 2019 14:12
-
-
Save flaviussn/14d7c3859dd15130155c05e6aa226da4 to your computer and use it in GitHub Desktop.
From language name to ISO code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pycountry | |
def get_code(language):
    """Return the ISO code pycountry holds for a language name.

    Looks ``language`` up by name in ``pycountry.languages`` and prefers
    the record's ``alpha_2`` code, falling back to ``alpha_3`` when the
    record has no ``alpha_2``.

    Args:
        language: Language name to look up, e.g. ``"English"``.

    Returns:
        The ``alpha_2`` or ``alpha_3`` code of the matching record. If the
        lookup returns ``None``, or the record carries neither code, a
        human-readable message that starts with ``language`` is returned
        instead of a code.
    """
    lang = pycountry.languages.get(name=language)
    if lang is None:
        return language + " doesn't exist"

    # Some records lack one of the codes. getattr with a default only
    # absorbs the missing-attribute case, unlike a bare ``except`` which
    # would also hide unrelated errors (and KeyboardInterrupt).
    for attribute in ("alpha_2", "alpha_3"):
        code = getattr(lang, attribute, None)
        if code is not None:
            return code

    # Leading space keeps the name and the explanation from running together.
    return language + " has no alpha_2 or alpha_3"
# Language names to resolve. The spellings (e.g. "Modern Greek (1453-)",
# "Malay (macrolanguage)") are passed verbatim to get_code, so they must
# match the names the lookup expects.
bert_languages = [
    "Afrikaans", "Albanian", "Arabic", "Aragonese", "Armenian",
    "Asturian", "Azerbaijani", "Bashkir", "Basque", "Bavarian",
    "Belarusian", "Bengali", "Bishnupriya", "Bosnian", "Breton",
    "Bulgarian", "Burmese", "Catalan", "Cebuano", "Chechen",
    "Chinese (Simplified)", "Chinese (Traditional)", "Chuvash",
    "Croatian", "Czech", "Danish", "Dutch", "English", "Estonian",
    "Finnish", "French", "Galician", "Georgian", "German",
    "Modern Greek (1453-)", "Gujarati", "Haitian", "Hebrew", "Hindi",
    "Hungarian", "Icelandic", "Ido", "Indonesian", "Irish", "Italian",
    "Japanese", "Javanese", "Kannada", "Kazakh", "Kirghiz", "Korean",
    "Latin", "Latvian", "Lithuanian", "Lombard", "Low German",
    "Luxembourgish", "Macedonian", "Malagasy", "Malay (macrolanguage)",
    "Malayalam", "Marathi", "Minangkabau", "Nepali (macrolanguage)",
    "Newari", "Norwegian Bokmål", "Norwegian Nynorsk",
    "Occitan (post 1500)", "Persian", "Piemontese", "Polish",
    "Portuguese", "Panjabi", "Romanian", "Russian", "Scots", "Serbian",
    "Serbo-Croatian", "Sicilian", "Slovak", "Slovenian",
    "South Azerbaijani", "Spanish", "Sundanese",
    "Swahili (macrolanguage)", "Swedish", "Tagalog", "Tajik", "Tamil",
    "Tatar", "Telugu", "Turkish", "Ukrainian", "Urdu", "Uzbek",
    "Vietnamese", "Volapük", "Waray (Philippines)", "Welsh",
    "Western Frisian", "Western Panjabi", "Yoruba",
]

# Resolve every name up front so the full list can be printed at the end.
result = list(map(get_code, bert_languages))

# One "<code or message>  ---  <name>" line per language, in list order.
for lang, r in zip(bert_languages, result):
    print(r, " --- ", lang)

# Finally dump all results as a single Python list.
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment