Skip to content

Instantly share code, notes, and snippets.

@ebraminio
Created November 2, 2013 15:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ebraminio/7280220 to your computer and use it in GitHub Desktop.
Save ebraminio/7280220 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
# Generated from https://raw.github.com/wikimedia/pywikibot-core/master/pywikibot/userinterfaces/transliteration.py
import json
def dictgen():
    """Build and return the character transliteration table.

    The result is a plain dictionary whose keys are source characters and
    whose values are the replacement strings (mostly Latin letters).

    The table is filled section by section, one script at a time. Several
    characters are assigned more than once; the assignment that runs last
    wins, so the order of the statements below is significant and must be
    preserved when editing.

    Returns:
        The populated mapping of character -> replacement string.
    """
    # Named ``table`` rather than ``dict`` so the builtin is not shadowed.
    table = {}
    for char in u"ÀÁÂẦẤẪẨẬÃĀĂẰẮẴẶẲȦǠẠḀȂĄǍẢ":
        table[char] = u"A"
    for char in u"ȀǞ":
        table[char] = u"Ä"
    table[u"Ǻ"] = u"Å"
    table[u"Ä"] = u"Ae"
    table[u"Å"] = u"Aa"
    for char in u"àáâầấẫẩậãāăằắẵặẳȧǡạḁȃąǎảẚ":
        table[char] = u"a"
    for char in u"ȁǟ":
        table[char] = u"ä"
    table[u"ǻ"] = u"å"
    table[u"ä"] = u"ae"
    table[u"å"] = u"aa"
    for char in u"ḂḄḆƁƂ":
        table[char] = u"B"
    for char in u"ḃḅḇƀɓƃ":
        table[char] = u"b"
    for char in u"ĆĈĊÇČƇ":
        table[char] = u"C"
    for char in u"ćĉċçčƈȼ":
        table[char] = u"c"
    table[u"Ḉ"] = u"Ç"
    table[u"ḉ"] = u"ç"
    table[u"Ð"] = u"Dh"
    table[u"ð"] = u"dh"
    for char in u"ĎḊḌḎḐḒĐƉƊƋ":
        table[char] = u"D"
    for char in u"ďḋḍḏḑḓđɖɗƌ":
        table[char] = u"d"
    for char in u"ÈȄÉÊḚËĒḔḖĔĖẸE̩ȆȨḜĘĚẼḘẺ":
        table[char] = u"E"
    for char in u"ỀẾỄỆỂ":
        table[char] = u"Ê"
    for char in u"èȅéêḛëēḕḗĕėẹe̩ȇȩḝęěẽḙẻ":
        table[char] = u"e"
    for char in u"ềếễệể":
        table[char] = u"ê"
    for char in u"ḞƑ":
        table[char] = u"F"
    for char in u"ḟƒ":
        table[char] = u"f"
    for char in u"ǴḠĞĠĢǦǤƓ":
        table[char] = u"G"
    for char in u"ǵḡğġģǧǥɠ":
        table[char] = u"g"
    table[u"Ĝ"] = u"Gx"
    table[u"ĝ"] = u"gx"
    for char in u"ḢḤḦȞḨḪH̱ĦǶ":
        table[char] = u"H"
    for char in u"ḣḥḧȟḩḫ̱ẖħƕ":
        table[char] = u"h"
    for char in u"IÌȈÍÎĨḬÏḮĪĬȊĮǏİỊỈƗ":
        table[char] = u"I"
    for char in u"ıìȉíîĩḭïḯīĭȋįǐiịỉɨ":
        table[char] = u"i"
    for char in u"ĴJ":
        table[char] = u"J"
    for char in u"ɟĵ̌ǰ":
        table[char] = u"j"
    for char in u"ḰǨĶḲḴƘ":
        table[char] = u"K"
    for char in u"ḱǩķḳḵƙ":
        table[char] = u"k"
    for char in u"ĹĻĽḶḸḺḼȽŁ":
        table[char] = u"L"
    for char in u"ĺļľḷḹḻḽƚłɫ":
        table[char] = u"l"
    for char in u"ḾṀṂ":
        table[char] = u"M"
    for char in u"ḿṁṃɱ":
        table[char] = u"m"
    for char in u"ǸŃÑŅŇṄṆṈṊŊƝɲȠ":
        table[char] = u"N"
    for char in u"ǹńñņňṅṇṉṋŋɲƞ":
        table[char] = u"n"
    for char in u"ÒÓÔÕṌṎȬÖŌṐṒŎǑȮȰỌǪǬƠỜỚỠỢỞỎƟØǾ":
        table[char] = u"O"
    for char in u"òóôõṍṏȭöōṑṓŏǒȯȱọǫǭơờớỡợởỏɵøǿ":
        table[char] = u"o"
    for char in u"ȌŐȪ":
        table[char] = u"Ö"
    for char in u"ȍőȫ":
        table[char] = u"ö"
    for char in u"ỒỐỖỘỔȎ":
        table[char] = u"Ô"
    for char in u"ồốỗộổȏ":
        table[char] = u"ô"
    for char in u"ṔṖƤ":
        table[char] = u"P"
    for char in u"ṕṗƥ":
        table[char] = u"p"
    table[u"ᵽ"] = u"q"
    for char in u"ȐŔŖŘȒṘṚṜṞ":
        table[char] = u"R"
    for char in u"ȑŕŗřȓṙṛṝṟɽ":
        table[char] = u"r"
    for char in u"ŚṤŞȘŠṦṠṢṨ":
        table[char] = u"S"
    for char in u"śṥşșšṧṡṣṩȿ":
        table[char] = u"s"
    table[u"Ŝ"] = u"Sx"
    table[u"ŝ"] = u"sx"
    for char in u"ŢȚŤṪṬṮṰŦƬƮ":
        table[char] = u"T"
    for char in u"ţțťṫṭṯṱŧȾƭʈ":
        table[char] = u"t"
    for char in u"ÙÚŨṸṴÜṲŪṺŬỤŮŲǓṶỦƯỮỰỬ":
        table[char] = u"U"
    for char in u"ùúũṹṵüṳūṻŭụůųǔṷủưữựửʉ":
        table[char] = u"u"
    for char in u"ȔŰǛǗǕǙ":
        table[char] = u"Ü"
    for char in u"ȕűǜǘǖǚ":
        table[char] = u"ü"
    table[u"Û"] = u"Ux"
    table[u"û"] = u"ux"
    table[u"Ȗ"] = u"Û"
    table[u"ȗ"] = u"û"
    table[u"Ừ"] = u"Ù"
    table[u"ừ"] = u"ù"
    table[u"Ứ"] = u"Ú"
    table[u"ứ"] = u"ú"
    for char in u"ṼṾ":
        table[char] = u"V"
    for char in u"ṽṿ":
        table[char] = u"v"
    for char in u"ẀẂŴẄẆẈ":
        table[char] = u"W"
    for char in u"ẁẃŵẅẇẉ":
        table[char] = u"w"
    for char in u"ẊẌ":
        table[char] = u"X"
    for char in u"ẋẍ":
        table[char] = u"x"
    for char in u"ỲÝŶŸỸȲẎỴỶƳ":
        table[char] = u"Y"
    for char in u"ỳýŷÿỹȳẏỵỷƴ":
        table[char] = u"y"
    for char in u"ŹẐŻẒŽẔƵȤ":
        table[char] = u"Z"
    for char in u"źẑżẓžẕƶȥ":
        table[char] = u"z"
    table[u"ɀ"] = u"zv"
    # Latin: extended Latin alphabet
    # Some keys below were already set above; these later values win.
    table[u"ɑ"] = u"a"
    for char in u"ÆǼǢ":
        table[char] = u"AE"
    for char in u"æǽǣ":
        table[char] = u"ae"
    table[u"Ð"] = u"Dh"
    table[u"ð"] = u"dh"
    for char in u"ƎƏƐ":
        table[char] = u"E"
    for char in u"ǝəɛ":
        table[char] = u"e"
    for char in u"ƔƢ":
        table[char] = u"G"
    for char in u"ᵷɣƣᵹ":
        table[char] = u"g"
    table[u"Ƅ"] = u"H"
    table[u"ƅ"] = u"h"
    table[u"Ƕ"] = u"Wh"
    table[u"ƕ"] = u"wh"
    table[u"Ɩ"] = u"I"
    table[u"ɩ"] = u"i"
    table[u"Ŋ"] = u"Ng"
    table[u"ŋ"] = u"ng"
    table[u"Œ"] = u"OE"
    table[u"œ"] = u"oe"
    table[u"Ɔ"] = u"O"
    table[u"ɔ"] = u"o"
    table[u"Ȣ"] = u"Ou"
    table[u"ȣ"] = u"ou"
    table[u"Ƽ"] = u"Q"
    for char in u"ĸƽ":
        table[char] = u"q"
    table[u"ȹ"] = u"qp"
    # NOTE(review): the key on the next line is an empty string as written
    # here; a character may have been lost - confirm against upstream.
    table[u""] = u"r"
    table[u"ſ"] = u"s"
    table[u"ß"] = u"ss"
    table[u"Ʃ"] = u"Sh"
    for char in u"ʃᶋ":
        table[char] = u"sh"
    table[u"Ʉ"] = u"U"
    table[u"ʉ"] = u"u"
    table[u"Ʌ"] = u"V"
    table[u"ʌ"] = u"v"
    for char in u"ƜǷ":
        table[char] = u"W"
    for char in u"ɯƿ":
        table[char] = u"w"
    table[u"Ȝ"] = u"Y"
    table[u"ȝ"] = u"y"
    table[u"IJ"] = u"IJ"
    table[u"ij"] = u"ij"
    table[u"Ƨ"] = u"Z"
    for char in u"ʮƨ":
        table[char] = u"z"
    table[u"Ʒ"] = u"Zh"
    table[u"ʒ"] = u"zh"
    table[u"Ǯ"] = u"Dzh"
    table[u"ǯ"] = u"dzh"
    for char in u"ƸƹʔˀɁɂ":
        table[char] = u"'"
    for char in u"Þ":
        table[char] = u"Th"
    for char in u"þ":
        table[char] = u"th"
    # NOTE(review): as written this loop also maps the plain letter "C" to
    # "!" - confirm that the first character is the intended one.
    for char in u"Cʗǃ":
        table[char] = u"!"
    # Punctuation and typography
    for char in u"«»“”„¨":
        table[char] = u'"'
    for char in u"‘’′":
        table[char] = u"'"
    table[u"•"] = u"*"
    table[u"@"] = u"(at)"
    table[u"¤"] = u"$"
    table[u"¢"] = u"c"
    table[u"€"] = u"E"
    table[u"£"] = u"L"
    table[u"¥"] = u"yen"
    table[u"†"] = u"+"
    table[u"‡"] = u"++"
    table[u"°"] = u":"
    table[u"¡"] = u"!"
    table[u"¿"] = u"?"
    table[u"‰"] = u"o/oo"
    table[u"‱"] = u"o/ooo"
    for char in u"¶§":
        table[char] = u">"
    for char in u"…":
        table[char] = u"..."
    for char in u"‒–—―":
        table[char] = u"-"
    for char in u"·":
        table[char] = u" "
    table[u"¦"] = u"|"
    table[u"⁂"] = u"***"
    table[u"◊"] = u"<>"
    table[u"‽"] = u"?!"
    table[u"؟"] = u";-)"
    table[u"¹"] = u"1"
    table[u"²"] = u"2"
    table[u"³"] = u"3"
    # Cyrillic
    table.update({u"А": u"A", u"а": u"a", u"Б": u"B", u"б": u"b",
                  u"В": u"V", u"в": u"v", u"Г": u"G", u"г": u"g",
                  u"Д": u"D", u"д": u"d", u"Е": u"E", u"е": u"e",
                  u"Ж": u"Zh", u"ж": u"zh", u"З": u"Z", u"з": u"z",
                  u"И": u"I", u"и": u"i", u"Й": u"J", u"й": u"j",
                  u"К": u"K", u"к": u"k", u"Л": u"L", u"л": u"l",
                  u"М": u"M", u"м": u"m", u"Н": u"N", u"н": u"n",
                  u"О": u"O", u"о": u"o", u"П": u"P", u"п": u"p",
                  u"Р": u"R", u"р": u"r", u"С": u"S", u"с": u"s",
                  u"Т": u"T", u"т": u"t", u"У": u"U", u"у": u"u",
                  u"Ф": u"F", u"ф": u"f", u"х": u"kh", u"Ц": u"C",
                  u"ц": u"c", u"Ч": u"Ch", u"ч": u"ch", u"Ш": u"Sh",
                  u"ш": u"sh", u"Щ": u"Shch", u"щ": u"shch", u"Ь": u"'",
                  u"ь": "'", u"Ъ": u'"', u"ъ": '"', u"Ю": u"Yu",
                  u"ю": u"yu", u"Я": u"Ya", u"я": u"ya", u"Х": u"Kh",
                  u"Χ": u"Kh"})
    # Additional Cyrillic letters, most occurring in only one or a few
    # languages. Keys repeated inside this literal keep their last value.
    table.update({u"Ы": u"Y", u"ы": u"y", u"Ё": u"Ë", u"ё": u"ë",
                  u"Э": u"È", u"Ѐ": u"È", u"э": u"è", u"ѐ": u"è",
                  u"І": u"I", u"і": u"i", u"Ї": u"Ji", u"ї": u"ji",
                  u"Є": u"Je", u"є": u"je", u"Ґ": u"G", u"Ҝ": u"G",
                  u"ґ": u"g", u"ҝ": u"g", u"Ђ": u"Dj", u"ђ": u"dj",
                  u"Ӣ": u"Y", u"ӣ": u"y", u"Љ": u"Lj", u"љ": u"lj",
                  u"Њ": u"Nj", u"њ": u"nj", u"Ћ": u"Cj", u"ћ": u"cj",
                  u"Җ": u"Zhj", u"җ": u"zhj", u"Ѓ": u"Gj", u"ѓ": u"gj",
                  u"Ќ": u"Kj", u"ќ": u"kj", u"Ӣ": u"Ii", u"ӣ": u"ii",
                  u"Ӯ": u"U", u"ӯ": u"u", u"Ҳ": u"H", u"ҳ": u"h",
                  u"Ҷ": u"Dz", u"ҷ": u"dz", u"Ө": u"Ô", u"Ӫ": u"Ô",
                  u"ө": u"ô", u"ӫ": u"ô", u"Ү": u"Y", u"ү": u"y", u"Һ": u"H",
                  u"һ": u"h", u"Ә": u"AE", u"Ӕ": u"AE", u"ә": u"ae",
                  u"Ӛ": u"Ë", u"Ӭ": u"Ë", u"ӛ": u"ë", u"ӭ": u"ë", u"Җ": u"Zhj",
                  u"җ": u"zhj", u"Ұ": u"U", u"ұ": u"u", u"ў": u"ù", u"Ў": u"Ù",
                  u"ѝ": u"ì", u"Ѝ": u"Ì", u"Ӑ": u"A", u"ă": u"a", u"Ӓ": u"Ä",
                  u"ҿ": u"ä", u"Ҽ": u"Ts", u"Ҿ": u"Ts", u"ҽ": u"ts", u"ҿ": u"ts",
                  u"Ҙ": u"Dh", u"ҙ": u"dh", u"Ӏ": u"", u"ӏ": u"", u"Ӆ": u"L",
                  u"ӆ": u"l", u"Ӎ": u"M", u"ӎ": u"m", u"Ӧ": u"Ö", u"ӧ": u"ö",
                  u"Ҩ": u"u", u"ҩ": u"u", u"Ҧ": u"Ph", u"ҧ": u"ph", u"Ҏ": u"R",
                  u"ҏ": u"r", u"Ҫ": u"Th", u"ҫ": u"th", u"Ҭ": u"T", u"ҭ": u"t",
                  u"Ӯ": u"Û", u"ӯ": u"û", u"Ұ": u"U", u"Ӹ": u"U", u"ұ": u"u",
                  u"ӹ": u"u", u"Ҵ": u"Tts", u"ҵ": u"tts", u"Ӵ": u"Ch", u"ӵ": u"ch"})
    for char in u"ЈӤҊ":
        table[char] = u"J"
    for char in u"јӥҋ":
        table[char] = u"j"
    for char in u"ЏӁӜҶ":
        table[char] = u"Dzh"
    for char in u"џӂӝҷ":
        table[char] = u"dzh"
    for char in u"ЅӞӠӋҸ":
        table[char] = u"Dz"
    for char in u"ѕӟӡӌҹ":
        table[char] = u"dz"
    for char in u"ҒӶҔ":
        table[char] = u"G"
    for char in u"ғӷҕ":
        table[char] = u"g"
    for char in u"ҚҞҠӃ":
        table[char] = u"Q"
    for char in u"қҟҡӄ":
        table[char] = u"q"
    for char in u"ҢҤӉӇ":
        table[char] = u"Ng"
    for char in u"ңҥӊӈ":
        table[char] = u"ng"
    for char in u"ӖѢҌ":
        table[char] = u"E"
    for char in u"ӗѣҍ":
        table[char] = u"e"
    for char in u"ӲӰҮ":
        table[char] = u"Ü"
    for char in u"ӳӱү":
        table[char] = u"ü"
    # Archaic Cyrillic letters
    table.update({u"Ѹ": u"Ou", u"ѹ": u"ou", u"Ѡ": u"O", u"Ѻ": u"O", u"ѡ": u"o",
                  u"ѻ": u"o", u"Ѿ": u"Ot", u"ѿ": u"ot", u"Ѣ": u"E", u"ѣ": u"e",
                  u"Ѥ": u"Ei", u"Ѧ": u"Ei", u"ѥ": u"ei", u"ѧ": u"ei", u"Ѫ": u"Ai",
                  u"ѫ": u"ai", u"Ѯ": u"X", u"ѯ": u"x", u"Ѱ": u"Ps", u"ѱ": u"ps",
                  u"Ѳ": u"Th", u"ѳ": u"th", u"Ѵ": u"Ü", u"Ѷ": u"Ü", u"ѵ": u"ü"})
    # Hebrew alphabet
    for char in u"אע":
        table[char] = u"'"
    table[u"ב"] = u"b"
    table[u"ג"] = u"g"
    table[u"ד"] = u"d"
    table[u"ה"] = u"h"
    table[u"ו"] = u"v"
    table[u"ז"] = u"z"
    table[u"ח"] = u"kh"
    table[u"ט"] = u"t"
    table[u"י"] = u"y"
    for char in u"ךכ":
        table[char] = u"k"
    table[u"ל"] = u"l"
    for char in u"םמ":
        table[char] = u"m"
    for char in u"ןנ":
        table[char] = u"n"
    table[u"ס"] = u"s"
    for char in u"ףפ":
        table[char] = u"ph"
    for char in u"ץצ":
        table[char] = u"ts"
    table[u"ק"] = u"q"
    table[u"ר"] = u"r"
    table[u"ש"] = u"sh"
    table[u"ת"] = u"th"
    # Arabic alphabet
    for char in u"اﺍﺎ":
        table[char] = u"a"
    for char in u"بﺏﺐﺒﺑ":
        table[char] = u"b"
    for char in u"تﺕﺖﺘﺗ":
        table[char] = u"t"
    for char in u"ثﺙﺚﺜﺛ":
        table[char] = u"th"
    for char in u"جﺝﺞﺠﺟ":
        table[char] = u"g"
    for char in u"حﺡﺢﺤﺣ":
        table[char] = u"h"
    for char in u"خﺥﺦﺨﺧ":
        table[char] = u"kh"
    for char in u"دﺩﺪ":
        table[char] = u"d"
    for char in u"ذﺫﺬ":
        table[char] = u"dh"
    for char in u"رﺭﺮ":
        table[char] = u"r"
    for char in u"زﺯﺰ":
        table[char] = u"z"
    for char in u"سﺱﺲﺴﺳ":
        table[char] = u"s"
    for char in u"شﺵﺶﺸﺷ":
        table[char] = u"sh"
    for char in u"صﺹﺺﺼﺻ":
        table[char] = u"s"
    for char in u"ضﺽﺾﻀﺿ":
        table[char] = u"d"
    for char in u"طﻁﻂﻄﻃ":
        table[char] = u"t"
    for char in u"ظﻅﻆﻈﻇ":
        table[char] = u"z"
    for char in u"عﻉﻊﻌﻋ":
        table[char] = u"'"
    for char in u"غﻍﻎﻐﻏ":
        table[char] = u"gh"
    for char in u"فﻑﻒﻔﻓ":
        table[char] = u"f"
    for char in u"قﻕﻖﻘﻗ":
        table[char] = u"q"
    for char in u"كﻙﻚﻜﻛک":
        table[char] = u"k"
    for char in u"لﻝﻞﻠﻟ":
        table[char] = u"l"
    for char in u"مﻡﻢﻤﻣ":
        table[char] = u"m"
    for char in u"نﻥﻦﻨﻧ":
        table[char] = u"n"
    for char in u"هﻩﻪﻬﻫ":
        table[char] = u"h"
    for char in u"وﻭﻮ":
        table[char] = u"w"
    for char in u"یيﻱﻲﻴﻳ":
        table[char] = u"y"
    # Arabic - additional letters, modified letters and ligatures
    table[u"ﺀ"] = u"'"
    for char in u"آﺁﺂ":
        table[char] = u"'a"
    for char in u"ةﺓﺔ":
        table[char] = u"th"
    for char in u"ىﻯﻰ":
        table[char] = u"á"
    for char in u"یﯼﯽﯿﯾ":
        table[char] = u"y"
    table[u"؟"] = u"?"
    # Arabic - ligatures
    for char in u"ﻻﻼ":
        table[char] = u"la"
    table[u"ﷲ"] = u"llah"
    for char in u"إأ":
        table[char] = u"a'"
    table[u"ؤ"] = u"w'"
    table[u"ئ"] = u"y'"
    # NOTE(review): every key in the next two groups appears to be the same
    # placeholder character, so only the last assignment would survive -
    # combining marks may have been lost; confirm against upstream.
    for char in u"◌◌":
        table[char] = u""  # indicates absence of vowels
    # Arabic vowels
    table[u"◌"] = u"a"
    table[u"◌"] = u"u"
    table[u"◌"] = u"i"
    table[u"◌"] = u"a"
    table[u"◌"] = u"ay"
    table[u"◌"] = u"ay"
    table[u"◌"] = u"u"
    table[u"◌"] = u"iy"
    # Arabic numerals
    for char in u"٠۰":
        table[char] = u"0"
    for char in u"١۱":
        table[char] = u"1"
    for char in u"٢۲":
        table[char] = u"2"
    for char in u"٣۳":
        table[char] = u"3"
    for char in u"٤۴":
        table[char] = u"4"
    for char in u"٥۵":
        table[char] = u"5"
    for char in u"٦۶":
        table[char] = u"6"
    for char in u"٧۷":
        table[char] = u"7"
    for char in u"٨۸":
        table[char] = u"8"
    for char in u"٩۹":
        table[char] = u"9"
    # Perso-Arabic
    for char in u"پﭙﭙپ":
        table[char] = u"p"
    for char in u"چچچچ":
        table[char] = u"ch"
    for char in u"ژژ":
        table[char] = u"zh"
    for char in u"گﮔﮕﮓ":
        table[char] = u"g"
    # Greek
    table.update({u"Α": u"A", u"α": u"a", u"Β": u"B", u"β": u"b", u"Γ": u"G",
                  u"γ": u"g", u"Δ": u"D", u"δ": u"d", u"Ε": u"E", u"ε": u"e",
                  u"Ζ": u"Z", u"ζ": u"z", u"Η": u"I", u"η": u"i", u"θ": u"th",
                  u"Θ": u"Th", u"Ι": u"I", u"ι": u"i", u"Κ": u"K", u"κ": u"k",
                  u"Λ": u"L", u"λ": u"l", u"Μ": u"M", u"μ": u"m", u"Ν": u"N",
                  u"ν": u"n", u"Ξ": u"X", u"ξ": u"x", u"Ο": u"O", u"ο": u"o",
                  u"Π": u"P", u"π": u"p", u"Ρ": u"R", u"ρ": u"r", u"Σ": u"S",
                  u"σ": u"s", u"ς": u"s", u"Τ": u"T", u"τ": u"t", u"Υ": u"Y",
                  u"υ": u"y", u"Φ": u"F", u"φ": u"f", u"Ψ": u"Ps", u"ψ": u"ps",
                  u"Ω": u"O", u"ω": u"o", u"ϗ": u"&", u"Ϛ": u"St", u"ϛ": u"st",
                  u"Ϙ": u"Q", u"Ϟ": u"Q", u"ϙ": u"q", u"ϟ": u"q", u"Ϻ": u"S",
                  u"ϻ": u"s", u"Ϡ": u"Ss", u"ϡ": u"ss", u"Ϸ": u"Sh", u"ϸ": u"sh",
                  u"·": u":", u"Ά": u"Á", u"ά": u"á", u"Έ": u"É", u"Ή": u"É",
                  u"έ": u"é", u"ή": u"é", u"Ί": u"Í", u"ί": u"í", u"Ϊ": u"Ï",
                  u"ϊ": u"ï", u"ΐ": u"ï", u"Ό": u"Ó", u"ό": u"ó", u"Ύ": u"Ý",
                  u"ύ": u"ý", u"Ϋ": u"Y", u"ϋ": u"ÿ", u"ΰ": u"ÿ", u"Ώ": u"Ó",
                  u"ώ": u"ó"})
    # Japanese (katakana and hiragana)
    for char in u"アァあ":
        table[char] = u"a"
    for char in u"イィい":
        table[char] = u"i"
    for char in u"ウう":
        table[char] = u"u"
    for char in u"エェえ":
        table[char] = u"e"
    for char in u"オォお":
        table[char] = u"o"
    for char in u"ャや":
        table[char] = u"ya"
    for char in u"ュゆ":
        table[char] = u"yu"
    for char in u"ョよ":
        table[char] = u"yo"
    for char in u"カか":
        table[char] = u"ka"
    for char in u"キき":
        table[char] = u"ki"
    for char in u"クく":
        table[char] = u"ku"
    for char in u"ケけ":
        table[char] = u"ke"
    for char in u"コこ":
        table[char] = u"ko"
    for char in u"サさ":
        table[char] = u"sa"
    for char in u"シし":
        table[char] = u"shi"
    for char in u"スす":
        table[char] = u"su"
    for char in u"セせ":
        table[char] = u"se"
    for char in u"ソそ":
        table[char] = u"so"
    for char in u"タた":
        table[char] = u"ta"
    for char in u"チち":
        table[char] = u"chi"
    for char in u"ツつ":
        table[char] = u"tsu"
    for char in u"テて":
        table[char] = u"te"
    for char in u"トと":
        table[char] = u"to"
    for char in u"ナな":
        table[char] = u"na"
    for char in u"ニに":
        table[char] = u"ni"
    for char in u"ヌぬ":
        table[char] = u"nu"
    for char in u"ネね":
        table[char] = u"ne"
    for char in u"ノの":
        table[char] = u"no"
    for char in u"ハは":
        table[char] = u"ha"
    for char in u"ヒひ":
        table[char] = u"hi"
    for char in u"フふ":
        table[char] = u"fu"
    for char in u"ヘへ":
        table[char] = u"he"
    for char in u"ホほ":
        table[char] = u"ho"
    for char in u"マま":
        table[char] = u"ma"
    for char in u"ミみ":
        table[char] = u"mi"
    for char in u"ムむ":
        table[char] = u"mu"
    for char in u"メめ":
        table[char] = u"me"
    for char in u"モも":
        table[char] = u"mo"
    for char in u"ラら":
        table[char] = u"ra"
    for char in u"リり":
        table[char] = u"ri"
    for char in u"ルる":
        table[char] = u"ru"
    for char in u"レれ":
        table[char] = u"re"
    for char in u"ロろ":
        table[char] = u"ro"
    for char in u"ワわ":
        table[char] = u"wa"
    for char in u"ヰゐ":
        table[char] = u"wi"
    for char in u"ヱゑ":
        table[char] = u"we"
    for char in u"ヲを":
        table[char] = u"wo"
    for char in u"ンん":
        table[char] = u"n"
    for char in u"ガが":
        table[char] = u"ga"
    for char in u"ギぎ":
        table[char] = u"gi"
    for char in u"グぐ":
        table[char] = u"gu"
    for char in u"ゲげ":
        table[char] = u"ge"
    for char in u"ゴご":
        table[char] = u"go"
    for char in u"ザざ":
        table[char] = u"za"
    for char in u"ジじ":
        table[char] = u"ji"
    for char in u"ズず":
        table[char] = u"zu"
    for char in u"ゼぜ":
        table[char] = u"ze"
    for char in u"ゾぞ":
        table[char] = u"zo"
    for char in u"ダだ":
        table[char] = u"da"
    for char in u"ヂぢ":
        table[char] = u"dji"
    for char in u"ヅづ":
        table[char] = u"dzu"
    for char in u"デで":
        table[char] = u"de"
    for char in u"ドど":
        table[char] = u"do"
    for char in u"バば":
        table[char] = u"ba"
    for char in u"ビび":
        table[char] = u"bi"
    for char in u"ブぶ":
        table[char] = u"bu"
    for char in u"ベべ":
        table[char] = u"be"
    for char in u"ボぼ":
        table[char] = u"bo"
    for char in u"パぱ":
        table[char] = u"pa"
    for char in u"ピぴ":
        table[char] = u"pi"
    for char in u"プぷ":
        table[char] = u"pu"
    for char in u"ペぺ":
        table[char] = u"pe"
    for char in u"ポぽ":
        table[char] = u"po"
    for char in u"ヴゔ":
        table[char] = u"vu"
    table[u"ヷ"] = u"va"
    table[u"ヸ"] = u"vi"
    table[u"ヹ"] = u"ve"
    table[u"ヺ"] = u"vo"
    # Japanese and Chinese punctuation and typography
    # Several keys here repeat earlier ones; the later value replaces them.
    for char in u"・·":
        table[char] = u" "
    for char in u"〃『』《》":
        table[char] = u'"'
    for char in u"「」〈〉〘〙〚〛":
        table[char] = u"'"
    for char in u"(〔":
        table[char] = u"("
    for char in u")〕":
        table[char] = u")"
    for char in u"[【〖":
        table[char] = u"["
    for char in u"]】〗":
        table[char] = u"]"
    for char in u"{":
        table[char] = u"{"
    for char in u"}":
        table[char] = u"}"
    for char in u"っ":
        table[char] = u":"
    for char in u"ー":
        table[char] = u"h"
    for char in u"゛":
        table[char] = u"'"
    for char in u"゜":
        table[char] = u"p"
    for char in u"。":
        table[char] = u". "
    for char in u"、":
        table[char] = u", "
    for char in u"・":
        table[char] = u" "
    for char in u"〆":
        table[char] = u"shime"
    for char in u"〜":
        table[char] = u"-"
    for char in u"…":
        table[char] = u"..."
    for char in u"‥":
        table[char] = u".."
    for char in u"ヶ":
        table[char] = u"months"
    for char in u"•◦":
        table[char] = u"_"
    for char in u"※*":
        table[char] = u"*"
    for char in u"Ⓧ":
        table[char] = u"(X)"
    for char in u"Ⓨ":
        table[char] = u"(Y)"
    for char in u"!":
        table[char] = u"!"
    for char in u"?":
        table[char] = u"?"
    for char in u";":
        table[char] = u";"
    for char in u":":
        table[char] = u":"
    for char in u"。":
        table[char] = u"."
    for char in u",、":
        table[char] = u","
    # Georgian
    for char in u"ა":
        table[char] = u"a"
    for char in u"ბ":
        table[char] = u"b"
    for char in u"გ":
        table[char] = u"g"
    for char in u"დ":
        table[char] = u"d"
    for char in u"ეჱ":
        table[char] = u"e"
    for char in u"ვ":
        table[char] = u"v"
    for char in u"ზ":
        table[char] = u"z"
    for char in u"თ":
        table[char] = u"th"
    for char in u"ი":
        table[char] = u"i"
    for char in u"კ":
        table[char] = u"k"
    for char in u"ლ":
        table[char] = u"l"
    for char in u"მ":
        table[char] = u"m"
    for char in u"ნ":
        table[char] = u"n"
    for char in u"ო":
        table[char] = u"o"
    for char in u"პ":
        table[char] = u"p"
    for char in u"ჟ":
        table[char] = u"zh"
    for char in u"რ":
        table[char] = u"r"
    for char in u"ს":
        table[char] = u"s"
    for char in u"ტ":
        table[char] = u"t"
    for char in u"უ":
        table[char] = u"u"
    for char in u"ფ":
        table[char] = u"ph"
    for char in u"ქ":
        table[char] = u"q"
    for char in u"ღ":
        table[char] = u"gh"
    for char in u"ყ":
        table[char] = u"q'"
    for char in u"შ":
        table[char] = u"sh"
    for char in u"ჩ":
        table[char] = u"ch"
    for char in u"ც":
        table[char] = u"ts"
    for char in u"ძ":
        table[char] = u"dz"
    for char in u"წ":
        table[char] = u"ts'"
    for char in u"ჭ":
        table[char] = u"ch'"
    for char in u"ხ":
        table[char] = u"kh"
    for char in u"ჯ":
        table[char] = u"j"
    for char in u"ჰ":
        table[char] = u"h"
    for char in u"ჳ":
        table[char] = u"w"
    for char in u"ჵ":
        table[char] = u"o"
    for char in u"ჶ":
        table[char] = u"f"
    # Devanagari
    # Several letters are assigned repeatedly below; the last value wins.
    for char in u"पप":
        table[char] = u"p"
    for char in u"अ":
        table[char] = u"a"
    for char in u"आा":
        table[char] = u"aa"
    for char in u"प":
        table[char] = u"pa"
    for char in u"इि":
        table[char] = u"i"
    for char in u"ईी":
        table[char] = u"ii"
    for char in u"उु":
        table[char] = u"u"
    for char in u"ऊू":
        table[char] = u"uu"
    for char in u"एे":
        table[char] = u"e"
    for char in u"ऐै":
        table[char] = u"ai"
    for char in u"ओो":
        table[char] = u"o"
    for char in u"औौ":
        table[char] = u"au"
    for char in u"ऋृर":
        table[char] = u"r"
    for char in u"ॠॄ":
        table[char] = u"rr"
    for char in u"ऌॢल":
        table[char] = u"l"
    for char in u"ॡॣ":
        table[char] = u"ll"
    for char in u"क":
        table[char] = u"k"
    for char in u"ख":
        table[char] = u"kh"
    for char in u"ग":
        table[char] = u"g"
    for char in u"घ":
        table[char] = u"gh"
    for char in u"ङ":
        table[char] = u"ng"
    for char in u"च":
        table[char] = u"c"
    for char in u"छ":
        table[char] = u"ch"
    for char in u"ज":
        table[char] = u"j"
    for char in u"झ":
        table[char] = u"jh"
    for char in u"ञ":
        table[char] = u"ñ"
    for char in u"टत":
        table[char] = u"t"
    for char in u"ठथ":
        table[char] = u"th"
    for char in u"डद":
        table[char] = u"d"
    for char in u"ढध":
        table[char] = u"dh"
    for char in u"णन":
        table[char] = u"n"
    for char in u"फ":
        table[char] = u"ph"
    for char in u"ब":
        table[char] = u"b"
    for char in u"भ":
        table[char] = u"bh"
    for char in u"म":
        table[char] = u"m"
    for char in u"य":
        table[char] = u"y"
    for char in u"व":
        table[char] = u"v"
    for char in u"श":
        table[char] = u"sh"
    for char in u"षस":
        table[char] = u"s"
    for char in u"ह":
        table[char] = u"h"
    for char in u"क":
        table[char] = u"x"
    for char in u"त":
        table[char] = u"tr"
    for char in u"ज":
        table[char] = u"gj"
    for char in u"क़":
        table[char] = u"q"
    for char in u"फ":
        table[char] = u"f"
    for char in u"ख":
        table[char] = u"hh"
    for char in u"H":
        table[char] = u"gh"
    for char in u"ज":
        table[char] = u"z"
    for char in u"डढ":
        table[char] = u"r"
    # Devanagari ligatures (possibly incomplete and/or incorrect)
    for char in u"ख्":
        table[char] = u"khn"
    for char in u"त":
        table[char] = u"tn"
    for char in u"द्":
        table[char] = u"dn"
    for char in u"श":
        table[char] = u"cn"
    for char in u"ह्":
        table[char] = u"fn"
    for char in u"अँ":
        table[char] = u"m"
    for char in u"॒॑":
        table[char] = u""
    for char in u"०":
        table[char] = u"0"
    for char in u"१":
        table[char] = u"1"
    for char in u"२":
        table[char] = u"2"
    for char in u"३":
        table[char] = u"3"
    for char in u"४":
        table[char] = u"4"
    for char in u"५":
        table[char] = u"5"
    for char in u"६":
        table[char] = u"6"
    for char in u"७":
        table[char] = u"7"
    for char in u"८":
        table[char] = u"8"
    for char in u"९":
        table[char] = u"9"
    # Armenian
    for char in u"Ա":
        table[char] = u"A"
    for char in u"ա":
        table[char] = u"a"
    for char in u"Բ":
        table[char] = u"B"
    for char in u"բ":
        table[char] = u"b"
    for char in u"Գ":
        table[char] = u"G"
    for char in u"գ":
        table[char] = u"g"
    for char in u"Դ":
        table[char] = u"D"
    for char in u"դ":
        table[char] = u"d"
    for char in u"Ե":
        table[char] = u"Je"
    for char in u"ե":
        table[char] = u"e"
    for char in u"Զ":
        table[char] = u"Z"
    for char in u"զ":
        table[char] = u"z"
    for char in u"Է":
        table[char] = u"É"
    for char in u"է":
        table[char] = u"é"
    for char in u"Ը":
        table[char] = u"Ë"
    for char in u"ը":
        table[char] = u"ë"
    for char in u"Թ":
        table[char] = u"Th"
    for char in u"թ":
        table[char] = u"th"
    for char in u"Ժ":
        table[char] = u"Zh"
    for char in u"ժ":
        table[char] = u"zh"
    for char in u"Ի":
        table[char] = u"I"
    for char in u"ի":
        table[char] = u"i"
    for char in u"Լ":
        table[char] = u"L"
    for char in u"լ":
        table[char] = u"l"
    for char in u"Խ":
        table[char] = u"Ch"
    for char in u"խ":
        table[char] = u"ch"
    for char in u"Ծ":
        table[char] = u"Ts"
    for char in u"ծ":
        table[char] = u"ts"
    for char in u"Կ":
        table[char] = u"K"
    for char in u"կ":
        table[char] = u"k"
    for char in u"Հ":
        table[char] = u"H"
    for char in u"հ":
        table[char] = u"h"
    for char in u"Ձ":
        table[char] = u"Dz"
    for char in u"ձ":
        table[char] = u"dz"
    for char in u"Ղ":
        table[char] = u"R"
    for char in u"ղ":
        table[char] = u"r"
    for char in u"Ճ":
        table[char] = u"Cz"
    for char in u"ճ":
        table[char] = u"cz"
    for char in u"Մ":
        table[char] = u"M"
    for char in u"մ":
        table[char] = u"m"
    for char in u"Յ":
        table[char] = u"J"
    for char in u"յ":
        table[char] = u"j"
    for char in u"Ն":
        table[char] = u"N"
    for char in u"ն":
        table[char] = u"n"
    for char in u"Շ":
        table[char] = u"S"
    for char in u"շ":
        table[char] = u"s"
    for char in u"Շ":
        table[char] = u"Vo"
    for char in u"շ":
        table[char] = u"o"
    for char in u"Չ":
        table[char] = u"Tsh"
    for char in u"չ":
        table[char] = u"tsh"
    for char in u"Պ":
        table[char] = u"P"
    for char in u"պ":
        table[char] = u"p"
    for char in u"Ջ":
        table[char] = u"Dz"
    for char in u"ջ":
        table[char] = u"dz"
    for char in u"Ռ":
        table[char] = u"R"
    for char in u"ռ":
        table[char] = u"r"
    for char in u"Ս":
        table[char] = u"S"
    for char in u"ս":
        table[char] = u"s"
    for char in u"Վ":
        table[char] = u"V"
    for char in u"վ":
        table[char] = u"v"
    for char in u"Տ":
        table[char] = u"T'"
    for char in u"տ":
        table[char] = u"t'"
    for char in u"Ր":
        table[char] = u"R"
    for char in u"ր":
        table[char] = u"r"
    for char in u"Ց":
        table[char] = u"Tsh"
    for char in u"ց":
        table[char] = u"tsh"
    for char in u"Ւ":
        table[char] = u"V"
    for char in u"ւ":
        table[char] = u"v"
    for char in u"Փ":
        table[char] = u"Ph"
    for char in u"փ":
        table[char] = u"ph"
    for char in u"Ք":
        table[char] = u"Kh"
    for char in u"ք":
        table[char] = u"kh"
    for char in u"Օ":
        table[char] = u"O"
    for char in u"օ":
        table[char] = u"o"
    for char in u"Ֆ":
        table[char] = u"F"
    for char in u"ֆ":
        table[char] = u"f"
    for char in u"և":
        table[char] = u"&"
    for char in u"՟":
        table[char] = u"."
    for char in u"՞":
        table[char] = u"?"
    for char in u"՝":
        table[char] = u";"
    for char in u"՛":
        table[char] = u""
    # Tamil
    for char in u"க்":
        table[char] = u"k"
    for char in u"ஙண்ந்ன்":
        table[char] = u"n"
    for char in u"ச":
        table[char] = u"c"
    for char in u"ஞ்":
        table[char] = u"ñ"
    for char in u"ட்":
        table[char] = u"th"
    for char in u"த":
        table[char] = u"t"
    for char in u"ப":
        table[char] = u"p"
    for char in u"ம்":
        table[char] = u"m"
    for char in u"ய்":
        table[char] = u"y"
    for char in u"ர்ழ்ற":
        table[char] = u"r"
    for char in u"ல்ள":
        table[char] = u"l"
    for char in u"வ்":
        table[char] = u"v"
    for char in u"ஜ":
        table[char] = u"j"
    for char in u"ஷ":
        table[char] = u"sh"
    for char in u"ஸ":
        table[char] = u"s"
    for char in u"ஹ":
        table[char] = u"h"
    for char in u"க்ஷ":
        table[char] = u"x"
    for char in u"அ":
        table[char] = u"a"
    for char in u"ஆ":
        table[char] = u"aa"
    for char in u"இ":
        table[char] = u"i"
    for char in u"ஈ":
        table[char] = u"ii"
    for char in u"உ":
        table[char] = u"u"
    for char in u"ஊ":
        table[char] = u"uu"
    for char in u"எ":
        table[char] = u"e"
    for char in u"ஏ":
        table[char] = u"ee"
    for char in u"ஐ":
        table[char] = u"ai"
    for char in u"ஒ":
        table[char] = u"o"
    for char in u"ஓ":
        table[char] = u"oo"
    for char in u"ஔ":
        table[char] = u"au"
    for char in u"ஃ":
        table[char] = ""
    # Bengali
    for char in u"অ":
        table[char] = u"ô"
    for char in u"আা":
        table[char] = u"a"
    for char in u"ইিঈী":
        table[char] = u"i"
    for char in u"উুঊূ":
        table[char] = u"u"
    for char in u"ঋৃ":
        table[char] = u"ri"
    for char in u"এেয়":
        table[char] = u"e"
    for char in u"ঐৈ":
        table[char] = u"oi"
    for char in u"ওো":
        table[char] = u"o"
    for char in u"ঔৌ":
        table[char] = "ou"
    for char in u"্":
        table[char] = u""
    for char in u"ৎ":
        table[char] = u"t"
    for char in u"ং":
        table[char] = u"n"
    for char in u"ঃ":
        table[char] = u"h"
    for char in u"ঁ":
        table[char] = u"ñ"
    for char in u"ক":
        table[char] = u"k"
    for char in u"খ":
        table[char] = u"kh"
    for char in u"গ":
        table[char] = u"g"
    for char in u"ঘ":
        table[char] = u"gh"
    for char in u"ঙ":
        table[char] = u"ng"
    for char in u"চ":
        table[char] = u"ch"
    for char in u"ছ":
        table[char] = u"chh"
    for char in u"জ":
        table[char] = u"j"
    for char in u"ঝ":
        table[char] = u"jh"
    for char in u"ঞ":
        table[char] = u"n"
    for char in u"টত":
        table[char] = u"t"
    for char in u"ঠথ":
        table[char] = u"th"
    for char in u"ডদ":
        table[char] = u"d"
    for char in u"ঢধ":
        table[char] = u"dh"
    for char in u"ণন":
        table[char] = u"n"
    for char in u"প":
        table[char] = u"p"
    for char in u"ফ":
        table[char] = u"ph"
    for char in u"ব":
        table[char] = u"b"
    for char in u"ভ":
        table[char] = u"bh"
    for char in u"ম":
        table[char] = u"m"
    for char in u"য":
        table[char] = u"dzh"
    for char in u"র":
        table[char] = u"r"
    for char in u"ল":
        table[char] = u"l"
    for char in u"শ":
        table[char] = u"s"
    for char in u"হ":
        table[char] = u"h"
    for char in u"য়":
        table[char] = u"-"
    for char in u"ড়":
        table[char] = u"r"
    for char in u"ঢ":
        table[char] = u"rh"
    for char in u"০":
        table[char] = u"0"
    for char in u"১":
        table[char] = u"1"
    for char in u"২":
        table[char] = u"2"
    for char in u"৩":
        table[char] = u"3"
    for char in u"৪":
        table[char] = u"4"
    for char in u"৫":
        table[char] = u"5"
    for char in u"৬":
        table[char] = u"6"
    for char in u"৭":
        table[char] = u"7"
    for char in u"৮":
        table[char] = u"8"
    for char in u"৯":
        table[char] = u"9"
    # Thai (because of complications of the alphabet, transliterations
    # are very imprecise here)
    for char in u"ก":
        table[char] = u"k"
    for char in u"ขฃคฅฆ":
        table[char] = u"kh"
    for char in u"ง":
        table[char] = u"ng"
    for char in u"จฉชฌ":
        table[char] = u"ch"
    for char in u"ซศษส":
        table[char] = u"s"
    for char in u"ญย":
        table[char] = u"y"
    for char in u"ฎด":
        table[char] = u"d"
    for char in u"ฏต":
        table[char] = u"t"
    for char in u"ฐฑฒถทธ":
        table[char] = u"th"
    for char in u"ณน":
        table[char] = u"n"
    for char in u"บ":
        table[char] = u"b"
    for char in u"ป":
        table[char] = u"p"
    for char in u"ผพภ":
        table[char] = u"ph"
    for char in u"ฝฟ":
        table[char] = u"f"
    for char in u"ม":
        table[char] = u"m"
    for char in u"ร":
        table[char] = u"r"
    for char in u"ฤ":
        table[char] = u"rue"
    for char in u"ๅ":
        table[char] = u":"
    for char in u"ลฬ":
        table[char] = u"l"
    for char in u"ฦ":
        table[char] = u"lue"
    for char in u"ว":
        table[char] = u"w"
    for char in u"หฮ":
        table[char] = u"h"
    for char in u"อ":
        table[char] = u""
    for char in u"ร":
        table[char] = u"ü"
    for char in u"ว":
        table[char] = u"ua"
    for char in u"อวโิ":
        table[char] = u"o"
    for char in u"ะัา":
        table[char] = u"a"
    for char in u"ว":
        table[char] = u"u"
    for char in u"ำ":
        table[char] = u"am"
    for char in u"ิ":
        table[char] = u"i"
    for char in u"ี":
        table[char] = u"i:"
    for char in u"ึ":
        table[char] = u"ue"
    for char in u"ื":
        table[char] = u"ue:"
    for char in u"ุ":
        table[char] = u"u"
    for char in u"ู":
        table[char] = u"u:"
    for char in u"เ็":
        table[char] = u"e"
    for char in u"แ":
        table[char] = u"ae"
    for char in u"ใไ":
        table[char] = u"ai"
    for char in u"่้๊๋็์":
        table[char] = u""
    for char in u"ฯ":
        table[char] = u"."
    for char in u"ๆ":
        table[char] = u"(2)"
    # Korean (Revised Romanization system within possible, incomplete)
    for char in u"국":
        table[char] = u"guk"
    for char in u"명":
        table[char] = u"myeong"
    for char in u"검":
        table[char] = u"geom"
    for char in u"타":
        table[char] = u"ta"
    for char in u"분":
        table[char] = u"bun"
    for char in u"사":
        table[char] = u"sa"
    for char in u"류":
        table[char] = u"ryu"
    for char in u"포":
        table[char] = u"po"
    for char in u"르":
        table[char] = u"reu"
    for char in u"투":
        table[char] = u"tu"
    for char in u"갈":
        table[char] = u"gal"
    for char in u"어":
        table[char] = u"eo"
    for char in u"노":
        table[char] = u"no"
    for char in u"웨":
        table[char] = u"we"
    for char in u"이":
        table[char] = u"i"
    for char in u"라":
        table[char] = u"ra"
    for char in u"틴":
        table[char] = u"tin"
    for char in u"루":
        table[char] = u"ru"
    for char in u"마":
        table[char] = u"ma"
    for char in u"니":
        table[char] = u"ni"
    for char in u"아":
        table[char] = u"a"
    for char in u"독":
        table[char] = u"dok"
    for char in u"일":
        table[char] = u"il"
    for char in u"모":
        table[char] = u"mo"
    for char in u"크":
        table[char] = u"keu"
    for char in u"샤":
        table[char] = u"sya"
    for char in u"영":
        table[char] = u"yeong"
    for char in u"불":
        table[char] = u"bul"
    for char in u"가":
        table[char] = u"ga"
    for char in u"리":
        table[char] = u"ri"
    for char in u"그":
        table[char] = u"geu"
    for char in u"지":
        table[char] = u"ji"
    for char in u"야":
        table[char] = u"ya"
    for char in u"바":
        table[char] = u"ba"
    for char in u"슈":
        table[char] = u"syu"
    for char in u"키":
        table[char] = u"ki"
    for char in u"프":
        table[char] = u"peu"
    for char in u"랑":
        table[char] = u"rang"
    for char in u"스":
        table[char] = u"seu"
    for char in u"로":
        table[char] = u"ro"
    for char in u"메":
        table[char] = u"me"
    for char in u"역":
        table[char] = u"yeok"
    for char in u"도":
        table[char] = u"do"
    # Kannada
    table[u"ಅ"] = u"a"
    for char in u"ಆಾ":
        table[char] = u"aa"
    for char in u"ಇಿ":
        table[char] = u"i"
    for char in u"ಈೀ":
        table[char] = u"ii"
    for char in u"ಉು":
        table[char] = u"u"
    for char in u"ಊೂ":
        table[char] = u"uu"
    for char in u"ಋೂ":
        table[char] = u"r'"
    for char in u"ಎೆ":
        table[char] = u"e"
    for char in u"ಏೇ":
        table[char] = u"ee"
    for char in u"ಐೈ":
        table[char] = u"ai"
    for char in u"ಒೊ":
        table[char] = u"o"
    for char in u"ಓೋ":
        table[char] = u"oo"
    for char in u"ಔೌ":
        table[char] = u"au"
    table[u"ಂ"] = u"m'"
    table[u"ಃ"] = u"h'"
    table[u"ಕ"] = u"k"
    table[u"ಖ"] = u"kh"
    table[u"ಗ"] = u"g"
    table[u"ಘ"] = u"gh"
    table[u"ಙ"] = u"ng"
    table[u"ಚ"] = u"c"
    table[u"ಛ"] = u"ch"
    table[u"ಜ"] = u"j"
    table[u"ಝ"] = u"ny"
    table[u"ಟ"] = u"tt"
    table[u"ಠ"] = u"tth"
    table[u"ಡ"] = u"dd"
    table[u"ಢ"] = u"ddh"
    table[u"ಣ"] = u"nn"
    table[u"ತ"] = u"t"
    table[u"ಥ"] = u"th"
    table[u"ದ"] = u"d"
    table[u"ಧ"] = u"dh"
    table[u"ನ"] = u"n"
    table[u"ಪ"] = u"p"
    table[u"ಫ"] = u"ph"
    table[u"ಬ"] = u"b"
    table[u"ಭ"] = u"bh"
    table[u"ಮ"] = u"m"
    table[u"ಯ"] = u"y"
    table[u"ರ"] = u"r"
    table[u"ಲ"] = u"l"
    table[u"ವ"] = u"v"
    table[u"ಶ"] = u"sh"
    table[u"ಷ"] = u"ss"
    table[u"ಸ"] = u"s"
    table[u"ಹ"] = u"h"
    table[u"ಳ"] = u"ll"
    table[u"೦"] = u"0"
    table[u"೧"] = u"1"
    table[u"೨"] = u"2"
    table[u"೩"] = u"3"
    table[u"೪"] = u"4"
    table[u"೫"] = u"5"
    table[u"೬"] = u"6"
    table[u"೭"] = u"7"
    table[u"೮"] = u"8"
    table[u"೯"] = u"9"
    # Telugu
    for char in u"అ":
        table[char] = u"a"
    for char in u"ఆా":
        table[char] = u"aa"
    for char in u"ఇి":
        table[char] = u"i"
    for char in u"ఈీ":
        table[char] = u"ii"
    for char in u"ఉు":
        table[char] = u"u"
    for char in u"ఊూ":
        table[char] = u"uu"
    for char in u"ఋృ":
        table[char] = u"r'"
    for char in u"ౠౄ":
        table[char] = u'r"'
    table[u"ఌ"] = u"l'"
    table[u"ౡ"] = u'l"'
    for char in u"ఎె":
        table[char] = u"e"
    for char in u"ఏే":
        table[char] = u"ee"
    for char in u"ఐై":
        table[char] = u"ai"
    for char in u"ఒొ":
        table[char] = u"o"
    for char in u"ఓో":
        table[char] = u"oo"
    for char in u"ఔౌ":
        table[char] = u"au"
    table[u"ం"] = u"'"
    table[u"ః"] = u'"'
    table[u"క"] = u"k"
    table[u"ఖ"] = u"kh"
    table[u"గ"] = u"g"
    table[u"ఘ"] = u"gh"
    table[u"ఙ"] = u"ng"
    table[u"చ"] = u"ts"
    table[u"ఛ"] = u"tsh"
    table[u"జ"] = u"j"
    table[u"ఝ"] = u"jh"
    table[u"ఞ"] = u"ñ"
    for char in u"టత":
        table[char] = u"t"
    for char in u"ఠథ":
        table[char] = u"th"
    for char in u"డద":
        table[char] = u"d"
    for char in u"ఢధ":
        table[char] = u"dh"
    for char in u"ణన":
        table[char] = u"n"
    table[u"ప"] = u"p"
    table[u"ఫ"] = u"ph"
    table[u"బ"] = u"b"
    table[u"భ"] = u"bh"
    table[u"మ"] = u"m"
    table[u"య"] = u"y"
    for char in u"రఱ":
        table[char] = u"r"
    for char in u"లళ":
        table[char] = u"l"
    table[u"వ"] = u"v"
    table[u"శ"] = u"sh"
    for char in u"షస":
        table[char] = u"s"
    table[u"హ"] = u"h"
    table[u"్"] = ""
    for char in u"ంఁ":
        table[char] = u"^"
    table[u"ః"] = u"-"
    table[u"౦"] = u"0"
    table[u"౧"] = u"1"
    table[u"౨"] = u"2"
    table[u"౩"] = u"3"
    table[u"౪"] = u"4"
    table[u"౫"] = u"5"
    table[u"౬"] = u"6"
    table[u"౭"] = u"7"
    table[u"౮"] = u"8"
    table[u"౯"] = u"9"
    table[u"౹"] = u"1/4"
    table[u"౺"] = u"1/2"
    table[u"౻"] = u"3/4"
    table[u"౼"] = u"1/16"
    table[u"౽"] = u"1/8"
    table[u"౾"] = u"3/16"
    # Lao - note: pronunciation in initial position is used;
    # different pronunciation in final position is ignored
    table[u"ກ"] = "k"
    for char in u"ຂຄ":
        table[char] = "kh"
    table[u"ງ"] = "ng"
    table[u"ຈ"] = "ch"
    for char in u"ສຊ":
        table[char] = "s"
    table[u"ຍ"] = "ny"
    table[u"ດ"] = "d"
    table[u"ຕ"] = "t"
    for char in u"ຖທ":
        table[char] = "th"
    table[u"ນ"] = "n"
    table[u"ບ"] = "b"
    table[u"ປ"] = "p"
    for char in u"ຜພ":
        table[char] = "ph"
    for char in u"ຝຟ":
        table[char] = "f"
    for char in u"ມໝ":
        table[char] = "m"
    table[u"ຢ"] = "y"
    for char in u"ຣຼ":
        table[char] = "r"
    for char in u"ລຼ":
        table[char] = "l"
    table[u"ວ"] = "v"
    for char in u"ຮ":
        table[char] = "h"
    table[u"ອ"] = "'"
    for char in u"ະັ":
        table[char] = "a"
    table[u"ິ"] = "i"
    table[u"ຶ"] = "ue"
    table[u"ຸ"] = "u"
    table[u"ເ"] = u"é"
    table[u"ແ"] = u"è"
    for char in u"ໂົາໍ":
        table[char] = "o"
    table[u"ຽ"] = "ia"
    table[u"ເຶ"] = "uea"
    table[u"ຍ"] = "i"
    for char in u"ໄໃ":
        table[char] = "ai"
    table[u"ຳ"] = "am"
    table[u"າ"] = "aa"
    table[u"ີ"] = "ii"
    table[u"ື"] = "yy"
    table[u"ູ"] = "uu"
    table[u"ເ"] = "e"
    table[u"ແ"] = "ei"
    table[u"໐"] = "0"
    table[u"໑"] = "1"
    table[u"໒"] = "2"
    table[u"໓"] = "3"
    table[u"໔"] = "4"
    table[u"໕"] = "5"
    table[u"໖"] = "6"
    table[u"໗"] = "7"
    table[u"໘"] = "8"
    table[u"໙"] = "9"
    return table
# Serialize the transliteration table to JSON and write it to output.txt.
# The ``with`` block closes the file even if serialization or the write fails.
with open('output.txt', 'w') as f:
    f.write(json.dumps(dictgen()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment