Created
November 2, 2013 15:42
-
-
Save ebraminio/7280220 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# Generated from https://raw.github.com/wikimedia/pywikibot-core/master/pywikibot/userinterfaces/transliteration.py | |
import json | |
def dictgen(): | |
dict = {} | |
for char in u"ÀÁÂẦẤẪẨẬÃĀĂẰẮẴẶẲȦǠẠḀȂĄǍẢ": | |
dict[char] = u"A" | |
for char in u"ȀǞ": | |
dict[char] = u"Ä" | |
dict[u"Ǻ"] = u"Å" | |
dict[u"Ä"] = u"Ae" | |
dict[u"Å"] = u"Aa" | |
for char in u"àáâầấẫẩậãāăằắẵặẳȧǡạḁȃąǎảẚ": | |
dict[char] = u"a" | |
for char in u"ȁǟ": | |
dict[char] = u"ä" | |
dict[u"ǻ"] = u"å" | |
dict[u"ä"] = u"ae" | |
dict[u"å"] = u"aa" | |
for char in u"ḂḄḆƁƂ": | |
dict[char] = u"B" | |
for char in u"ḃḅḇƀɓƃ": | |
dict[char] = u"b" | |
for char in u"ĆĈĊÇČƇ": | |
dict[char] = u"C" | |
for char in u"ćĉċçčƈȼ": | |
dict[char] = u"c" | |
dict[u"Ḉ"] = u"Ç" | |
dict[u"ḉ"] = u"ç" | |
dict[u"Ð"] = u"Dh" | |
dict[u"ð"] = u"dh" | |
for char in u"ĎḊḌḎḐḒĐƉƊƋ": | |
dict[char] = u"D" | |
for char in u"ďḋḍḏḑḓđɖɗƌ": | |
dict[char] = u"d" | |
for char in u"ÈȄÉÊḚËĒḔḖĔĖẸE̩ȆȨḜĘĚẼḘẺ": | |
dict[char] = u"E" | |
for char in u"ỀẾỄỆỂ": | |
dict[char] = u"Ê" | |
for char in u"èȅéêḛëēḕḗĕėẹe̩ȇȩḝęěẽḙẻ": | |
dict[char] = u"e" | |
for char in u"ềếễệể": | |
dict[char] = u"ê" | |
for char in u"ḞƑ": | |
dict[char] = u"F" | |
for char in u"ḟƒ": | |
dict[char] = u"f" | |
for char in u"ǴḠĞĠĢǦǤƓ": | |
dict[char] = u"G" | |
for char in u"ǵḡğġģǧǥɠ": | |
dict[char] = u"g" | |
dict[u"Ĝ"] = u"Gx" | |
dict[u"ĝ"] = u"gx" | |
for char in u"ḢḤḦȞḨḪH̱ĦǶ": | |
dict[char] = u"H" | |
for char in u"ḣḥḧȟḩḫ̱ẖħƕ": | |
dict[char] = u"h" | |
for char in u"IÌȈÍÎĨḬÏḮĪĬȊĮǏİỊỈƗ": | |
dict[char] = u"I" | |
for char in u"ıìȉíîĩḭïḯīĭȋįǐiịỉɨ": | |
dict[char] = u"i" | |
for char in u"ĴJ": | |
dict[char] = u"J" | |
for char in u"ɟĵ̌ǰ": | |
dict[char] = u"j" | |
for char in u"ḰǨĶḲḴƘ": | |
dict[char] = u"K" | |
for char in u"ḱǩķḳḵƙ": | |
dict[char] = u"k" | |
for char in u"ĹĻĽḶḸḺḼȽŁ": | |
dict[char] = u"L" | |
for char in u"ĺļľḷḹḻḽƚłɫ": | |
dict[char] = u"l" | |
for char in u"ḾṀṂ": | |
dict[char] = u"M" | |
for char in u"ḿṁṃɱ": | |
dict[char] = u"m" | |
for char in u"ǸŃÑŅŇṄṆṈṊŊƝɲȠ": | |
dict[char] = u"N" | |
for char in u"ǹńñņňṅṇṉṋŋɲƞ": | |
dict[char] = u"n" | |
for char in u"ÒÓÔÕṌṎȬÖŌṐṒŎǑȮȰỌǪǬƠỜỚỠỢỞỎƟØǾ": | |
dict[char] = u"O" | |
for char in u"òóôõṍṏȭöōṑṓŏǒȯȱọǫǭơờớỡợởỏɵøǿ": | |
dict[char] = u"o" | |
for char in u"ȌŐȪ": | |
dict[char] = u"Ö" | |
for char in u"ȍőȫ": | |
dict[char] = u"ö" | |
for char in u"ỒỐỖỘỔȎ": | |
dict[char] = u"Ô" | |
for char in u"ồốỗộổȏ": | |
dict[char] = u"ô" | |
for char in u"ṔṖƤ": | |
dict[char] = u"P" | |
for char in u"ṕṗƥ": | |
dict[char] = u"p" | |
dict[u"ᵽ"] = u"q" | |
for char in u"ȐŔŖŘȒṘṚṜṞ": | |
dict[char] = u"R" | |
for char in u"ȑŕŗřȓṙṛṝṟɽ": | |
dict[char] = u"r" | |
for char in u"ŚṤŞȘŠṦṠṢṨ": | |
dict[char] = u"S" | |
for char in u"śṥşșšṧṡṣṩȿ": | |
dict[char] = u"s" | |
dict[u"Ŝ"] = u"Sx" | |
dict[u"ŝ"] = u"sx" | |
for char in u"ŢȚŤṪṬṮṰŦƬƮ": | |
dict[char] = u"T" | |
for char in u"ţțťṫṭṯṱŧȾƭʈ": | |
dict[char] = u"t" | |
for char in u"ÙÚŨṸṴÜṲŪṺŬỤŮŲǓṶỦƯỮỰỬ": | |
dict[char] = u"U" | |
for char in u"ùúũṹṵüṳūṻŭụůųǔṷủưữựửʉ": | |
dict[char] = u"u" | |
for char in u"ȔŰǛǗǕǙ": | |
dict[char] = u"Ü" | |
for char in u"ȕűǜǘǖǚ": | |
dict[char] = u"ü" | |
dict[u"Û"] = u"Ux" | |
dict[u"û"] = u"ux" | |
dict[u"Ȗ"] = u"Û" | |
dict[u"ȗ"] = u"û" | |
dict[u"Ừ"] = u"Ù" | |
dict[u"ừ"] = u"ù" | |
dict[u"Ứ"] = u"Ú" | |
dict[u"ứ"] = u"ú" | |
for char in u"ṼṾ": | |
dict[char] = u"V" | |
for char in u"ṽṿ": | |
dict[char] = u"v" | |
for char in u"ẀẂŴẄẆẈ": | |
dict[char] = u"W" | |
for char in u"ẁẃŵẅẇẉ": | |
dict[char] = u"w" | |
for char in u"ẊẌ": | |
dict[char] = u"X" | |
for char in u"ẋẍ": | |
dict[char] = u"x" | |
for char in u"ỲÝŶŸỸȲẎỴỶƳ": | |
dict[char] = u"Y" | |
for char in u"ỳýŷÿỹȳẏỵỷƴ": | |
dict[char] = u"y" | |
for char in u"ŹẐŻẒŽẔƵȤ": | |
dict[char] = u"Z" | |
for char in u"źẑżẓžẕƶȥ": | |
dict[char] = u"z" | |
dict[u"ɀ"] = u"zv" | |
# Latin: extended Latin alphabet | |
dict[u"ɑ"] = u"a" | |
for char in u"ÆǼǢ": | |
dict[char] = u"AE" | |
for char in u"æǽǣ": | |
dict[char] = u"ae" | |
dict[u"Ð"] = u"Dh" | |
dict[u"ð"] = u"dh" | |
for char in u"ƎƏƐ": | |
dict[char] = u"E" | |
for char in u"ǝəɛ": | |
dict[char] = u"e" | |
for char in u"ƔƢ": | |
dict[char] = u"G" | |
for char in u"ᵷɣƣᵹ": | |
dict[char] = u"g" | |
dict[u"Ƅ"] = u"H" | |
dict[u"ƅ"] = u"h" | |
dict[u"Ƕ"] = u"Wh" | |
dict[u"ƕ"] = u"wh" | |
dict[u"Ɩ"] = u"I" | |
dict[u"ɩ"] = u"i" | |
dict[u"Ŋ"] = u"Ng" | |
dict[u"ŋ"] = u"ng" | |
dict[u"Œ"] = u"OE" | |
dict[u"œ"] = u"oe" | |
dict[u"Ɔ"] = u"O" | |
dict[u"ɔ"] = u"o" | |
dict[u"Ȣ"] = u"Ou" | |
dict[u"ȣ"] = u"ou" | |
dict[u"Ƽ"] = u"Q" | |
for char in u"ĸƽ": | |
dict[char] = u"q" | |
dict[u"ȹ"] = u"qp" | |
dict[u""] = u"r" | |
dict[u"ſ"] = u"s" | |
dict[u"ß"] = u"ss" | |
dict[u"Ʃ"] = u"Sh" | |
for char in u"ʃᶋ": | |
dict[char] = u"sh" | |
dict[u"Ʉ"] = u"U" | |
dict[u"ʉ"] = u"u" | |
dict[u"Ʌ"] = u"V" | |
dict[u"ʌ"] = u"v" | |
for char in u"ƜǷ": | |
dict[char] = u"W" | |
for char in u"ɯƿ": | |
dict[char] = u"w" | |
dict[u"Ȝ"] = u"Y" | |
dict[u"ȝ"] = u"y" | |
dict[u"IJ"] = u"IJ" | |
dict[u"ij"] = u"ij" | |
dict[u"Ƨ"] = u"Z" | |
for char in u"ʮƨ": | |
dict[char] = u"z" | |
dict[u"Ʒ"] = u"Zh" | |
dict[u"ʒ"] = u"zh" | |
dict[u"Ǯ"] = u"Dzh" | |
dict[u"ǯ"] = u"dzh" | |
for char in u"ƸƹʔˀɁɂ": | |
dict[char] = u"'" | |
for char in u"Þ": | |
dict[char] = u"Th" | |
for char in u"þ": | |
dict[char] = u"th" | |
for char in u"Cʗǃ": | |
dict[char] = u"!" | |
#Punctuation and typography | |
for char in u"«»“”„¨": | |
dict[char] = u'"' | |
for char in u"‘’′": | |
dict[char] = u"'" | |
dict[u"•"] = u"*" | |
dict[u"@"] = u"(at)" | |
dict[u"¤"] = u"$" | |
dict[u"¢"] = u"c" | |
dict[u"€"] = u"E" | |
dict[u"£"] = u"L" | |
dict[u"¥"] = u"yen" | |
dict[u"†"] = u"+" | |
dict[u"‡"] = u"++" | |
dict[u"°"] = u":" | |
dict[u"¡"] = u"!" | |
dict[u"¿"] = u"?" | |
dict[u"‰"] = u"o/oo" | |
dict[u"‱"] = u"o/ooo" | |
for char in u"¶§": | |
dict[char] = u">" | |
for char in u"…": | |
dict[char] = u"..." | |
for char in u"‒–—―": | |
dict[char] = u"-" | |
for char in u"·": | |
dict[char] = u" " | |
dict[u"¦"] = u"|" | |
dict[u"⁂"] = u"***" | |
dict[u"◊"] = u"<>" | |
dict[u"‽"] = u"?!" | |
dict[u"؟"] = u";-)" | |
dict[u"¹"] = u"1" | |
dict[u"²"] = u"2" | |
dict[u"³"] = u"3" | |
# Cyrillic | |
dict.update({u"А": u"A", u"а": u"a", u"Б": u"B", u"б": u"b", | |
u"В": u"V", u"в": u"v", u"Г": u"G", u"г": u"g", | |
u"Д": u"D", u"д": u"d", u"Е": u"E", u"е": u"e", | |
u"Ж": u"Zh", u"ж": u"zh", u"З": u"Z", u"з": u"z", | |
u"И": u"I", u"и": u"i", u"Й": u"J", u"й": u"j", | |
u"К": u"K", u"к": u"k", u"Л": u"L", u"л": u"l", | |
u"М": u"M", u"м": u"m", u"Н": u"N", u"н": u"n", | |
u"О": u"O", u"о": u"o", u"П": u"P", u"п": u"p", | |
u"Р": u"R", u"р": u"r", u"С": u"S", u"с": u"s", | |
u"Т": u"T", u"т": u"t", u"У": u"U", u"у": u"u", | |
u"Ф": u"F", u"ф": u"f", u"х": u"kh", u"Ц": u"C", | |
u"ц": u"c", u"Ч": u"Ch", u"ч": u"ch", u"Ш": u"Sh", | |
u"ш": u"sh", u"Щ": u"Shch", u"щ": u"shch", u"Ь": u"'", | |
u"ь": "'", u"Ъ": u'"', u"ъ": '"', u"Ю": u"Yu", | |
u"ю": u"yu", u"Я": u"Ya", u"я": u"ya", u"Х": u"Kh", | |
u"Χ": u"Kh"}) | |
# Additional Cyrillic letters, most occurring in only one or a few languages | |
dict.update({u"Ы": u"Y", u"ы": u"y", u"Ё": u"Ë", u"ё": u"ë", | |
u"Э": u"È", u"Ѐ": u"È", u"э": u"è", u"ѐ": u"è", | |
u"І": u"I", u"і": u"i", u"Ї": u"Ji", u"ї": u"ji", | |
u"Є": u"Je", u"є": u"je", u"Ґ": u"G", u"Ҝ": u"G", | |
u"ґ": u"g", u"ҝ": u"g", u"Ђ": u"Dj", u"ђ": u"dj", | |
u"Ӣ": u"Y", u"ӣ": u"y", u"Љ": u"Lj", u"љ": u"lj", | |
u"Њ": u"Nj", u"њ": u"nj", u"Ћ": u"Cj", u"ћ": u"cj", | |
u"Җ": u"Zhj", u"җ": u"zhj", u"Ѓ": u"Gj", u"ѓ": u"gj", | |
u"Ќ": u"Kj", u"ќ": u"kj", u"Ӣ": u"Ii", u"ӣ": u"ii", | |
u"Ӯ": u"U", u"ӯ": u"u", u"Ҳ": u"H", u"ҳ": u"h", | |
u"Ҷ": u"Dz", u"ҷ": u"dz", u"Ө": u"Ô", u"Ӫ": u"Ô", | |
u"ө": u"ô", u"ӫ": u"ô", u"Ү": u"Y", u"ү": u"y", u"Һ": u"H", | |
u"һ": u"h", u"Ә": u"AE", u"Ӕ": u"AE", u"ә": u"ae", | |
u"Ӛ": u"Ë", u"Ӭ": u"Ë", u"ӛ": u"ë", u"ӭ": u"ë", u"Җ": u"Zhj", | |
u"җ": u"zhj", u"Ұ": u"U", u"ұ": u"u", u"ў": u"ù", u"Ў": u"Ù", | |
u"ѝ": u"ì", u"Ѝ": u"Ì", u"Ӑ": u"A", u"ă": u"a", u"Ӓ": u"Ä", | |
u"ҿ": u"ä", u"Ҽ": u"Ts", u"Ҿ": u"Ts", u"ҽ": u"ts", u"ҿ": u"ts", | |
u"Ҙ": u"Dh", u"ҙ": u"dh", u"Ӏ": u"", u"ӏ": u"", u"Ӆ": u"L", | |
u"ӆ": u"l", u"Ӎ": u"M", u"ӎ": u"m", u"Ӧ": u"Ö", u"ӧ": u"ö", | |
u"Ҩ": u"u", u"ҩ": u"u", u"Ҧ": u"Ph", u"ҧ": u"ph", u"Ҏ": u"R", | |
u"ҏ": u"r", u"Ҫ": u"Th", u"ҫ": u"th", u"Ҭ": u"T", u"ҭ": u"t", | |
u"Ӯ": u"Û", u"ӯ": u"û", u"Ұ": u"U", u"Ӹ": u"U", u"ұ": u"u", | |
u"ӹ": u"u", u"Ҵ": u"Tts", u"ҵ": u"tts", u"Ӵ": u"Ch", u"ӵ": u"ch"}) | |
for char in u"ЈӤҊ": | |
dict[char] = u"J" | |
for char in u"јӥҋ": | |
dict[char] = u"j" | |
for char in u"ЏӁӜҶ": | |
dict[char] = u"Dzh" | |
for char in u"џӂӝҷ": | |
dict[char] = u"dzh" | |
for char in u"ЅӞӠӋҸ": | |
dict[char] = u"Dz" | |
for char in u"ѕӟӡӌҹ": | |
dict[char] = u"dz" | |
for char in u"ҒӶҔ": | |
dict[char] = u"G" | |
for char in u"ғӷҕ": | |
dict[char] = u"g" | |
for char in u"ҚҞҠӃ": | |
dict[char] = u"Q" | |
for char in u"қҟҡӄ": | |
dict[char] = u"q" | |
for char in u"ҢҤӉӇ": | |
dict[char] = u"Ng" | |
for char in u"ңҥӊӈ": | |
dict[char] = u"ng" | |
for char in u"ӖѢҌ": | |
dict[char] = u"E" | |
for char in u"ӗѣҍ": | |
dict[char] = u"e" | |
for char in u"ӲӰҮ": | |
dict[char] = u"Ü" | |
for char in u"ӳӱү": | |
dict[char] = u"ü" | |
# Archaic Cyrillic letters | |
dict.update({u"Ѹ": u"Ou", u"ѹ": u"ou", u"Ѡ": u"O", u"Ѻ": u"O", u"ѡ": u"o", | |
u"ѻ": u"o", u"Ѿ": u"Ot", u"ѿ": u"ot", u"Ѣ": u"E", u"ѣ": u"e", | |
u"Ѥ": u"Ei", u"Ѧ": u"Ei", u"ѥ": u"ei", u"ѧ": u"ei", u"Ѫ": u"Ai", | |
u"ѫ": u"ai", u"Ѯ": u"X", u"ѯ": u"x", u"Ѱ": u"Ps", u"ѱ": u"ps", | |
u"Ѳ": u"Th", u"ѳ": u"th", u"Ѵ": u"Ü", u"Ѷ": u"Ü", u"ѵ": u"ü"}) | |
# Hebrew alphabet | |
for char in u"אע": | |
dict[char] = u"'" | |
dict[u"ב"] = u"b" | |
dict[u"ג"] = u"g" | |
dict[u"ד"] = u"d" | |
dict[u"ה"] = u"h" | |
dict[u"ו"] = u"v" | |
dict[u"ז"] = u"z" | |
dict[u"ח"] = u"kh" | |
dict[u"ט"] = u"t" | |
dict[u"י"] = u"y" | |
for char in u"ךכ": | |
dict[char] = u"k" | |
dict[u"ל"] = u"l" | |
for char in u"םמ": | |
dict[char] = u"m" | |
for char in u"ןנ": | |
dict[char] = u"n" | |
dict[u"ס"] = u"s" | |
for char in u"ףפ": | |
dict[char] = u"ph" | |
for char in u"ץצ": | |
dict[char] = u"ts" | |
dict[u"ק"] = u"q" | |
dict[u"ר"] = u"r" | |
dict[u"ש"] = u"sh" | |
dict[u"ת"] = u"th" | |
# Arab alphabet | |
for char in u"اﺍﺎ": | |
dict[char] = u"a" | |
for char in u"بﺏﺐﺒﺑ": | |
dict[char] = u"b" | |
for char in u"تﺕﺖﺘﺗ": | |
dict[char] = u"t" | |
for char in u"ثﺙﺚﺜﺛ": | |
dict[char] = u"th" | |
for char in u"جﺝﺞﺠﺟ": | |
dict[char] = u"g" | |
for char in u"حﺡﺢﺤﺣ": | |
dict[char] = u"h" | |
for char in u"خﺥﺦﺨﺧ": | |
dict[char] = u"kh" | |
for char in u"دﺩﺪ": | |
dict[char] = u"d" | |
for char in u"ذﺫﺬ": | |
dict[char] = u"dh" | |
for char in u"رﺭﺮ": | |
dict[char] = u"r" | |
for char in u"زﺯﺰ": | |
dict[char] = u"z" | |
for char in u"سﺱﺲﺴﺳ": | |
dict[char] = u"s" | |
for char in u"شﺵﺶﺸﺷ": | |
dict[char] = u"sh" | |
for char in u"صﺹﺺﺼﺻ": | |
dict[char] = u"s" | |
for char in u"ضﺽﺾﻀﺿ": | |
dict[char] = u"d" | |
for char in u"طﻁﻂﻄﻃ": | |
dict[char] = u"t" | |
for char in u"ظﻅﻆﻈﻇ": | |
dict[char] = u"z" | |
for char in u"عﻉﻊﻌﻋ": | |
dict[char] = u"'" | |
for char in u"غﻍﻎﻐﻏ": | |
dict[char] = u"gh" | |
for char in u"فﻑﻒﻔﻓ": | |
dict[char] = u"f" | |
for char in u"قﻕﻖﻘﻗ": | |
dict[char] = u"q" | |
for char in u"كﻙﻚﻜﻛک": | |
dict[char] = u"k" | |
for char in u"لﻝﻞﻠﻟ": | |
dict[char] = u"l" | |
for char in u"مﻡﻢﻤﻣ": | |
dict[char] = u"m" | |
for char in u"نﻥﻦﻨﻧ": | |
dict[char] = u"n" | |
for char in u"هﻩﻪﻬﻫ": | |
dict[char] = u"h" | |
for char in u"وﻭﻮ": | |
dict[char] = u"w" | |
for char in u"یيﻱﻲﻴﻳ": | |
dict[char] = u"y" | |
# Arabic - additional letters, modified letters and ligatures | |
dict[u"ﺀ"] = u"'" | |
for char in u"آﺁﺂ": | |
dict[char] = u"'a" | |
for char in u"ةﺓﺔ": | |
dict[char] = u"th" | |
for char in u"ىﻯﻰ": | |
dict[char] = u"á" | |
for char in u"یﯼﯽﯿﯾ": | |
dict[char] = u"y" | |
dict[u"؟"] = u"?" | |
# Arabic - ligatures | |
for char in u"ﻻﻼ": | |
dict[char] = u"la" | |
dict[u"ﷲ"] = u"llah" | |
for char in u"إأ": | |
dict[char] = u"a'" | |
dict[u"ؤ"] = u"w'" | |
dict[u"ئ"] = u"y'" | |
for char in u"◌◌": | |
dict[char] = u"" # indicates absence of vowels | |
# Arabic vowels | |
dict[u"◌"] = u"a" | |
dict[u"◌"] = u"u" | |
dict[u"◌"] = u"i" | |
dict[u"◌"] = u"a" | |
dict[u"◌"] = u"ay" | |
dict[u"◌"] = u"ay" | |
dict[u"◌"] = u"u" | |
dict[u"◌"] = u"iy" | |
# Arab numerals | |
for char in u"٠۰": | |
dict[char] = u"0" | |
for char in u"١۱": | |
dict[char] = u"1" | |
for char in u"٢۲": | |
dict[char] = u"2" | |
for char in u"٣۳": | |
dict[char] = u"3" | |
for char in u"٤۴": | |
dict[char] = u"4" | |
for char in u"٥۵": | |
dict[char] = u"5" | |
for char in u"٦۶": | |
dict[char] = u"6" | |
for char in u"٧۷": | |
dict[char] = u"7" | |
for char in u"٨۸": | |
dict[char] = u"8" | |
for char in u"٩۹": | |
dict[char] = u"9" | |
# Perso-Arabic | |
for char in u"پﭙﭙپ": | |
dict[char] = u"p" | |
for char in u"چچچچ": | |
dict[char] = u"ch" | |
for char in u"ژژ": | |
dict[char] = u"zh" | |
for char in u"گﮔﮕﮓ": | |
dict[char] = u"g" | |
# Greek | |
dict.update({u"Α": u"A", u"α": u"a", u"Β": u"B", u"β": u"b", u"Γ": u"G", | |
u"γ": u"g", u"Δ": u"D", u"δ": u"d", u"Ε": u"E", u"ε": u"e", | |
u"Ζ": u"Z", u"ζ": u"z", u"Η": u"I", u"η": u"i", u"θ": u"th", | |
u"Θ": u"Th", u"Ι": u"I", u"ι": u"i", u"Κ": u"K", u"κ": u"k", | |
u"Λ": u"L", u"λ": u"l", u"Μ": u"M", u"μ": u"m", u"Ν": u"N", | |
u"ν": u"n", u"Ξ": u"X", u"ξ": u"x", u"Ο": u"O", u"ο": u"o", | |
u"Π": u"P", u"π": u"p", u"Ρ": u"R", u"ρ": u"r", u"Σ": u"S", | |
u"σ": u"s", u"ς": u"s", u"Τ": u"T", u"τ": u"t", u"Υ": u"Y", | |
u"υ": u"y", u"Φ": u"F", u"φ": u"f", u"Ψ": u"Ps", u"ψ": u"ps", | |
u"Ω": u"O", u"ω": u"o", u"ϗ": u"&", u"Ϛ": u"St", u"ϛ": u"st", | |
u"Ϙ": u"Q", u"Ϟ": u"Q", u"ϙ": u"q", u"ϟ": u"q", u"Ϻ": u"S", | |
u"ϻ": u"s", u"Ϡ": u"Ss", u"ϡ": u"ss", u"Ϸ": u"Sh", u"ϸ": u"sh", | |
u"·": u":", u"Ά": u"Á", u"ά": u"á", u"Έ": u"É", u"Ή": u"É", | |
u"έ": u"é", u"ή": u"é", u"Ί": u"Í", u"ί": u"í", u"Ϊ": u"Ï", | |
u"ϊ": u"ï", u"ΐ": u"ï", u"Ό": u"Ó", u"ό": u"ó", u"Ύ": u"Ý", | |
u"ύ": u"ý", u"Ϋ": u"Y", u"ϋ": u"ÿ", u"ΰ": u"ÿ", u"Ώ": u"Ó", | |
u"ώ": u"ó"}) | |
# Japanese (katakana and hiragana) | |
for char in u"アァあ": | |
dict[char] = u"a" | |
for char in u"イィい": | |
dict[char] = u"i" | |
for char in u"ウう": | |
dict[char] = u"u" | |
for char in u"エェえ": | |
dict[char] = u"e" | |
for char in u"オォお": | |
dict[char] = u"o" | |
for char in u"ャや": | |
dict[char] = u"ya" | |
for char in u"ュゆ": | |
dict[char] = u"yu" | |
for char in u"ョよ": | |
dict[char] = u"yo" | |
for char in u"カか": | |
dict[char] = u"ka" | |
for char in u"キき": | |
dict[char] = u"ki" | |
for char in u"クく": | |
dict[char] = u"ku" | |
for char in u"ケけ": | |
dict[char] = u"ke" | |
for char in u"コこ": | |
dict[char] = u"ko" | |
for char in u"サさ": | |
dict[char] = u"sa" | |
for char in u"シし": | |
dict[char] = u"shi" | |
for char in u"スす": | |
dict[char] = u"su" | |
for char in u"セせ": | |
dict[char] = u"se" | |
for char in u"ソそ": | |
dict[char] = u"so" | |
for char in u"タた": | |
dict[char] = u"ta" | |
for char in u"チち": | |
dict[char] = u"chi" | |
for char in u"ツつ": | |
dict[char] = u"tsu" | |
for char in u"テて": | |
dict[char] = u"te" | |
for char in u"トと": | |
dict[char] = u"to" | |
for char in u"ナな": | |
dict[char] = u"na" | |
for char in u"ニに": | |
dict[char] = u"ni" | |
for char in u"ヌぬ": | |
dict[char] = u"nu" | |
for char in u"ネね": | |
dict[char] = u"ne" | |
for char in u"ノの": | |
dict[char] = u"no" | |
for char in u"ハは": | |
dict[char] = u"ha" | |
for char in u"ヒひ": | |
dict[char] = u"hi" | |
for char in u"フふ": | |
dict[char] = u"fu" | |
for char in u"ヘへ": | |
dict[char] = u"he" | |
for char in u"ホほ": | |
dict[char] = u"ho" | |
for char in u"マま": | |
dict[char] = u"ma" | |
for char in u"ミみ": | |
dict[char] = u"mi" | |
for char in u"ムむ": | |
dict[char] = u"mu" | |
for char in u"メめ": | |
dict[char] = u"me" | |
for char in u"モも": | |
dict[char] = u"mo" | |
for char in u"ラら": | |
dict[char] = u"ra" | |
for char in u"リり": | |
dict[char] = u"ri" | |
for char in u"ルる": | |
dict[char] = u"ru" | |
for char in u"レれ": | |
dict[char] = u"re" | |
for char in u"ロろ": | |
dict[char] = u"ro" | |
for char in u"ワわ": | |
dict[char] = u"wa" | |
for char in u"ヰゐ": | |
dict[char] = u"wi" | |
for char in u"ヱゑ": | |
dict[char] = u"we" | |
for char in u"ヲを": | |
dict[char] = u"wo" | |
for char in u"ンん": | |
dict[char] = u"n" | |
for char in u"ガが": | |
dict[char] = u"ga" | |
for char in u"ギぎ": | |
dict[char] = u"gi" | |
for char in u"グぐ": | |
dict[char] = u"gu" | |
for char in u"ゲげ": | |
dict[char] = u"ge" | |
for char in u"ゴご": | |
dict[char] = u"go" | |
for char in u"ザざ": | |
dict[char] = u"za" | |
for char in u"ジじ": | |
dict[char] = u"ji" | |
for char in u"ズず": | |
dict[char] = u"zu" | |
for char in u"ゼぜ": | |
dict[char] = u"ze" | |
for char in u"ゾぞ": | |
dict[char] = u"zo" | |
for char in u"ダだ": | |
dict[char] = u"da" | |
for char in u"ヂぢ": | |
dict[char] = u"dji" | |
for char in u"ヅづ": | |
dict[char] = u"dzu" | |
for char in u"デで": | |
dict[char] = u"de" | |
for char in u"ドど": | |
dict[char] = u"do" | |
for char in u"バば": | |
dict[char] = u"ba" | |
for char in u"ビび": | |
dict[char] = u"bi" | |
for char in u"ブぶ": | |
dict[char] = u"bu" | |
for char in u"ベべ": | |
dict[char] = u"be" | |
for char in u"ボぼ": | |
dict[char] = u"bo" | |
for char in u"パぱ": | |
dict[char] = u"pa" | |
for char in u"ピぴ": | |
dict[char] = u"pi" | |
for char in u"プぷ": | |
dict[char] = u"pu" | |
for char in u"ペぺ": | |
dict[char] = u"pe" | |
for char in u"ポぽ": | |
dict[char] = u"po" | |
for char in u"ヴゔ": | |
dict[char] = u"vu" | |
dict[u"ヷ"] = u"va" | |
dict[u"ヸ"] = u"vi" | |
dict[u"ヹ"] = u"ve" | |
dict[u"ヺ"] = u"vo" | |
# Japanese and Chinese punctuation and typography | |
for char in u"・·": | |
dict[char] = u" " | |
for char in u"〃『』《》": | |
dict[char] = u'"' | |
for char in u"「」〈〉〘〙〚〛": | |
dict[char] = u"'" | |
for char in u"(〔": | |
dict[char] = u"(" | |
for char in u")〕": | |
dict[char] = u")" | |
for char in u"[【〖": | |
dict[char] = u"[" | |
for char in u"]】〗": | |
dict[char] = u"]" | |
for char in u"{": | |
dict[char] = u"{" | |
for char in u"}": | |
dict[char] = u"}" | |
for char in u"っ": | |
dict[char] = u":" | |
for char in u"ー": | |
dict[char] = u"h" | |
for char in u"゛": | |
dict[char] = u"'" | |
for char in u"゜": | |
dict[char] = u"p" | |
for char in u"。": | |
dict[char] = u". " | |
for char in u"、": | |
dict[char] = u", " | |
for char in u"・": | |
dict[char] = u" " | |
for char in u"〆": | |
dict[char] = u"shime" | |
for char in u"〜": | |
dict[char] = u"-" | |
for char in u"…": | |
dict[char] = u"..." | |
for char in u"‥": | |
dict[char] = u".." | |
for char in u"ヶ": | |
dict[char] = u"months" | |
for char in u"•◦": | |
dict[char] = u"_" | |
for char in u"※*": | |
dict[char] = u"*" | |
for char in u"Ⓧ": | |
dict[char] = u"(X)" | |
for char in u"Ⓨ": | |
dict[char] = u"(Y)" | |
for char in u"!": | |
dict[char] = u"!" | |
for char in u"?": | |
dict[char] = u"?" | |
for char in u";": | |
dict[char] = u";" | |
for char in u":": | |
dict[char] = u":" | |
for char in u"。": | |
dict[char] = u"." | |
for char in u",、": | |
dict[char] = u"," | |
# Georgian | |
for char in u"ა": | |
dict[char] = u"a" | |
for char in u"ბ": | |
dict[char] = u"b" | |
for char in u"გ": | |
dict[char] = u"g" | |
for char in u"დ": | |
dict[char] = u"d" | |
for char in u"ეჱ": | |
dict[char] = u"e" | |
for char in u"ვ": | |
dict[char] = u"v" | |
for char in u"ზ": | |
dict[char] = u"z" | |
for char in u"თ": | |
dict[char] = u"th" | |
for char in u"ი": | |
dict[char] = u"i" | |
for char in u"კ": | |
dict[char] = u"k" | |
for char in u"ლ": | |
dict[char] = u"l" | |
for char in u"მ": | |
dict[char] = u"m" | |
for char in u"ნ": | |
dict[char] = u"n" | |
for char in u"ო": | |
dict[char] = u"o" | |
for char in u"პ": | |
dict[char] = u"p" | |
for char in u"ჟ": | |
dict[char] = u"zh" | |
for char in u"რ": | |
dict[char] = u"r" | |
for char in u"ს": | |
dict[char] = u"s" | |
for char in u"ტ": | |
dict[char] = u"t" | |
for char in u"უ": | |
dict[char] = u"u" | |
for char in u"ფ": | |
dict[char] = u"ph" | |
for char in u"ქ": | |
dict[char] = u"q" | |
for char in u"ღ": | |
dict[char] = u"gh" | |
for char in u"ყ": | |
dict[char] = u"q'" | |
for char in u"შ": | |
dict[char] = u"sh" | |
for char in u"ჩ": | |
dict[char] = u"ch" | |
for char in u"ც": | |
dict[char] = u"ts" | |
for char in u"ძ": | |
dict[char] = u"dz" | |
for char in u"წ": | |
dict[char] = u"ts'" | |
for char in u"ჭ": | |
dict[char] = u"ch'" | |
for char in u"ხ": | |
dict[char] = u"kh" | |
for char in u"ჯ": | |
dict[char] = u"j" | |
for char in u"ჰ": | |
dict[char] = u"h" | |
for char in u"ჳ": | |
dict[char] = u"w" | |
for char in u"ჵ": | |
dict[char] = u"o" | |
for char in u"ჶ": | |
dict[char] = u"f" | |
# Devanagari | |
for char in u"पप": | |
dict[char] = u"p" | |
for char in u"अ": | |
dict[char] = u"a" | |
for char in u"आा": | |
dict[char] = u"aa" | |
for char in u"प": | |
dict[char] = u"pa" | |
for char in u"इि": | |
dict[char] = u"i" | |
for char in u"ईी": | |
dict[char] = u"ii" | |
for char in u"उु": | |
dict[char] = u"u" | |
for char in u"ऊू": | |
dict[char] = u"uu" | |
for char in u"एे": | |
dict[char] = u"e" | |
for char in u"ऐै": | |
dict[char] = u"ai" | |
for char in u"ओो": | |
dict[char] = u"o" | |
for char in u"औौ": | |
dict[char] = u"au" | |
for char in u"ऋृर": | |
dict[char] = u"r" | |
for char in u"ॠॄ": | |
dict[char] = u"rr" | |
for char in u"ऌॢल": | |
dict[char] = u"l" | |
for char in u"ॡॣ": | |
dict[char] = u"ll" | |
for char in u"क": | |
dict[char] = u"k" | |
for char in u"ख": | |
dict[char] = u"kh" | |
for char in u"ग": | |
dict[char] = u"g" | |
for char in u"घ": | |
dict[char] = u"gh" | |
for char in u"ङ": | |
dict[char] = u"ng" | |
for char in u"च": | |
dict[char] = u"c" | |
for char in u"छ": | |
dict[char] = u"ch" | |
for char in u"ज": | |
dict[char] = u"j" | |
for char in u"झ": | |
dict[char] = u"jh" | |
for char in u"ञ": | |
dict[char] = u"ñ" | |
for char in u"टत": | |
dict[char] = u"t" | |
for char in u"ठथ": | |
dict[char] = u"th" | |
for char in u"डद": | |
dict[char] = u"d" | |
for char in u"ढध": | |
dict[char] = u"dh" | |
for char in u"णन": | |
dict[char] = u"n" | |
for char in u"फ": | |
dict[char] = u"ph" | |
for char in u"ब": | |
dict[char] = u"b" | |
for char in u"भ": | |
dict[char] = u"bh" | |
for char in u"म": | |
dict[char] = u"m" | |
for char in u"य": | |
dict[char] = u"y" | |
for char in u"व": | |
dict[char] = u"v" | |
for char in u"श": | |
dict[char] = u"sh" | |
for char in u"षस": | |
dict[char] = u"s" | |
for char in u"ह": | |
dict[char] = u"h" | |
for char in u"क": | |
dict[char] = u"x" | |
for char in u"त": | |
dict[char] = u"tr" | |
for char in u"ज": | |
dict[char] = u"gj" | |
for char in u"क़": | |
dict[char] = u"q" | |
for char in u"फ": | |
dict[char] = u"f" | |
for char in u"ख": | |
dict[char] = u"hh" | |
for char in u"H": | |
dict[char] = u"gh" | |
for char in u"ज": | |
dict[char] = u"z" | |
for char in u"डढ": | |
dict[char] = u"r" | |
# Devanagari ligatures (possibly incomplete and/or incorrect) | |
for char in u"ख्": | |
dict[char] = u"khn" | |
for char in u"त": | |
dict[char] = u"tn" | |
for char in u"द्": | |
dict[char] = u"dn" | |
for char in u"श": | |
dict[char] = u"cn" | |
for char in u"ह्": | |
dict[char] = u"fn" | |
for char in u"अँ": | |
dict[char] = u"m" | |
for char in u"॒॑": | |
dict[char] = u"" | |
for char in u"०": | |
dict[char] = u"0" | |
for char in u"१": | |
dict[char] = u"1" | |
for char in u"२": | |
dict[char] = u"2" | |
for char in u"३": | |
dict[char] = u"3" | |
for char in u"४": | |
dict[char] = u"4" | |
for char in u"५": | |
dict[char] = u"5" | |
for char in u"६": | |
dict[char] = u"6" | |
for char in u"७": | |
dict[char] = u"7" | |
for char in u"८": | |
dict[char] = u"8" | |
for char in u"९": | |
dict[char] = u"9" | |
# Armenian | |
for char in u"Ա": | |
dict[char] = u"A" | |
for char in u"ա": | |
dict[char] = u"a" | |
for char in u"Բ": | |
dict[char] = u"B" | |
for char in u"բ": | |
dict[char] = u"b" | |
for char in u"Գ": | |
dict[char] = u"G" | |
for char in u"գ": | |
dict[char] = u"g" | |
for char in u"Դ": | |
dict[char] = u"D" | |
for char in u"դ": | |
dict[char] = u"d" | |
for char in u"Ե": | |
dict[char] = u"Je" | |
for char in u"ե": | |
dict[char] = u"e" | |
for char in u"Զ": | |
dict[char] = u"Z" | |
for char in u"զ": | |
dict[char] = u"z" | |
for char in u"Է": | |
dict[char] = u"É" | |
for char in u"է": | |
dict[char] = u"é" | |
for char in u"Ը": | |
dict[char] = u"Ë" | |
for char in u"ը": | |
dict[char] = u"ë" | |
for char in u"Թ": | |
dict[char] = u"Th" | |
for char in u"թ": | |
dict[char] = u"th" | |
for char in u"Ժ": | |
dict[char] = u"Zh" | |
for char in u"ժ": | |
dict[char] = u"zh" | |
for char in u"Ի": | |
dict[char] = u"I" | |
for char in u"ի": | |
dict[char] = u"i" | |
for char in u"Լ": | |
dict[char] = u"L" | |
for char in u"լ": | |
dict[char] = u"l" | |
for char in u"Խ": | |
dict[char] = u"Ch" | |
for char in u"խ": | |
dict[char] = u"ch" | |
for char in u"Ծ": | |
dict[char] = u"Ts" | |
for char in u"ծ": | |
dict[char] = u"ts" | |
for char in u"Կ": | |
dict[char] = u"K" | |
for char in u"կ": | |
dict[char] = u"k" | |
for char in u"Հ": | |
dict[char] = u"H" | |
for char in u"հ": | |
dict[char] = u"h" | |
for char in u"Ձ": | |
dict[char] = u"Dz" | |
for char in u"ձ": | |
dict[char] = u"dz" | |
for char in u"Ղ": | |
dict[char] = u"R" | |
for char in u"ղ": | |
dict[char] = u"r" | |
for char in u"Ճ": | |
dict[char] = u"Cz" | |
for char in u"ճ": | |
dict[char] = u"cz" | |
for char in u"Մ": | |
dict[char] = u"M" | |
for char in u"մ": | |
dict[char] = u"m" | |
for char in u"Յ": | |
dict[char] = u"J" | |
for char in u"յ": | |
dict[char] = u"j" | |
for char in u"Ն": | |
dict[char] = u"N" | |
for char in u"ն": | |
dict[char] = u"n" | |
for char in u"Շ": | |
dict[char] = u"S" | |
for char in u"շ": | |
dict[char] = u"s" | |
for char in u"Շ": | |
dict[char] = u"Vo" | |
for char in u"շ": | |
dict[char] = u"o" | |
for char in u"Չ": | |
dict[char] = u"Tsh" | |
for char in u"չ": | |
dict[char] = u"tsh" | |
for char in u"Պ": | |
dict[char] = u"P" | |
for char in u"պ": | |
dict[char] = u"p" | |
for char in u"Ջ": | |
dict[char] = u"Dz" | |
for char in u"ջ": | |
dict[char] = u"dz" | |
for char in u"Ռ": | |
dict[char] = u"R" | |
for char in u"ռ": | |
dict[char] = u"r" | |
for char in u"Ս": | |
dict[char] = u"S" | |
for char in u"ս": | |
dict[char] = u"s" | |
for char in u"Վ": | |
dict[char] = u"V" | |
for char in u"վ": | |
dict[char] = u"v" | |
for char in u"Տ": | |
dict[char] = u"T'" | |
for char in u"տ": | |
dict[char] = u"t'" | |
for char in u"Ր": | |
dict[char] = u"R" | |
for char in u"ր": | |
dict[char] = u"r" | |
for char in u"Ց": | |
dict[char] = u"Tsh" | |
for char in u"ց": | |
dict[char] = u"tsh" | |
for char in u"Ւ": | |
dict[char] = u"V" | |
for char in u"ւ": | |
dict[char] = u"v" | |
for char in u"Փ": | |
dict[char] = u"Ph" | |
for char in u"փ": | |
dict[char] = u"ph" | |
for char in u"Ք": | |
dict[char] = u"Kh" | |
for char in u"ք": | |
dict[char] = u"kh" | |
for char in u"Օ": | |
dict[char] = u"O" | |
for char in u"օ": | |
dict[char] = u"o" | |
for char in u"Ֆ": | |
dict[char] = u"F" | |
for char in u"ֆ": | |
dict[char] = u"f" | |
for char in u"և": | |
dict[char] = u"&" | |
for char in u"՟": | |
dict[char] = u"." | |
for char in u"՞": | |
dict[char] = u"?" | |
for char in u"՝": | |
dict[char] = u";" | |
for char in u"՛": | |
dict[char] = u"" | |
# Tamil | |
for char in u"க்": | |
dict[char] = u"k" | |
for char in u"ஙண்ந்ன்": | |
dict[char] = u"n" | |
for char in u"ச": | |
dict[char] = u"c" | |
for char in u"ஞ்": | |
dict[char] = u"ñ" | |
for char in u"ட்": | |
dict[char] = u"th" | |
for char in u"த": | |
dict[char] = u"t" | |
for char in u"ப": | |
dict[char] = u"p" | |
for char in u"ம்": | |
dict[char] = u"m" | |
for char in u"ய்": | |
dict[char] = u"y" | |
for char in u"ர்ழ்ற": | |
dict[char] = u"r" | |
for char in u"ல்ள": | |
dict[char] = u"l" | |
for char in u"வ்": | |
dict[char] = u"v" | |
for char in u"ஜ": | |
dict[char] = u"j" | |
for char in u"ஷ": | |
dict[char] = u"sh" | |
for char in u"ஸ": | |
dict[char] = u"s" | |
for char in u"ஹ": | |
dict[char] = u"h" | |
for char in u"க்ஷ": | |
dict[char] = u"x" | |
for char in u"அ": | |
dict[char] = u"a" | |
for char in u"ஆ": | |
dict[char] = u"aa" | |
for char in u"இ": | |
dict[char] = u"i" | |
for char in u"ஈ": | |
dict[char] = u"ii" | |
for char in u"உ": | |
dict[char] = u"u" | |
for char in u"ஊ": | |
dict[char] = u"uu" | |
for char in u"எ": | |
dict[char] = u"e" | |
for char in u"ஏ": | |
dict[char] = u"ee" | |
for char in u"ஐ": | |
dict[char] = u"ai" | |
for char in u"ஒ": | |
dict[char] = u"o" | |
for char in u"ஓ": | |
dict[char] = u"oo" | |
for char in u"ஔ": | |
dict[char] = u"au" | |
for char in u"ஃ": | |
dict[char] = "" | |
# Bengali | |
for char in u"অ": | |
dict[char] = u"ô" | |
for char in u"আা": | |
dict[char] = u"a" | |
for char in u"ইিঈী": | |
dict[char] = u"i" | |
for char in u"উুঊূ": | |
dict[char] = u"u" | |
for char in u"ঋৃ": | |
dict[char] = u"ri" | |
for char in u"এেয়": | |
dict[char] = u"e" | |
for char in u"ঐৈ": | |
dict[char] = u"oi" | |
for char in u"ওো": | |
dict[char] = u"o" | |
for char in u"ঔৌ": | |
dict[char] = "ou" | |
for char in u"্": | |
dict[char] = u"" | |
for char in u"ৎ": | |
dict[char] = u"t" | |
for char in u"ং": | |
dict[char] = u"n" | |
for char in u"ঃ": | |
dict[char] = u"h" | |
for char in u"ঁ": | |
dict[char] = u"ñ" | |
for char in u"ক": | |
dict[char] = u"k" | |
for char in u"খ": | |
dict[char] = u"kh" | |
for char in u"গ": | |
dict[char] = u"g" | |
for char in u"ঘ": | |
dict[char] = u"gh" | |
for char in u"ঙ": | |
dict[char] = u"ng" | |
for char in u"চ": | |
dict[char] = u"ch" | |
for char in u"ছ": | |
dict[char] = u"chh" | |
for char in u"জ": | |
dict[char] = u"j" | |
for char in u"ঝ": | |
dict[char] = u"jh" | |
for char in u"ঞ": | |
dict[char] = u"n" | |
for char in u"টত": | |
dict[char] = u"t" | |
for char in u"ঠথ": | |
dict[char] = u"th" | |
for char in u"ডদ": | |
dict[char] = u"d" | |
for char in u"ঢধ": | |
dict[char] = u"dh" | |
for char in u"ণন": | |
dict[char] = u"n" | |
for char in u"প": | |
dict[char] = u"p" | |
for char in u"ফ": | |
dict[char] = u"ph" | |
for char in u"ব": | |
dict[char] = u"b" | |
for char in u"ভ": | |
dict[char] = u"bh" | |
for char in u"ম": | |
dict[char] = u"m" | |
for char in u"য": | |
dict[char] = u"dzh" | |
for char in u"র": | |
dict[char] = u"r" | |
for char in u"ল": | |
dict[char] = u"l" | |
for char in u"শ": | |
dict[char] = u"s" | |
for char in u"হ": | |
dict[char] = u"h" | |
for char in u"য়": | |
dict[char] = u"-" | |
for char in u"ড়": | |
dict[char] = u"r" | |
for char in u"ঢ": | |
dict[char] = u"rh" | |
for char in u"০": | |
dict[char] = u"0" | |
for char in u"১": | |
dict[char] = u"1" | |
for char in u"২": | |
dict[char] = u"2" | |
for char in u"৩": | |
dict[char] = u"3" | |
for char in u"৪": | |
dict[char] = u"4" | |
for char in u"৫": | |
dict[char] = u"5" | |
for char in u"৬": | |
dict[char] = u"6" | |
for char in u"৭": | |
dict[char] = u"7" | |
for char in u"৮": | |
dict[char] = u"8" | |
for char in u"৯": | |
dict[char] = u"9" | |
# Thai (because of complications of the alphabet, transliterations | |
# are very imprecise here) | |
for char in u"ก": | |
dict[char] = u"k" | |
for char in u"ขฃคฅฆ": | |
dict[char] = u"kh" | |
for char in u"ง": | |
dict[char] = u"ng" | |
for char in u"จฉชฌ": | |
dict[char] = u"ch" | |
for char in u"ซศษส": | |
dict[char] = u"s" | |
for char in u"ญย": | |
dict[char] = u"y" | |
for char in u"ฎด": | |
dict[char] = u"d" | |
for char in u"ฏต": | |
dict[char] = u"t" | |
for char in u"ฐฑฒถทธ": | |
dict[char] = u"th" | |
for char in u"ณน": | |
dict[char] = u"n" | |
for char in u"บ": | |
dict[char] = u"b" | |
for char in u"ป": | |
dict[char] = u"p" | |
for char in u"ผพภ": | |
dict[char] = u"ph" | |
for char in u"ฝฟ": | |
dict[char] = u"f" | |
for char in u"ม": | |
dict[char] = u"m" | |
for char in u"ร": | |
dict[char] = u"r" | |
for char in u"ฤ": | |
dict[char] = u"rue" | |
for char in u"ๅ": | |
dict[char] = u":" | |
for char in u"ลฬ": | |
dict[char] = u"l" | |
for char in u"ฦ": | |
dict[char] = u"lue" | |
for char in u"ว": | |
dict[char] = u"w" | |
for char in u"หฮ": | |
dict[char] = u"h" | |
for char in u"อ": | |
dict[char] = u"" | |
for char in u"ร": | |
dict[char] = u"ü" | |
for char in u"ว": | |
dict[char] = u"ua" | |
for char in u"อวโิ": | |
dict[char] = u"o" | |
for char in u"ะัา": | |
dict[char] = u"a" | |
for char in u"ว": | |
dict[char] = u"u" | |
for char in u"ำ": | |
dict[char] = u"am" | |
for char in u"ิ": | |
dict[char] = u"i" | |
for char in u"ี": | |
dict[char] = u"i:" | |
for char in u"ึ": | |
dict[char] = u"ue" | |
for char in u"ื": | |
dict[char] = u"ue:" | |
for char in u"ุ": | |
dict[char] = u"u" | |
for char in u"ู": | |
dict[char] = u"u:" | |
for char in u"เ็": | |
dict[char] = u"e" | |
for char in u"แ": | |
dict[char] = u"ae" | |
for char in u"ใไ": | |
dict[char] = u"ai" | |
for char in u"่้๊๋็์": | |
dict[char] = u"" | |
for char in u"ฯ": | |
dict[char] = u"." | |
for char in u"ๆ": | |
dict[char] = u"(2)" | |
# Korean (Revised Romanization system as far as possible; incomplete) | |
for char in u"국": | |
dict[char] = u"guk" | |
for char in u"명": | |
dict[char] = u"myeong" | |
for char in u"검": | |
dict[char] = u"geom" | |
for char in u"타": | |
dict[char] = u"ta" | |
for char in u"분": | |
dict[char] = u"bun" | |
for char in u"사": | |
dict[char] = u"sa" | |
for char in u"류": | |
dict[char] = u"ryu" | |
for char in u"포": | |
dict[char] = u"po" | |
for char in u"르": | |
dict[char] = u"reu" | |
for char in u"투": | |
dict[char] = u"tu" | |
for char in u"갈": | |
dict[char] = u"gal" | |
for char in u"어": | |
dict[char] = u"eo" | |
for char in u"노": | |
dict[char] = u"no" | |
for char in u"웨": | |
dict[char] = u"we" | |
for char in u"이": | |
dict[char] = u"i" | |
for char in u"라": | |
dict[char] = u"ra" | |
for char in u"틴": | |
dict[char] = u"tin" | |
for char in u"루": | |
dict[char] = u"ru" | |
for char in u"마": | |
dict[char] = u"ma" | |
for char in u"니": | |
dict[char] = u"ni" | |
for char in u"아": | |
dict[char] = u"a" | |
for char in u"독": | |
dict[char] = u"dok" | |
for char in u"일": | |
dict[char] = u"il" | |
for char in u"모": | |
dict[char] = u"mo" | |
for char in u"크": | |
dict[char] = u"keu" | |
for char in u"샤": | |
dict[char] = u"sya" | |
for char in u"영": | |
dict[char] = u"yeong" | |
for char in u"불": | |
dict[char] = u"bul" | |
for char in u"가": | |
dict[char] = u"ga" | |
for char in u"리": | |
dict[char] = u"ri" | |
for char in u"그": | |
dict[char] = u"geu" | |
for char in u"지": | |
dict[char] = u"ji" | |
for char in u"야": | |
dict[char] = u"ya" | |
for char in u"바": | |
dict[char] = u"ba" | |
for char in u"슈": | |
dict[char] = u"syu" | |
for char in u"키": | |
dict[char] = u"ki" | |
for char in u"프": | |
dict[char] = u"peu" | |
for char in u"랑": | |
dict[char] = u"rang" | |
for char in u"스": | |
dict[char] = u"seu" | |
for char in u"로": | |
dict[char] = u"ro" | |
for char in u"메": | |
dict[char] = u"me" | |
for char in u"역": | |
dict[char] = u"yeok" | |
for char in u"도": | |
dict[char] = u"do" | |
# Kannada | |
dict[u"ಅ"] = u"a" | |
for char in u"ಆಾ": | |
dict[char] = u"aa" | |
for char in u"ಇಿ": | |
dict[char] = u"i" | |
for char in u"ಈೀ": | |
dict[char] = u"ii" | |
for char in u"ಉು": | |
dict[char] = u"u" | |
for char in u"ಊೂ": | |
dict[char] = u"uu" | |
for char in u"ಋೂ": | |
dict[char] = u"r'" | |
for char in u"ಎೆ": | |
dict[char] = u"e" | |
for char in u"ಏೇ": | |
dict[char] = u"ee" | |
for char in u"ಐೈ": | |
dict[char] = u"ai" | |
for char in u"ಒೊ": | |
dict[char] = u"o" | |
for char in u"ಓೋ": | |
dict[char] = u"oo" | |
for char in u"ಔೌ": | |
dict[char] = u"au" | |
dict[u"ಂ"] = u"m'" | |
dict[u"ಃ"] = u"h'" | |
dict[u"ಕ"] = u"k" | |
dict[u"ಖ"] = u"kh" | |
dict[u"ಗ"] = u"g" | |
dict[u"ಘ"] = u"gh" | |
dict[u"ಙ"] = u"ng" | |
dict[u"ಚ"] = u"c" | |
dict[u"ಛ"] = u"ch" | |
dict[u"ಜ"] = u"j" | |
dict[u"ಝ"] = u"ny" | |
dict[u"ಟ"] = u"tt" | |
dict[u"ಠ"] = u"tth" | |
dict[u"ಡ"] = u"dd" | |
dict[u"ಢ"] = u"ddh" | |
dict[u"ಣ"] = u"nn" | |
dict[u"ತ"] = u"t" | |
dict[u"ಥ"] = u"th" | |
dict[u"ದ"] = u"d" | |
dict[u"ಧ"] = u"dh" | |
dict[u"ನ"] = u"n" | |
dict[u"ಪ"] = u"p" | |
dict[u"ಫ"] = u"ph" | |
dict[u"ಬ"] = u"b" | |
dict[u"ಭ"] = u"bh" | |
dict[u"ಮ"] = u"m" | |
dict[u"ಯ"] = u"y" | |
dict[u"ರ"] = u"r" | |
dict[u"ಲ"] = u"l" | |
dict[u"ವ"] = u"v" | |
dict[u"ಶ"] = u"sh" | |
dict[u"ಷ"] = u"ss" | |
dict[u"ಸ"] = u"s" | |
dict[u"ಹ"] = u"h" | |
dict[u"ಳ"] = u"ll" | |
dict[u"೦"] = u"0" | |
dict[u"೧"] = u"1" | |
dict[u"೨"] = u"2" | |
dict[u"೩"] = u"3" | |
dict[u"೪"] = u"4" | |
dict[u"೫"] = u"5" | |
dict[u"೬"] = u"6" | |
dict[u"೭"] = u"7" | |
dict[u"೮"] = u"8" | |
dict[u"೯"] = u"9" | |
# Telugu | |
for char in u"అ": | |
dict[char] = u"a" | |
for char in u"ఆా": | |
dict[char] = u"aa" | |
for char in u"ఇి": | |
dict[char] = u"i" | |
for char in u"ఈీ": | |
dict[char] = u"ii" | |
for char in u"ఉు": | |
dict[char] = u"u" | |
for char in u"ఊూ": | |
dict[char] = u"uu" | |
for char in u"ఋృ": | |
dict[char] = u"r'" | |
for char in u"ౠౄ": | |
dict[char] = u'r"' | |
dict[u"ఌ"] = u"l'" | |
dict[u"ౡ"] = u'l"' | |
for char in u"ఎె": | |
dict[char] = u"e" | |
for char in u"ఏే": | |
dict[char] = u"ee" | |
for char in u"ఐై": | |
dict[char] = u"ai" | |
for char in u"ఒొ": | |
dict[char] = u"o" | |
for char in u"ఓో": | |
dict[char] = u"oo" | |
for char in u"ఔౌ": | |
dict[char] = u"au" | |
dict[u"ం"] = u"'" | |
dict[u"ః"] = u'"' | |
dict[u"క"] = u"k" | |
dict[u"ఖ"] = u"kh" | |
dict[u"గ"] = u"g" | |
dict[u"ఘ"] = u"gh" | |
dict[u"ఙ"] = u"ng" | |
dict[u"చ"] = u"ts" | |
dict[u"ఛ"] = u"tsh" | |
dict[u"జ"] = u"j" | |
dict[u"ఝ"] = u"jh" | |
dict[u"ఞ"] = u"ñ" | |
for char in u"టత": | |
dict[char] = u"t" | |
for char in u"ఠథ": | |
dict[char] = u"th" | |
for char in u"డద": | |
dict[char] = u"d" | |
for char in u"ఢధ": | |
dict[char] = u"dh" | |
for char in u"ణన": | |
dict[char] = u"n" | |
dict[u"ప"] = u"p" | |
dict[u"ఫ"] = u"ph" | |
dict[u"బ"] = u"b" | |
dict[u"భ"] = u"bh" | |
dict[u"మ"] = u"m" | |
dict[u"య"] = u"y" | |
for char in u"రఱ": | |
dict[char] = u"r" | |
for char in u"లళ": | |
dict[char] = u"l" | |
dict[u"వ"] = u"v" | |
dict[u"శ"] = u"sh" | |
for char in u"షస": | |
dict[char] = u"s" | |
dict[u"హ"] = u"h" | |
dict[u"్"] = "" | |
for char in u"ంఁ": | |
dict[char] = u"^" | |
dict[u"ః"] = u"-" | |
dict[u"౦"] = u"0" | |
dict[u"౧"] = u"1" | |
dict[u"౨"] = u"2" | |
dict[u"౩"] = u"3" | |
dict[u"౪"] = u"4" | |
dict[u"౫"] = u"5" | |
dict[u"౬"] = u"6" | |
dict[u"౭"] = u"7" | |
dict[u"౮"] = u"8" | |
dict[u"౯"] = u"9" | |
dict[u"౹"] = u"1/4" | |
dict[u"౺"] = u"1/2" | |
dict[u"౻"] = u"3/4" | |
dict[u"౼"] = u"1/16" | |
dict[u"౽"] = u"1/8" | |
dict[u"౾"] = u"3/16" | |
# Lao - note: pronunciation in initial position is used; | |
# different pronunciation in final position is ignored | |
dict[u"ກ"] = "k" | |
for char in u"ຂຄ": | |
dict[char] = "kh" | |
dict[u"ງ"] = "ng" | |
dict[u"ຈ"] = "ch" | |
for char in u"ສຊ": | |
dict[char] = "s" | |
dict[u"ຍ"] = "ny" | |
dict[u"ດ"] = "d" | |
dict[u"ຕ"] = "t" | |
for char in u"ຖທ": | |
dict[char] = "th" | |
dict[u"ນ"] = "n" | |
dict[u"ບ"] = "b" | |
dict[u"ປ"] = "p" | |
for char in u"ຜພ": | |
dict[char] = "ph" | |
for char in u"ຝຟ": | |
dict[char] = "f" | |
for char in u"ມໝ": | |
dict[char] = "m" | |
dict[u"ຢ"] = "y" | |
for char in u"ຣຼ": | |
dict[char] = "r" | |
for char in u"ລຼ": | |
dict[char] = "l" | |
dict[u"ວ"] = "v" | |
for char in u"ຮ": | |
dict[char] = "h" | |
dict[u"ອ"] = "'" | |
for char in u"ະັ": | |
dict[char] = "a" | |
dict[u"ິ"] = "i" | |
dict[u"ຶ"] = "ue" | |
dict[u"ຸ"] = "u" | |
dict[u"ເ"] = u"é" | |
dict[u"ແ"] = u"è" | |
for char in u"ໂົາໍ": | |
dict[char] = "o" | |
dict[u"ຽ"] = "ia" | |
dict[u"ເຶ"] = "uea" | |
dict[u"ຍ"] = "i" | |
for char in u"ໄໃ": | |
dict[char] = "ai" | |
dict[u"ຳ"] = "am" | |
dict[u"າ"] = "aa" | |
dict[u"ີ"] = "ii" | |
dict[u"ື"] = "yy" | |
dict[u"ູ"] = "uu" | |
dict[u"ເ"] = "e" | |
dict[u"ແ"] = "ei" | |
dict[u"໐"] = "0" | |
dict[u"໑"] = "1" | |
dict[u"໒"] = "2" | |
dict[u"໓"] = "3" | |
dict[u"໔"] = "4" | |
dict[u"໕"] = "5" | |
dict[u"໖"] = "6" | |
dict[u"໗"] = "7" | |
dict[u"໘"] = "8" | |
dict[u"໙"] = "9" | |
return dict | |
# Script entry: build the character mapping with dictgen() and dump it to
# output.txt as a single JSON object.
# json.dumps() escapes non-ASCII characters by default (ensure_ascii=True), so
# the payload is plain ASCII and no explicit file encoding is needed.
# The context manager guarantees the file is closed even if dictgen(),
# json.dumps() or the write itself raises.
with open('output.txt', 'w') as f:
    f.write(json.dumps(dictgen()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment