Skip to content

Instantly share code, notes, and snippets.

@quanta-kt
Last active July 20, 2022 13:28
Show Gist options
  • Save quanta-kt/3d3a48edbafa216be29651081ef21bc3 to your computer and use it in GitHub Desktop.
Save quanta-kt/3d3a48edbafa216be29651081ef21bc3 to your computer and use it in GitHub Desktop.
A simple script to transliterate Devanagari text into the Latin alphabet.

A simple script to transliterate words in Devanagari into the Latin alphabet.

I wrote this as a sort of proof of concept, and it might fail in some cases. Consonants with a nuqta are not supported, but it should be fairly easy to extend this code to handle them. (I am lazy, idk)

Why?

I was really bored.

Why does it add an "a" at the end of my name?

You might want to read about schwa deletion in Indo-Aryan languages.

Will you ever finish this?

Probably not.

Can I rely on this code to handle my-

NO. Please don't.

# Latin spelling of each vyanjana (consonant) WITHOUT its inherent "a".
# Several distinct Devanagari letters deliberately share one Latin spelling.
VYANJANA_MAP = {
    # ka-varga
    "क": "k",
    "ख": "kh",
    "ग": "g",
    "घ": "gh",
    "ङ": "n",
    # cha-varga
    "च": "ch",
    "छ": "chh",
    "ज": "j",
    "झ": "jh",
    "ञ": "n",
    # retroflex ta-varga
    "ट": "t",
    "ठ": "th",
    "ड": "d",
    "ढ": "dh",
    "ण": "n",
    # dental ta-varga
    "त": "t",
    "थ": "th",
    "द": "d",
    "ध": "dh",
    "न": "n",
    # pa-varga
    "प": "p",
    "फ": "ph",
    "ब": "b",
    "भ": "bh",
    "म": "m",
    # remaining consonants
    "य": "y",
    "र": "r",
    "ल": "l",
    "व": "v",
    "श": "sh",
    "ष": "sh",
    "स": "s",
    "ह": "h",
    "ळ": "l",
}

# Oshthya (labial) vyanjanas: an anusvara in front of one of these is written "m".
OSHTHYA_VANJAN = {
    "प",
    "फ",
    "ब",
    "भ",
    "म",
}

# Latin spelling of each svar (independent vowel letter).
SVAR_MAP = {
    "अ": "a",
    "आ": "aa",
    "इ": "i",
    "ई": "ee",
    "उ": "u",
    "ऊ": "oo",
    "ए": "e",
    "ऐ": "ai",
    "ओ": "o",
    "औ": "au",
    "ऋ": "ri",
}

# Latin spelling of each matra (dependent vowel sign attached to a vyanjana).
# The visarga is listed as well, with the inherent "a" folded in ("ah").
MATRA_MAP = {
    "ा": "aa",
    "ि": "i",
    "ी": "ee",
    "ु": "u",
    "ू": "oo",
    "े": "e",
    "ै": "ai",
    "ो": "o",
    "ौ": "au",
    "ः": "ah",
    "ृ": "ri",
}

# Single-character signs that get special treatment during parsing.
ANUSVARA = "ं"
HALANTA = "्"
VISARGA = "ः"
def _get_nasalizer_for_vyanjana(vyanyana):
    """Return the Latin nasal for an anusvara that precedes ``vyanyana``.

    The result is "m" when ``vyanyana`` is one of the oshthya vyanjanas
    listed in ``OSHTHYA_VANJAN`` and "n" for anything else.
    """
    return "m" if vyanyana in OSHTHYA_VANJAN else "n"
def _consume_modifier(chars: list[str]) -> str:
    """Consume an anusvara or visarga from the front of ``chars``, if present.

    Args:
        chars: Remaining characters of the word; mutated in place.

    Returns:
        The nasal picked by ``_get_nasalizer_for_vyanjana`` for an anusvara,
        "h" for a visarga, or "" (leaving ``chars`` untouched) when the next
        character is neither sign.

    Raises:
        ValueError: If an anusvara is not followed by a vyanjana.
    """
    if not chars:
        return ""
    sign = chars[0]
    if sign == ANUSVARA:
        chars.pop(0)
        # Only peek at the following vyanjana: it starts the next syllable
        # and must stay in ``chars`` for the next call.
        if chars and chars[0] in VYANJANA_MAP:
            return _get_nasalizer_for_vyanjana(chars[0])
        raise ValueError("Expected a vyanjana after anusvara")
    if sign == VISARGA:
        chars.pop(0)
        return "h"
    return ""


def _get_next(chars: list[str]) -> str:
    """Consume the next syllable and return its Latin transliteration.

    A syllable is either a svar or a vyanjana with an optional matra (the
    inherent "a" is used when no matra follows), in both cases optionally
    followed by an anusvara or a visarga. A vyanjana followed by a halanta
    is joined to the syllable that comes after it (consonant cluster); a
    word-final halanta yields the bare consonant.

    Args:
        chars: Remaining characters of the word. Must be non-empty. The
            characters that make up the syllable are removed from the front.

    Returns:
        The Latin spelling of the consumed syllable.

    Raises:
        ValueError: If the first character is neither a vyanjana nor a svar,
            or if an anusvara is not followed by a vyanjana.
    """
    first = chars.pop(0)

    if first in VYANJANA_MAP:
        consonant = VYANJANA_MAP[first]

        if chars and chars[0] == HALANTA:
            # The halanta suppresses the inherent vowel.
            chars.pop(0)
            if chars:
                # Start of a consonant cluster: glue on the rest of it.
                return consonant + _get_next(chars)
            # The word ends with a halanta: just the bare consonant.
            return consonant

        # MATRA_MAP also lists the visarga; leave that sign to
        # _consume_modifier so it is handled the same way after any vowel.
        if chars and chars[0] in MATRA_MAP and chars[0] != VISARGA:
            vowel = MATRA_MAP[chars.pop(0)]
        else:
            vowel = "a"  # inherent vowel
        return consonant + vowel + _consume_modifier(chars)

    if first in SVAR_MAP:
        return SVAR_MAP[first] + _consume_modifier(chars)

    raise ValueError(
        "Invalid devanagari text, expected start of a syllable "
        f"(a vyanjana or swara), got {first}"
    )
def transliterate(devanagari_text: str) -> str:
    """Return the Latin-alphabet transliteration of ``devanagari_text``.

    The text is split into characters and handed to ``_get_next`` one
    syllable at a time until nothing is left; the pieces are concatenated
    in order. An empty string yields an empty string. Any exception raised
    by ``_get_next`` propagates to the caller.
    """
    pending = list(devanagari_text)

    def _syllables():
        # _get_next removes what it consumes, so ``pending`` shrinks each turn.
        while pending:
            yield _get_next(pending)

    return "".join(_syllables())
def test_transliterate():
    """Check transliterate() against known word / title-cased spelling pairs."""
    cases = (
        ("अभिजीत", "Abhijeeta"),
        ("संस्कृत", "Sanskrita"),
        ("राणा", "Raanaa"),
        ("प्रकृति", "Prakriti"),
        ("फुल", "Phula"),
        ("अंबर", "Ambara"),
        ("अंक", "Anka"),
        ("आंसू", "Aansoo"),
        ("हिमः", "Himah"),
        ("इंजिन्", "Injin"),
        ("मोटर्", "Motar"),
        ("लक्षमन", "Lakshamana"),
    )
    for word, expected in cases:
        assert transliterate(word).title() == expected
# When executed as a script (not imported), run the self-test; it is silent
# on success and raises AssertionError on the first mismatch.
if __name__ == "__main__":
    test_transliterate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment