srikanthlogic/cmudict.py

## cmudict.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Any Indian Language to any other Indian language transliterator
# Copyright 2008-2010 Santhosh Thottingal <santhosh.thottingal@gmail.com>
# http://www.smc.org.in
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
# URL: http://www.smc.org.in

import string
import os
CMU_MALAYALAM_MAP = {
	"AA" : "à´“",
	"AH" : "à´…",
	"AE" : "à´",
	"AO" : "à´“",
	"AW" : "à´”",
	"AY" : "à´",
	"B"  : "à´¬àµ",
	"CH" : "à´šàµà´šàµ",
	"D"  : "à´¡àµ",
	"DH" : "à´¦àµ",
	"EA" : "à´ˆ",
	"EH" : "à´Ž",
	"ER" : "à´Žà´°àµâ€",
	"EY" : "à´Žà´¯àµ",
	"F"  : "à´«àµ",
	"G"  : "à´—àµ",
	"HH"  : "à´¹àµ",
	"IH" : "à´‡",
	"IY" : "à´ˆ",
	"J"   : "à´œàµ",
	"JH"  : "à´œàµ",
	"K"  : "à´•àµ",
	"L"  : "à´²àµâ€",
	"M"  : "à´®àµ",
	"N"  : "à´¨àµâ€",
	"NG"  : "à´™àµ",
	"OW"  : "à´’",
	"P"  : "à´ªàµ",
	"R"  : "à´°àµâ€",
	"S"  : "à´¸àµ",
	"SH"  : "à´·àµ",
	"T"  : "à´±àµà´±àµ",
	"TH"  : "à´¤àµ",
	"Y"  :  "à´¯àµ",
	"UW" : "à´‰",
	"W"  : "à´µàµ",
	"V" : "à´µàµ",
	"Z"  : "à´¸àµ",
}

CMU_TAMIL_MAP = {
	"AA" : "à®†",
	"AH" : "à®…",
	"AE" : "à®Ž",
	"AO" : "à®’",
	"AW" : "à®”",
	"AY" : "à®",
	"B"  : "à®ªà®¿",
	"CH" : "à®šà¯",
	"D"  : "à®Ÿà¯",
	"DH" : "à®¤à¯",
	"EA" : "à´ˆ",
	"EH" : "à®",
	"ER" : "à®…à®°à¯",
	"EY" : "à®",
	"F"  : "à®ƒà®ªà¯",
	"G"  : "à®•à¯",
	"HH"  : "à®¹à¯",
	"IH" : "à®‡",
	"IY" : "à®‡",
	"J"   : "à®œà¯",
	"JH"  : "à®œà¯",
	"K"  : "à®•à¯",
	"L"  : "à®²à¯",
	"M"  : "à®®à¯",
	"N"  : "à®©à¯",
	"NG"  : "à®™à¯",
	"OW"  : "à®”",
	"P"  : "à®ªà¯",
	"R"  : "à®°à¯",
	"S"  : "à®šà¯",
	"SH"  : "à®·à¯",
	"T"  : "à®Ÿà¯",
	"TH"  : "à®¤à¯",
	"Y"  :  "à®¯à¯",
	"UW" : "à®‰",
	"W"  : "à®µ",
	"V" : "à®µ",
	"Z"  : "à®¸à¯",
}

CMU_KANNADA_MAP = {
	"AA" : "à²†",
	"AH" : "à²…",
	"AE" : "à²",
	"AO" : "à²“",
	"AW" : "à²”",
	"AY" : "à²",
	"B"  : "à²¬à³",
	"CH" : "à²šà³",
	"D"  : "à²¡à³",
	"DH" : "à²¦à³",
	"EA" : "à²ˆ",
	"EH" : "à²Ž",
	"ER" : "à²…à²°à³",
	"EY" : "à²Žà²¯à³",
	"F"  : "à²«à³",
	"G"  : "à²—à³",
	"HH"  : "à²¹à³",
	"IH" : "à²‡",
	"IY" : "à²ˆ",
	"J"   : "à²œà³",
	"JH"  : "à²œà³",
	"K"  : "à²•à³",
	"L"  : "à²²à³",
	"M"  : "à²®à³",
	"N"  : "à²¨à³",
	"NG"  : "à²‚à²—à³",
	"OW"  : "à²’",
	"P"  : "à²ªà³",
	"R"  : "à²°à³",
	"S"  : "à²¸à³",
	"SH"  : "à²·à³",
	"T"  : "à²Ÿà³",
	"TH"  : "à²¤à³",
	"Y"  :  "à²¯à³",
	"UW" : "à²Š",
	"UH":"à²‰",
	"W"  : "à²µà³",
	"V" : "à²µà³",
	"Z":"à²¸à³",
	"ZH":"à²·à³",
}

class CMUDict():
    """Pronunciation lookup backed by a CMU pronouncing dictionary file.

    The dictionary file is read lazily, on the first call to find().
    pronunciation() maps every phoneme symbol of a word through one of the
    module-level CMU_*_MAP tables.
    """

    # Characters that may surround a word; they are kept around the result.
    _PUNCTUATION = '!,.?:'

    def __init__(self):
        # The dictionary file is looked up next to this module.
        self.dictionaryfile = os.path.join(os.path.dirname(__file__),
                                           'cmudict.0.7a_SPHINX_40')
        # None means "not loaded yet"; load() replaces it with a dict.
        self.cmudictionary = None

    def load(self):
        """Read the dictionary file into self.cmudictionary.

        Every non-blank line is split on whitespace: the first field becomes
        the key, the remaining fields are stored as a list.  Blank lines are
        skipped.  Raises IOError/OSError when the file cannot be opened.
        """
        entries = dict()
        # "with" closes the handle even when reading fails part-way.
        with open(self.dictionaryfile, "r") as fdict:
            for line in fdict:
                fields = line.split()
                if not fields:
                    continue
                entries[fields[0]] = fields[1:]
        # Publish only a completely parsed table.
        self.cmudictionary = entries

    def find(self, word):
        """Return the list of phoneme symbols stored for *word*.

        The lookup key is word.upper().  Loads the dictionary on first use.
        Raises KeyError when the word has no entry.
        """
        if self.cmudictionary is None:
            self.load()
        return self.cmudictionary[word.upper()]

    def _map_symbols(self, symbols, table):
        """Concatenate table[symbol] for each symbol; unknown symbols are kept verbatim."""
        return "".join([table.get(symbol, symbol) for symbol in symbols])

    def pronunciation(self, word, language):
        """Spell the pronunciation of an English word in another script.

        Arguments:
        - `word`: the word, optionally wrapped in '!,.?:' characters
        - `language`: "ml_IN", "kn_IN" or "ta_IN"; for any other value only
          the surrounding punctuation is returned

        Returns the mapped text with the original leading and trailing
        punctuation re-attached.  When the stripped word is empty or has no
        dictionary entry, *word* is returned unchanged.
        """
        marks = self._PUNCTUATION
        stripped_word = word.strip(marks)
        if not stripped_word:
            return word
        # strip() trims both ends, so each side is measured separately.
        leading = word[:len(word) - len(word.lstrip(marks))]
        trailing = word[len(word.rstrip(marks)):]
        try:
            cmu_pronunciation = self.find(stripped_word)
        except KeyError:
            return word

        if language == "ml_IN":
            pronunciation_str = self._fix_vowel_signs_ml(
                self._map_symbols(cmu_pronunciation, CMU_MALAYALAM_MAP))
        elif language == "kn_IN":
            pronunciation_str = self._fix_vowel_signs_kn(
                self._map_symbols(cmu_pronunciation, CMU_KANNADA_MAP))
        elif language == "ta_IN":
            # No vowel-sign fix-up exists for Tamil.
            pronunciation_str = self._map_symbols(cmu_pronunciation,
                                                  CMU_TAMIL_MAP)
        else:
            pronunciation_str = ""

        # The tables hold byte strings under Python 2; decode only then.
        if isinstance(pronunciation_str, bytes):
            pronunciation_str = pronunciation_str.decode("utf-8")
        return leading + pronunciation_str + trailing

    def _fix_vowel_signs_ml(self, text):
        """Apply a fixed, ordered list of substring replacements to *text*.

        NOTE(review): presumably this merges a virama followed by an
        independent vowel into the matching vowel sign for Malayalam; the
        literals are mis-encoded in this copy of the file, so confirm against
        a correctly encoded original.  The order of the calls matters.
        """
        text= text.replace("àµà´…","")
        text= text.replace("àµâ€à´…","")
        text= text.replace("àµà´†","à´¾")
        text= text.replace("àµâ€à´†","à´¾")
        text= text.replace("àµà´‡","à´¿")
        text= text.replace("àµâ€à´‡","à´¿")
        text= text.replace("àµà´ˆ","àµ€")
        text= text.replace("àµâ€à´ˆ","àµ€")
        text= text.replace("àµà´‰","àµ")
        text= text.replace("àµâ€à´‰","àµ")
        text= text.replace("àµà´Š","àµ‚")
        text= text.replace("àµâ€à´Š","àµ‚")
        text= text.replace("àµà´±","àµà´°")
        text= text.replace("àµà´Ž","àµ†")
        text= text.replace("àµâ€à´Ž","")
        text= text.replace("àµà´","àµ‡")
        text= text.replace("àµâ€à´","àµ‡")
        text= text.replace("àµà´","àµˆ")
        text= text.replace("àµâ€à´","àµˆ")
        text= text.replace("àµà´’","àµŠ")
        text= text.replace("àµâ€à´’","àµŠ")
        text= text.replace("àµà´“","àµ‹")
        text= text.replace("àµâ€à´“","àµ‹")
        text= text.replace("àµà´”","àµŒ")
        text= text.replace("àµâ€à´”","àµŒ")
        text= text.replace("à´°àµà´°","à´±àµà´±")
        text= text.replace("à´±àµà´°","à´±àµà´±")
        text= text.replace("à´¨àµâ€à´±àµà´±","à´¨àµà´±")
        return text

    def _fix_vowel_signs_kn(self, text):
        """Apply a fixed, ordered list of substring replacements to *text*.

        NOTE(review): presumably the Kannada counterpart of
        _fix_vowel_signs_ml; the literals are mis-encoded in this copy of
        the file, so confirm against a correctly encoded original.
        """
        text= text.replace("à³à²…","")
        text= text.replace("à³à²†","à²¾")
        text= text.replace("à³à²‡","à²¿")
        text= text.replace("à³à²ˆ","à³€")
        text= text.replace("à³à²‰","à³")
        text= text.replace("à³à²Š","à³‚")
        text= text.replace("à³à²‹","à³ƒ")
        text= text.replace("à³à²Ž","à³†")
        text= text.replace("à³à²","à³‡")
        text= text.replace("à³à²","à³ˆ")
        text= text.replace("à³à²’","à³Š")
        text= text.replace("à³à²“","à³‹")
        text= text.replace("à³à²”","à³Œ")
        return text

## indic_en.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
#indic_en.py
#
#Copyright 2010 Vasudev Kamath <kamathvasudev@gmail.com>
#
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU  General Public License as published by
#the Free Software Foundation; either version 3 of the License, or
#(at your option) any later version.
#
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with this program; if not, write to the Free Software
#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#MA 02110-1301, USA.
#
'''
This file contains all language related dictionaries vowel and vowel signs
and function which returns a dictionary or vowel or vowel signs for a language

Trying to make indic_en transliteration more generic
'''

kannada_english_dict = {u'à²…':'a',u'à²†':'aa',u'à²‡':'i',u'à²ˆ':'i',u'à²‰':'u',\
                        u'à²Š':'u',u'à²‹':'rri',u'à²Ž':'e',u'à²':'e',u'à²':'ai',\
                        u'à²’':'o',u'à²“':'o',u'à²”':'au',u'à²‚':'m',u'à²ƒ':'h',\
                        u'à²•':'k',u'à²–':'kh',u'à²—':'g',u'à²˜':'gh',u'à²™':'ng',\
                        u'à²š':'ch',u'à²›':'chh',u'à²œ':'j',u'à²':'jhh',u'à²ž':'nj',\
                        u'à²¤':'th',u'à²¥':'thh',u'à²¦':'d',u'à²§':'dh',u'à²¨':'n',\
                        u'à²Ÿ':'T',u'à² ':'Th',u'à²¡':'D',u'à²¢':'Dh',u'à²£':'N',\
                        u'à²ª':'p',u'à²«':'ph',u'à²¬':'b',u'à²':'bh',u'à²®':'m',\
                        u'à²¯':'y',u'à²°':'r',u'à²²':'l',u'à²µ':'v',u'à²¶':'sh',\
                        u'à²·':'shh',u'à²¸':'s',u'à²¹':'h',u'à²³':'L',\
                        u'à²‹':'rri',u'à³':'',u'à²¾':'aa',u'à²¿':'i',u'à³€':'i',\
                        u'à³':'u',u'à³‚':'u',u'à³ƒ':'rri',u'à³†':'e',u'à³‡':'e',\
                        u'à³ˆ':'ai',u'à³Š':'o',u'à³‹':'o',u'à³Œ':'au',\
                        u'à²•à³à²·':'ksh',u'à²¤à³à²°':'tr',u'à²œà³à²ž':'jn',\
                        u'à³§':'1',u'à³¨':'2',u'à³©':'3',u'à³ª':'4',u'à³«':'5',\
                        u'à³¬':'6',u'à³':'7',u'à³®':'8',u'à³¯':'9',u'à³¦':'0'}

kn_vowels = [u'à²…',u'à²†',u'à²‡',u'à²ˆ',u'à²‰',u'à²Š',u'à²‹',u'à²Ž',u'à²',u'à²',\
                 u'à²’',u'à²“',u'à²”']
kn_vowel_signs = [u'à³',u'à²¾',u'à²¿',u'à³€',u'à³',u'à³‚',u'à³ƒ',u'à³†',u'à³‡',\
                        u'à³ˆ',u'à³Š',u'à³‹',u'à³Œ',u'à²‚',u'à²ƒ',u' ']

tamil_english_dict = {u'à®…':'a',u'à®†':'aa',u'à®‡':'i',u'à®ˆ':'ii',u'à®‰':'u',u'à®Š':'uu',u'à®Ž':'e',\
					u'à®':'ee',u'à®':'ai',u'à®’':'o',u'à®“':'oo',u'à®”':'au',\
		u'à®•à¯':'k', u'à®™à¯':'ng', u'à®šà¯':'s','à®žà¯':'nj', u'à®Ÿà¯':'d', u'à®£à¯':'N', u'à®¤à¯':'th', u'à®¨à¯':'w',\
		u'à®ªà¯':'p', u'à®®à¯':'m', u'à®¯à¯':'y', u'à®°à¯':'r', u'à®²à¯':'l', u'à®µà¯':'v', u'à®´à¯':'zh', u'à®³à¯':'L', u'à®±à¯':'R', u'à®©à¯':'n'}

tamil_vowels = [u'à®…',u'à®†',u'à®‡',u'à®ˆ',u'à®‰',u'à®Š',u'à®Ž',\
					u'à®',u'à®',u'à®’',u'à®“',u'à®”',u'à®ƒ']

tamil_vowel_signs = [u'à®¾',u'à®¿',u'à¯€',u'à¯',u'à¯‚',u'à¯†',u'à¯‡',u'à¯ˆ',u'à¯Š',u'à¯‹',u'à¯Œ',u'à¯']

malayalam_english_dict={u'à´…':'a',u'à´†':'aa',u'à´‡':'i',u'à´ˆ':'ee',u'à´‰':'u',u'à´Š':'oo',u'à´‹':'ri',\
                u'à´Ž':'e',u'à´':'e',u'à´':'ai',u'à´’':'o',u'à´“':'o',u'à´”':'au',\
                u'à´•':'k',u'à´–':'kh',u'à´—':'g',u'à´˜':'gh',u'à´™àµà´™':'ng',u'à´™':'ng',\
                u'à´š':'ch',u'à´›':'chh',u'à´œ':'j',u'à´':'jhh',u'à´ž':'nj',\
                u'à´Ÿ':'t',u'à´ ':'th',u'à´¡':'d',u'à´¢':'dh',u'à´£':'n',\
                u'à´¤':'th',u'à´¥':'th',u'à´¦':'d',u'à´§':'dh',u'à´¨':'n',\
                u'à´ª':'p',u'à´«':'ph',u'à´¬':'b',u'à´':'bh',u'à´®':'m',\
                u'à´¯':'y',u'à´°':'r',u'à´²':'l', u'à´µ':'v', u'à´±':'r',\
                u'à´¶':'s',u'à´·':'sh',u'à´¸':'s', u'à´¹':'h',u'à´³':'l',u'à´´':'zh',\
                u'àµ':'',u'à´‚':'m',u'à´¾':'aa',u'à´¿':'i' ,u'àµ€':'ee' ,u'àµ':'u',\
                u'àµ‚':'oo',u'àµƒ':'ri' ,u'àµ†':'e' ,u'àµ‡':'e',\
                u'àµˆ':'ai',u'àµŠ':'o' ,u'àµ‹':'oo' ,u'àµ—':'au',  u'àµŒ':'ou'}

ml_vowels = [u'à´…',u'à´†',u'à´‡',u'à´ˆ',u'à´‰' ,u'à´Š',u'à´‹', u'à´Ž',u'à´',u'à´',\
                         u'à´’',u'à´“',u'à´”']
ml_vowel_signs = [u'àµ',u'à´‚',u'à´¾',u'à´¿',u'àµ€',u'àµ', u'àµ‚',u'àµƒ' ,u'àµ†' ,u'àµ‡',\
                              u'àµˆ',u'àµŠ' ,u'àµ‹' ,u'àµ—' , u'àµŒ',u'â€']


# P.S: Please declare all language related variables above this and
# fill in the following mapping as you add dictionary vowels and
# vowel_signs for your language


# language dictionary mapping
language_dictionary = {"kn_IN":kannada_english_dict,\
                           "ml_IN":malayalam_english_dict}

# language vowels mapping
language_vowels = {"kn_IN":kn_vowels,"ml_IN":ml_vowels}

# language vowel signs mapping
language_vowel_signs = {"kn_IN":kn_vowel_signs,\
                            "ml_IN":ml_vowel_signs}

# language virama sign mapping
language_virama = {"kn_IN":u"à³","ml_IN":u"àµ"}
# language anuswara sign mapping
language_anuswara = {"kn_IN":u"à²‚","ml_IN":u'à´‚'}

def get_dictionary_for(lang="ml_IN"):
    """
    Return the character -> English mapping registered for `lang` in
    language_dictionary.  When the language has no entry, the ml_IN
    dictionary is returned instead.

    Arguments:
    - `lang`: Language for which dictionary is required
    """

    # Fall back to the ml_IN *dictionary*, not to the string "ml_IN".
    return language_dictionary.get(lang, language_dictionary["ml_IN"])

def get_vowels_for(lang="ml_IN"):
    """
    Return the vowel list registered for `lang` in language_vowels.
    When the language has no entry, the ml_IN list is returned instead.

    Arguments:
    - `lang`: Language for which vowel list should be returned
    """

    # Fall back to the ml_IN *list*, not to the string "ml_IN".
    return language_vowels.get(lang, language_vowels["ml_IN"])

def get_vowel_signs_for(lang="ml_IN"):
    """
    Return the vowel-sign list registered for `lang` in
    language_vowel_signs.  When the language has no entry, the ml_IN
    list is returned instead.

    Arguments:
    - `lang`: Language for which vowel signs list should be returned
    """

    # Fall back to the ml_IN *list*, not to the string "ml_IN".
    return language_vowel_signs.get(lang, language_vowel_signs["ml_IN"])

def get_virama_for(lang="ml_IN"):
    """
    Return the virama symbol registered for `lang` in language_virama.
    When the language has no entry, the ml_IN symbol is returned instead.

    Arguments:
    - `lang`: Language for which virama symbol should be returned
    """

    # Fall back to the ml_IN *symbol*, not to the string "ml_IN".
    return language_virama.get(lang, language_virama["ml_IN"])

def get_anuswara_for(lang="ml_IN"):
    """
    Return the anuswara symbol registered for `lang` in language_anuswara.
    When the language has no entry, the ml_IN symbol is returned instead.

    Arguments:
    - `lang`: Language for which anuswara symbol is needed
    """

    # Fall back to the ml_IN *symbol*, not to the string "ml_IN".
    return language_anuswara.get(lang, language_anuswara["ml_IN"])

## transliterate.py
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Any Indian Language to any other Indian language transliterator
# Copyright 2009-2010 Santhosh Thottingal <santhosh.thottingal@gmail.com>
# http://www.smc.org.in
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# If you find any bugs or have any suggestions
# email: santhosh.thottingal@gmail.com
# URL: http://www.smc.org.in

from common import *
from utils import *
import string
import os
from cmudict import CMUDict
from indic_en import *

class Transliterator(SilpaModule):
    def __init__(self):
        """Set up the template path, the CMU dictionary helper and the response."""
        module_dir = os.path.dirname(__file__)
        # The HTML template is expected next to this module.
        self.template = os.path.join(module_dir, 'transliterate.html')
        self.cmu = CMUDict()
        self.response = SilpaResponse(self.template)

    def transliterate_en_ml(self, word):
        """
        Transliterate an English word to Malayalam with the help of
        the CMU pronunciation dictionary.

        Delegates to self.cmu.pronunciation with language "ml_IN" and
        returns its result unchanged.
        """
        return self.cmu.pronunciation(word,"ml_IN")

    def transliterate_en_kn(self, word):
        """
        Transliterate an English word to Kannada with the help of
        the CMU pronunciation dictionary.

        Delegates to self.cmu.pronunciation with language "kn_IN" and
        returns its result unchanged.
        """
        return self.cmu.pronunciation(word,"kn_IN")

    def transliterate_en_ta(self, word):
        """
        Transliterate an English word to Tamil with the help of
        the CMU pronunciation dictionary.

        Delegates to self.cmu.pronunciation with language "ta_IN" and
        returns its result unchanged.
        """
        return self.cmu.pronunciation(word,"ta_IN")


    def transliterate_en_xx(self,word, target_lang):
        """
        Transliterate English to any Indian Language.

        Arguments:
        - `word`: English word
        - `target_lang`: target language code such as "ml_IN"

        English targets return the word unchanged.  Kannada and Tamil are
        produced directly from their own CMU tables.  Every other target
        is produced by transliterating to Malayalam first and then
        chaining through transliterate_indic_indic.
        """
        if target_lang == "en_IN" or target_lang == "en_US":
            return word
        # Direct results must be returned here: falling through would
        # overwrite them with the Malayalam rendering and shift it again.
        if target_lang == "kn_IN":
            return self.transliterate_en_kn(word)
        if target_lang == "ta_IN":
            return self.transliterate_en_ta(word)

        tx_str = self.transliterate_en_ml(word)
        if target_lang == "ml_IN":
            return tx_str

        # Chain it through indic-indic transliteration.
        # First remove the Malayalam-specific zero width joiner (U+200D).
        tx_str = tx_str.replace(u'\u200d', u'')
        virama = u'\u0d4d'  # Malayalam virama
        if tx_str.endswith(virama) and target_lang in ("hi_IN", "gu_IN",
                                                       "bn_IN"):
            tx_str = tx_str[:-len(virama)]  # remove the last virama

        return self.transliterate_indic_indic(tx_str, "ml_IN", target_lang)

    def transliterate_xx_en(self,word, src_lang):
        """
        Transliterate a word in an Indian language to English.

        English sources are returned unchanged.  Kannada goes straight
        through transliterate_indic_en; every other source is converted to
        Malayalam first (unless it already is) and then romanised as ml_IN.
        """
        if src_lang in ("en_IN", "en_US"):
            return word

        # TODO: transliterate_indic_en is generic, but indic_en only has
        # tables for kn_IN and ml_IN, hence the language checks below.
        # Once every Indic language is added to indic_en, replace this
        # block with a single call to transliterate_indic_en.
        if src_lang == "kn_IN":
            return self.transliterate_indic_en(word, src_lang)

        malayalam_word = word
        if src_lang != "ml_IN":
            malayalam_word = self.transliterate_indic_indic(word, src_lang,
                                                            "ml_IN")
        return self.transliterate_indic_en(malayalam_word, "ml_IN")

    def transliterate_iso15919(self, word, src_language):
        """
        Transliterate `word` from `src_language` using charmap["ISO15919"].

        Each character is reduced to its offset from lang_bases[src_language]
        and looked up in the table.  Returns the result decoded as UTF-8.
        """
        drops_final_a = src_language in ("hi_IN", "gu_IN", "bn_IN")
        last_position = len(word)
        result = ""
        for position, letter in enumerate(word, 1):
            offset = ord(letter) - lang_bases[src_language]
            # 76 is the virama offset for all Indian languages from its base
            if 61 <= offset <= 76:
                result = result[:-1]  # remove the last 'a'
            if 0 < offset <= 128:
                result = result + charmap["ISO15919"][offset]
            # Delete the inherent 'a' at the end of the word (Hindi,
            # Gujarati, Bengali), but only for words longer than one letter.
            if (drops_final_a and position == last_position
                    and last_position > 1 and result[-1:] == 'a'):
                result = result[:-1]
        return result.decode("utf-8")

    def transliterate_ipa(self, word, src_language):
        """
        Transliterate the given word in src_language to
        IPA - International Phonetic Alphabet notation.

        Characters with a code point below 255 are copied unchanged.  Every
        other character is reduced to its offset from
        lang_bases[src_language] and looked up in charmap["IPA"].
        Returns the result decoded as UTF-8.
        """
        # Inherent vowel as spelled in this file; it is longer than one
        # element, so suffix checks must compare len(schwa) elements.
        schwa = 'É™'
        schwa_len = len(schwa)
        drops_final_schwa = src_language in ("hi_IN", "gu_IN", "bn_IN")
        tx_str = ""
        word_length = len(word)
        for index, letter in enumerate(word, 1):
            if ord(letter) < 255 : #ASCII characters + English
                tx_str += letter
                continue
            offset = ord(letter) - lang_bases[src_language]
            #76 is the virama offset for all indian languages from its base
            if offset >= 61 and offset <= 76:
                tx_str = tx_str[:-schwa_len] #remove the last schwa
            if offset > 0 and offset <= 128:
                tx_str = tx_str + charmap["IPA"][offset]
            # Delete the inherent vowel at the end of the word (Hindi,
            # Gujarati, Bengali).  Slice comparison instead of
            # tx_str[-1:], which could never equal the longer literal.
            if (drops_final_schwa and index == word_length
                    and word_length > 1
                    and tx_str[-schwa_len:] == schwa):
                tx_str = tx_str[:-schwa_len]
        return tx_str.decode("utf-8")


    def _malayalam_fixes(self, text):
        """
        Post-process Malayalam output and return the adjusted text.

        - MA + virama directly before a space, comma, full stop or closing
          parenthesis becomes an anusvara
        - U+0D29 is replaced by U+0D28 (NA)
        - U+0D64 is replaced by a full stop

        The fixes are best effort: if the text cannot take unicode
        replacements, whatever was fixed so far is returned.
        """
        ma_virama = u"\u0d2e\u0d4d"
        anusvara = u"\u0d02"
        try:
            for tail in (u" ", u",", u".", u")"):
                text = text.replace(ma_virama + tail, anusvara + tail)
            text = text.replace(u"\u0d29", u"\u0d28")
            text = text.replace(u"\u0d64", u".")   #danda by fullstop
        except (UnicodeError, AttributeError, TypeError):
            # e.g. a non-ASCII byte string, or a value without .replace()
            pass
        return text

    def transliterate_indic_indic(self, word, src_lang, target_lang) :
        """
            Transliterate a word from one Indian language
            to another Indian language.

            Every character inside the Indic range is shifted by
            getOffset(src_lang, target_lang).  Punctuation and characters
            outside that range are copied unchanged.

            Arguments:
            - `word`: text to transliterate
            - `src_lang`: language code of `word`
            - `target_lang`: language code to produce
        """
        # Characters outside this span are not shifted.  The lower bound is
        # the value the original check used (2304 == 0x0900).  The upper
        # bound is the end of the Malayalam block, so that every script
        # handled by this method stays inside the range.
        first_indic = 0x0900
        last_indic = 0x0D7F
        schwa_deleting = ("hi_IN", "gu_IN", "pa_IN", "bn_IN")

        index = 0
        tx_str = ""
        word = normalizer.normalize(word)
        if src_lang == "ml_IN" and target_lang != "ml_IN" :
            word = word.replace(u"\u200C",u"")
            word = word.replace(u"\u200D",u"")

            #remove every samvruthokaram
            word = word.replace(u"àµàµ",u"")

        shift = self.getOffset(src_lang, target_lang)
        for letter in word:
            index += 1
            code = ord(letter)
            if (letter in string.punctuation
                    or code < first_indic or code > last_indic):
                tx_str = tx_str + letter
                continue
            offset = code + shift
            if(offset>0):
                tx_str = tx_str + unichr (offset)
            #schwa deletion
            baseoffset = offset - lang_bases[target_lang]
            #76 : virama
            if (index ==  len(word)
                and baseoffset == 76
                and target_lang in schwa_deleting) :
                #TODO Add more languages having schwa deletion characteristic
                tx_str = tx_str[:-(len(letter))] #remove the trailing virama

        # The replacements below are character-wise, so running them once
        # on the finished string gives the same result as running them
        # after every character.
        if target_lang == "ml_IN" and src_lang == "ta_IN":
            tx_str = tx_str.replace(u"à´©" , u"à´¨")

        if target_lang == "ta_IN":
            tx_str = tx_str.replace(u'\u0B96' , u"à®•")
            tx_str = tx_str.replace(u'\u0B97' , u"à®•")
            tx_str = tx_str.replace(u'\u0B98' , u"à®•")
            tx_str = tx_str.replace(u'\u0B9B' , u"à®š")
            tx_str = tx_str.replace(u'\u0B9D' , u"à®š")
            tx_str = tx_str.replace(u'\u0BA0' , u"à®Ÿ")
            tx_str = tx_str.replace(u'\u0BA1' , u"à®Ÿ")
            tx_str = tx_str.replace(u'\u0BA2' , u"à®Ÿ")
            tx_str = tx_str.replace(u'\u0BA5' , u"à®¤")
            tx_str = tx_str.replace(u'\u0BA6' , u"à®¤")
            tx_str = tx_str.replace(u'\u0BA7' , u"à®¤")
            tx_str = tx_str.replace(u'\u0BAB' , u"à®ª")
            tx_str = tx_str.replace(u'\u0BAC' , u"à®ª")
            tx_str = tx_str.replace(u'\u0BAD' , u"à®ª")
            tx_str = tx_str.replace(u'\u0BC3' , u"à®¿à®°à¯")
            tx_str = tx_str.replace(u'à®‚',u'à®®à¯')

        #If target is malayalam, we need to add the virama.
        # tx_str[-1:] (not tx_str[-1]) keeps an empty result from raising.
        if ( (target_lang == "ml_IN")
                and src_lang in schwa_deleting
                and tx_str[-1:].isalpha()
                ):
            tx_str = tx_str+u"àµ"
        return tx_str

    def transliterate_indic_en(self,word,src_lang):
        """
        Romanise `word` character by character using the indic_en tables
        of `src_lang`.

        Arguments:
        - `self`:
        - `word`: Word to be transliterated (sentence)
        - `src_lang`: Language from which we need to transliterate

        Returns the transliterated string.  Characters without a table
        entry are copied unchanged.
        """

        # Fetch the per-language tables
        dictionary = get_dictionary_for(src_lang)
        vowels = get_vowels_for(src_lang)
        vowel_signs = get_vowel_signs_for(src_lang)
        virama = get_virama_for(src_lang)
        anuswara = get_anuswara_for(src_lang)


        word_length = len(word)
        index = 0
        tx_string = ""
        while index < word_length:

            # If the current character is a punctuation symbol, copy it
            # and move on.
            # Added to avoid getting an extra 'a' at the beginning
            # of the word next to a punctuation symbol.
            #

            if word[index] in string.punctuation:
                tx_string += word[index]
                index += 1
                continue

            # Virama = conjunct marker; it produces no output of its own
            if word[index] == virama:
                index+=1
                continue;

            # Get the English equivalent of the character.
            try:
                tx_string += dictionary[word[index]]
            except KeyError:
                # If the character isn't present in the dict
                # just append the character to the string.
                # Punctuation never reaches this point (handled above).

                tx_string += word[index]


            # Inherent 'a': the current character is neither a vowel nor a
            # vowel sign, and the next character is a mapped character
            # that is not a vowel sign.
            if index+1 < word_length and not word[index+1] in vowel_signs\
                and word[index+1] in dictionary \
                and not word[index] in vowels\
                and not word[index] in vowel_signs :
                tx_string +='a'

            # Inherent 'a' after the last character of the word, unless it
            # is a vowel sign.
            # NOTE(review): unlike the check above, this one does not
            # exclude independent vowels -- confirm that is intended.
            if index+1 == word_length and not word[index] in vowel_signs\
                and word[index] in dictionary:
                tx_string +='a'

            # Handle the "am" sign: add 'a' before a following anuswara
            if index+1 < word_length and word[index+1] == anuswara\
                and  not word[index] in vowel_signs:
                tx_string += 'a'
            index+=1
        return tx_string


    @ServiceMethod
    def transliterate(self,text, target_lang_code):
        """
        Transliterate `text` word by word into `target_lang_code`.

        Arguments:
        - `text`: input text; split on newlines, then on single spaces
        - `target_lang_code`: a language code, "ISO15919" or "IPA"

        The source language of every word comes from detect_lang().  Words
        whose language cannot be determined are copied unchanged, preceded
        by a space.  Returns the transliterated text.
        """
        tx_str = ""
        lines = text.split("\n")
        multiline = len(lines) > 1
        for line in lines:
            for word in line.split(" "):
                if not word.strip():
                    # Empty or whitespace-only piece: keep it as it is.
                    tx_str = tx_str + word
                    continue

                try:
                    src_lang_code = detect_lang(word)[word]
                except Exception:
                    # Narrowed from a bare except so that KeyboardInterrupt
                    # and SystemExit are no longer swallowed.
                    tx_str = tx_str + " " + word
                    continue #FIXME

                if target_lang_code == "ISO15919":
                    tx_str = tx_str + \
                        self.transliterate_iso15919(word, src_lang_code) + " "
                elif target_lang_code == "IPA":
                    tx_str = tx_str + \
                        self.transliterate_ipa(word, src_lang_code) + " "
                elif src_lang_code == "en_US":
                    tx_str = tx_str + \
                        self.transliterate_en_xx(word, target_lang_code) + " "
                elif target_lang_code == "en_US" or target_lang_code == "en_IN":
                    tx_str = tx_str + \
                        self.transliterate_xx_en(word, src_lang_code) + " "
                else:
                    tx_str += self.transliterate_indic_indic(word,
                                                             src_lang_code,
                                                             target_lang_code)
                    # NOTE(review): the separator is only added for
                    # multi-line input, so words of a single-line text are
                    # joined without spaces here -- confirm intent.
                    if multiline:
                        tx_str += " "
            if multiline:
                tx_str += "\n"
        # Language specific fixes
        if target_lang_code == "ml_IN":
            tx_str = self._malayalam_fixes(tx_str)
        return tx_str

    def getOffset(self,src,target):
        """
        Return lang_bases[target] - lang_bases[src], i.e. the distance
        between the bases of the two languages.

        Returns 0 when either language code has no entry in lang_bases.
        """
        try:
            return lang_bases[target] - lang_bases[src]
        except (LookupError, TypeError):
            # Unknown (or unusable) language code: no shift.
            return 0

    def get_module_name(self):
        """Return the display name of this module."""
        return "Transliterator"
    def get_info(self):
        """Return a one-line description of this module."""
        return  "Transliterate the text between any Indian Language"

def getInstance():
    """Return a new Transliterator instance."""
    return Transliterator()
	#! /usr/bin/env python
	# -- coding: utf-8 --
	# Any Indian Language to any other Indian language transliterator
	# Copyright 2008-2010 Santhosh Thottingal <santhosh.thottingal@gmail.com>
	# http://www.smc.org.in
	#
	# This program is free software; you can redistribute it and/or modify
	# it under the terms of the GNU General Public License as published by
	# the Free Software Foundation; either version 3 of the License, or
	# (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU Library General Public License for more details.
	#
	# You should have received a copy of the GNU General Public License
	# along with this program; if not, write to the Free Software
	# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	#
	# If you find any bugs or have any suggestions email: santhosh.thottingal@gmail.com
	# URL: http://www.smc.org.in

	import string
	import os
	CMU_MALAYALAM_MAP = {
	"AA" : "à´“",
	"AH" : "à´…",
	"AE" : "à´",
	"AO" : "à´“",
	"AW" : "à´”",
	"AY" : "à´",
	"B" : "à´¬àµ",
	"CH" : "à´šàµà´šàµ",
	"D" : "à´¡àµ",
	"DH" : "à´¦àµ",
	"EA" : "à´ˆ",
	"EH" : "à´Ž",
	"ER" : "à´Žà´°àµâ€",
	"EY" : "à´Žà´¯àµ",
	"F" : "à´«àµ",
	"G" : "à´—àµ",
	"HH" : "à´¹àµ",
	"IH" : "à´‡",
	"IY" : "à´ˆ",
	"J" : "à´œàµ",
	"JH" : "à´œàµ",
	"K" : "à´•àµ",
	"L" : "à´²àµâ€",
	"M" : "à´®àµ",
	"N" : "à´¨àµâ€",
	"NG" : "à´™àµ",
	"OW" : "à´’",
	"P" : "à´ªàµ",
	"R" : "à´°àµâ€",
	"S" : "à´¸àµ",
	"SH" : "à´·àµ",
	"T" : "à´±àµà´±àµ",
	"TH" : "à´¤àµ",
	"Y" : "à´¯àµ",
	"UW" : "à´‰",
	"W" : "à´µàµ",
	"V" : "à´µàµ",
	"Z" : "à´¸àµ",
	}

	CMU_TAMIL_MAP = {
	"AA" : "à®†",
	"AH" : "à®…",
	"AE" : "à®Ž",
	"AO" : "à®’",
	"AW" : "à®”",
	"AY" : "à®",
	"B" : "à®ªà®¿",
	"CH" : "à®šà¯",
	"D" : "à®Ÿà¯",
	"DH" : "à®¤à¯",
	"EA" : "à´ˆ",
	"EH" : "à®",
	"ER" : "à®…à®°à¯",
	"EY" : "à®",
	"F" : "à®ƒà®ªà¯",
	"G" : "à®•à¯",
	"HH" : "à®¹à¯",
	"IH" : "à®‡",
	"IY" : "à®‡",
	"J" : "à®œà¯",
	"JH" : "à®œà¯",
	"K" : "à®•à¯",
	"L" : "à®²à¯",
	"M" : "à®®à¯",
	"N" : "à®©à¯",
	"NG" : "à®™à¯",
	"OW" : "à®”",
	"P" : "à®ªà¯",
	"R" : "à®°à¯",
	"S" : "à®šà¯",
	"SH" : "à®·à¯",
	"T" : "à®Ÿà¯",
	"TH" : "à®¤à¯",
	"Y" : "à®¯à¯",
	"UW" : "à®‰",
	"W" : "à®µ",
	"V" : "à®µ",
	"Z" : "à®¸à¯",
	}

	CMU_KANNADA_MAP = {
	"AA" : "à²†",
	"AH" : "à²…",
	"AE" : "à²",
	"AO" : "à²“",
	"AW" : "à²”",
	"AY" : "à²",
	"B" : "à²¬à³",
	"CH" : "à²šà³",
	"D" : "à²¡à³",
	"DH" : "à²¦à³",
	"EA" : "à²ˆ",
	"EH" : "à²Ž",
	"ER" : "à²…à²°à³",
	"EY" : "à²Žà²¯à³",
	"F" : "à²«à³",
	"G" : "à²—à³",
	"HH" : "à²¹à³",
	"IH" : "à²‡",
	"IY" : "à²ˆ",
	"J" : "à²œà³",
	"JH" : "à²œà³",
	"K" : "à²•à³",
	"L" : "à²²à³",
	"M" : "à²®à³",
	"N" : "à²¨à³",
	"NG" : "à²‚à²—à³",
	"OW" : "à²’",
	"P" : "à²ªà³",
	"R" : "à²°à³",
	"S" : "à²¸à³",
	"SH" : "à²·à³",
	"T" : "à²Ÿà³",
	"TH" : "à²¤à³",
	"Y" : "à²¯à³",
	"UW" : "à²Š",
	"UH":"à²‰",
	"W" : "à²µà³",
	"V" : "à²µà³",
	"Z":"à²¸à³",
	"ZH":"à²·à³",
	}

	class CMUDict():
	def __init__(self):
	self.dictionaryfile=os.path.join(os.path.dirname(__file__), 'cmudict.0.7a_SPHINX_40')
	self.cmudictionary = None
	def load(self):
	fdict = open(self.dictionaryfile, "r")
	flines = fdict.readlines()
	linecount = len(flines)
	self.cmudictionary = dict()
	for line in flines:
	line = line.strip()
	lhs = line.split()[0]
	rhs = line.split()[1:]
	self.cmudictionary[lhs] = rhs
	def find(self, word):
	if self.cmudictionary== None:
	self.load()
	return self.cmudictionary[word.upper()]
	def pronunciation(self,word, language):
	stripped_word = word.strip('!,.?:')
	punctuations = word[len(stripped_word):]
	try:
	cmu_pronunciation = self.find(stripped_word)
	except KeyError:
	#print "could not find the word " + stripped_word + " in dictionary"
	return word
	pronunciation_str = ""
	if language =="ml_IN":
	for syl in cmu_pronunciation:
	try:
	pronunciation_str += CMU_MALAYALAM_MAP[syl]
	except KeyError:
	pronunciation_str += syl
	pronunciation_str = self._fix_vowel_signs_ml(pronunciation_str)

	if language == "kn_IN":
	for symbol in cmu_pronunciation:
	try:
	pronunciation_str += CMU_KANNADA_MAP[symbol]
	except KeyError:
	pronunciation_str += symbol
	pronunciation_str = self._fix_vowel_signs_kn(pronunciation_str)
	return (pronunciation_str).decode("utf-8") + punctuations

	if language == "ta_IN":
	for symbol in cmu_pronunciation:
	try:
	pronunciation_str += CMU_TAMIL_MAP[symbol]
	except KeyError:
	pronunciation_str += symbol
	#pronunciation_str = self._fix_vowel_signs_kn(pronunciation_str)
	return (pronunciation_str).decode("utf-8") + punctuations



	def _fix_vowel_signs_ml(self,text) :
	# Post-process text built from the Malayalam CMU map: apply the fixed
	# list of str.replace() substitutions below, in order, and return the
	# result. The order matters because each rule sees the output of the
	# rules before it.
	# Presumably each rule rewrites a virama (optionally followed by a
	# joiner) plus an independent vowel into the matching dependent vowel
	# sign -- TODO confirm against a correctly decoded copy of this file.
	# NOTE(review): if the literals below look garbled, the file was read
	# with the wrong encoding (it declares UTF-8); verify before editing.
	text= text.replace("àµà´…","")
	text= text.replace("àµâ€à´…","")
	text= text.replace("àµà´†","à´¾")
	text= text.replace("àµâ€à´†","à´¾")
	text= text.replace("àµà´‡","à´¿")
	text= text.replace("àµâ€à´‡","à´¿")
	text= text.replace("àµà´ˆ","àµ€")
	text= text.replace("àµâ€à´ˆ","àµ€")
	text= text.replace("àµà´‰","àµ")
	text= text.replace("àµâ€à´‰","àµ")
	text= text.replace("àµà´Š","àµ‚")
	text= text.replace("àµâ€à´Š","àµ‚")
	text= text.replace("àµà´±","àµà´°")
	text= text.replace("àµà´Ž","àµ†")
	# NOTE(review): this rule replaces with an empty string although the
	# rule just above it produces a non-empty replacement -- confirm that
	# this is intended and not a dropped literal.
	text= text.replace("àµâ€à´Ž","")
	text= text.replace("àµà´","àµ‡")
	text= text.replace("àµâ€à´","àµ‡")
	text= text.replace("àµà´","àµˆ")
	text= text.replace("àµâ€à´","àµˆ")
	text= text.replace("àµà´’","àµŠ")
	text= text.replace("àµâ€à´’","àµŠ")
	text= text.replace("àµà´“","àµ‹")
	text= text.replace("àµâ€à´“","àµ‹")
	text= text.replace("àµà´”","àµŒ")
	text= text.replace("àµâ€à´”","àµŒ")
	# The last three rules rewrite multi-character sequences, including
	# sequences that earlier rules in this function may have produced.
	text= text.replace("à´°àµà´°","à´±àµà´±")
	text= text.replace("à´±àµà´°","à´±àµà´±")
	text= text.replace("à´¨àµâ€à´±àµà´±","à´¨àµà´±")
	return text

	def _fix_vowel_signs_kn(self,text) :
	# Post-process text built from the Kannada CMU map: apply the fixed
	# list of str.replace() substitutions below, in order, and return the
	# result.
	# Presumably each rule rewrites a virama plus an independent vowel into
	# the matching dependent vowel sign (the first rule simply drops the
	# matched pair) -- TODO confirm against a correctly decoded copy.
	# NOTE(review): if the literals below look garbled, the file was read
	# with the wrong encoding (it declares UTF-8); verify before editing.
	text= text.replace("à³à²…","")
	text= text.replace("à³à²†","à²¾")
	text= text.replace("à³à²‡","à²¿")
	text= text.replace("à³à²ˆ","à³€")
	text= text.replace("à³à²‰","à³")
	text= text.replace("à³à²Š","à³‚")
	text= text.replace("à³à²‹","à³ƒ")
	text= text.replace("à³à²Ž","à³†")
	text= text.replace("à³à²","à³‡")
	text= text.replace("à³à²","à³ˆ")
	text= text.replace("à³à²’","à³Š")
	text= text.replace("à³à²“","à³‹")
	text= text.replace("à³à²”","à³Œ")
	return text
	#!/usr/bin/python
	# -- coding: utf-8 --
	#indic_en.py
	#
	#Copyright 2010 Vasudev Kamath <kamathvasudev@gmail.com>
	#
	#This program is free software; you can redistribute it and/or modify
	#it under the terms of the GNU General Public License as published by
	#the Free Software Foundation; either version 3 of the License, or
	#(at your option) any later version.
	#
	#This program is distributed in the hope that it will be useful,
	#but WITHOUT ANY WARRANTY; without even the implied warranty of
	#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	#GNU General Public License for more details.
	#
	#You should have received a copy of the GNU General Public License
	#along with this program; if not, write to the Free Software
	#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
	#MA 02110-1301, USA.
	#
	'''
	This file contains all the language-related dictionaries, vowels and vowel signs,
	and the functions that return the dictionary, vowels or vowel signs for a language.

	Trying to make indic_en transliteration more generic
	'''

	# Kannada lookup table: maps unicode keys (single characters plus a few
	# multi-character sequences and the digits) to the Latin string used for
	# them. Some values are empty strings.
	# NOTE(review): if the literals below look garbled, the file was read
	# with the wrong encoding (it declares UTF-8); verify before editing.
	kannada_english_dict = {u'à²…':'a',u'à²†':'aa',u'à²‡':'i',u'à²ˆ':'i',u'à²‰':'u',\
	u'à²Š':'u',u'à²‹':'rri',u'à²Ž':'e',u'à²':'e',u'à²':'ai',\
	u'à²’':'o',u'à²“':'o',u'à²”':'au',u'à²‚':'m',u'à²ƒ':'h',\
	u'à²•':'k',u'à²–':'kh',u'à²—':'g',u'à²˜':'gh',u'à²™':'ng',\
	u'à²š':'ch',u'à²›':'chh',u'à²œ':'j',u'à²':'jhh',u'à²ž':'nj',\
	u'à²¤':'th',u'à²¥':'thh',u'à²¦':'d',u'à²§':'dh',u'à²¨':'n',\
	u'à²Ÿ':'T',u'à² ':'Th',u'à²¡':'D',u'à²¢':'Dh',u'à²£':'N',\
	u'à²ª':'p',u'à²«':'ph',u'à²¬':'b',u'à²':'bh',u'à²®':'m',\
	u'à²¯':'y',u'à²°':'r',u'à²²':'l',u'à²µ':'v',u'à²¶':'sh',\
	u'à²·':'shh',u'à²¸':'s',u'à²¹':'h',u'à²³':'L',\
	u'à²‹':'rri',u'à³':'',u'à²¾':'aa',u'à²¿':'i',u'à³€':'i',\
	u'à³':'u',u'à³‚':'u',u'à³ƒ':'rri',u'à³†':'e',u'à³‡':'e',\
	u'à³ˆ':'ai',u'à³Š':'o',u'à³‹':'o',u'à³Œ':'au',\
	u'à²•à³à²·':'ksh',u'à²¤à³à²°':'tr',u'à²œà³à²ž':'jn',\
	u'à³§':'1',u'à³¨':'2',u'à³©':'3',u'à³ª':'4',u'à³«':'5',\
	u'à³¬':'6',u'à³':'7',u'à³®':'8',u'à³¯':'9',u'à³¦':'0'}

	# Kannada vowel list, registered under "kn_IN" in language_vowels.
	kn_vowels = [u'à²…',u'à²†',u'à²‡',u'à²ˆ',u'à²‰',u'à²Š',u'à²‹',u'à²Ž',u'à²',u'à²',\
	u'à²’',u'à²“',u'à²”']
	# Kannada vowel-sign list, registered under "kn_IN" in
	# language_vowel_signs.
	kn_vowel_signs = [u'à³',u'à²¾',u'à²¿',u'à³€',u'à³',u'à³‚',u'à³ƒ',u'à³†',u'à³‡',\
	u'à³ˆ',u'à³Š',u'à³‹',u'à³Œ',u'à²‚',u'à²ƒ',u' ']

	# Tamil lookup table: maps unicode keys to the Latin string used for them.
	# Every key is a unicode literal; one key used to be a plain byte-string
	# literal, which on Python 2 never compares equal to the unicode text
	# looked up in this table.
	# NOTE(review): these Tamil tables are not registered in the language_*
	# mappings further down -- confirm whether that is intentional.
	tamil_english_dict = {u'à®…':'a',u'à®†':'aa',u'à®‡':'i',u'à®ˆ':'ii',u'à®‰':'u',u'à®Š':'uu',u'à®Ž':'e',\
	u'à®':'ee',u'à®':'ai',u'à®’':'o',u'à®“':'oo',u'à®”':'au',\
	u'à®•à¯':'k', u'à®™à¯':'ng', u'à®šà¯':'s',u'à®žà¯':'nj', u'à®Ÿà¯':'d', u'à®£à¯':'N', u'à®¤à¯':'th', u'à®¨à¯':'w',\
	u'à®ªà¯':'p', u'à®®à¯':'m', u'à®¯à¯':'y', u'à®°à¯':'r', u'à®²à¯':'l', u'à®µà¯':'v', u'à®´à¯':'zh', u'à®³à¯':'L', u'à®±à¯':'R', u'à®©à¯':'n'}

	# Tamil vowel list.
	tamil_vowels = [u'à®…',u'à®†',u'à®‡',u'à®ˆ',u'à®‰',u'à®Š',u'à®Ž',\
	u'à®',u'à®',u'à®’',u'à®“',u'à®”',u'à®ƒ']

	# Tamil vowel-sign list.
	tamil_vowel_signs = [u'à®¾',u'à®¿',u'à¯€',u'à¯',u'à¯‚',u'à¯†',u'à¯‡',u'à¯ˆ',u'à¯Š',u'à¯‹',u'à¯Œ',u'à¯']

	# Malayalam lookup table: maps unicode keys (single characters and one
	# multi-character sequence) to the Latin string used for them. Some
	# values are empty strings.
	# NOTE(review): if the literals below look garbled, the file was read
	# with the wrong encoding (it declares UTF-8); verify before editing.
	malayalam_english_dict={u'à´…':'a',u'à´†':'aa',u'à´‡':'i',u'à´ˆ':'ee',u'à´‰':'u',u'à´Š':'oo',u'à´‹':'ri',\
	u'à´Ž':'e',u'à´':'e',u'à´':'ai',u'à´’':'o',u'à´“':'o',u'à´”':'au',\
	u'à´•':'k',u'à´–':'kh',u'à´—':'g',u'à´˜':'gh',u'à´™àµà´™':'ng',u'à´™':'ng',\
	u'à´š':'ch',u'à´›':'chh',u'à´œ':'j',u'à´':'jhh',u'à´ž':'nj',\
	u'à´Ÿ':'t',u'à´ ':'th',u'à´¡':'d',u'à´¢':'dh',u'à´£':'n',\
	u'à´¤':'th',u'à´¥':'th',u'à´¦':'d',u'à´§':'dh',u'à´¨':'n',\
	u'à´ª':'p',u'à´«':'ph',u'à´¬':'b',u'à´':'bh',u'à´®':'m',\
	u'à´¯':'y',u'à´°':'r',u'à´²':'l', u'à´µ':'v', u'à´±':'r',\
	u'à´¶':'s',u'à´·':'sh',u'à´¸':'s', u'à´¹':'h',u'à´³':'l',u'à´´':'zh',\
	u'àµ':'',u'à´‚':'m',u'à´¾':'aa',u'à´¿':'i' ,u'àµ€':'ee' ,u'àµ':'u',\
	u'àµ‚':'oo',u'àµƒ':'ri' ,u'àµ†':'e' ,u'àµ‡':'e',\
	u'àµˆ':'ai',u'àµŠ':'o' ,u'àµ‹':'oo' ,u'àµ—':'au', u'àµŒ':'ou'}

	# Malayalam vowel list, registered under "ml_IN" in language_vowels.
	ml_vowels = [u'à´…',u'à´†',u'à´‡',u'à´ˆ',u'à´‰' ,u'à´Š',u'à´‹', u'à´Ž',u'à´',u'à´',\
	u'à´’',u'à´“',u'à´”']
	# Malayalam vowel-sign list, registered under "ml_IN" in
	# language_vowel_signs.
	ml_vowel_signs = [u'àµ',u'à´‚',u'à´¾',u'à´¿',u'àµ€',u'àµ', u'àµ‚',u'àµƒ' ,u'àµ†' ,u'àµ‡',\
	u'àµˆ',u'àµŠ' ,u'àµ‹' ,u'àµ—' , u'àµŒ',u'â€']


	# P.S: Please declare all language related variables above this and
	# fill in the following mapping as you add dictionary vowels and
	# vowel_signs for your language


	# Registry: language code -> transliteration dictionary.
	# Only "kn_IN" and "ml_IN" are registered here; the Tamil tables defined
	# earlier in this file have no entry in any of the mappings below.
	language_dictionary = {"kn_IN":kannada_english_dict,\
	"ml_IN":malayalam_english_dict}

	# Registry: language code -> list of vowels.
	language_vowels = {"kn_IN":kn_vowels,"ml_IN":ml_vowels}

	# Registry: language code -> list of vowel signs.
	language_vowel_signs = {"kn_IN":kn_vowel_signs,\
	"ml_IN":ml_vowel_signs}

	# Registry: language code -> virama sign (a single unicode string).
	language_virama = {"kn_IN":u"à³","ml_IN":u"àµ"}
	# Registry: language code -> anuswara sign (a single unicode string).
	language_anuswara = {"kn_IN":u"à²‚","ml_IN":u'à´‚'}

	def get_dictionary_for(lang="ml_IN"):
		"""
		Return the transliteration dictionary registered for a language.
		If no dictionary is registered for the language, the ml_IN
		dictionary is returned instead.

		Arguments:
		- `lang`: Language for which dictionary is required
		"""

		# The fallback has to be the ml_IN dictionary itself; passing the
		# string "ml_IN" as the default handed callers a str, not a dict.
		return language_dictionary.get(lang, language_dictionary["ml_IN"])

	def get_vowels_for(lang="ml_IN"):
		"""
		Return the vowel list registered for a language.
		If no vowel list is registered for the language, the ml_IN
		list is returned instead.

		Arguments:
		- `lang`: Language for which vowel list should be returned
		"""

		# The fallback has to be the ml_IN list itself; passing the string
		# "ml_IN" as the default handed callers a str, not a list of vowels.
		return language_vowels.get(lang, language_vowels["ml_IN"])

	def get_vowel_signs_for(lang="ml_IN"):
		"""
		Return the vowel-sign list registered for a language.
		If no vowel-sign list is registered for the language, the ml_IN
		list is returned instead.

		Arguments:
		- `lang`: Language for which vowel signs list should be returned
		"""

		# The fallback has to be the ml_IN list itself; passing the string
		# "ml_IN" as the default handed callers a str, not a list of signs.
		return language_vowel_signs.get(lang, language_vowel_signs["ml_IN"])

	def get_virama_for(lang="ml_IN"):
		"""
		Return the virama symbol registered for a language.
		If no virama is registered for the language, the ml_IN
		virama is returned instead.

		Arguments:
		- `lang`: Language for which virama symbol should be returned
		"""

		# The fallback has to be the ml_IN virama itself; passing the string
		# "ml_IN" as the default returned that five-character code as if it
		# were the sign.
		return language_virama.get(lang, language_virama["ml_IN"])

	def get_anuswara_for(lang="ml_IN"):
		"""
		Return the anuswara symbol registered for a language.
		If no anuswara is registered for the language, the ml_IN
		anuswara is returned instead.

		Arguments:
		- `lang`: Language for which anuswara symbol is needed
		"""

		# The fallback has to be the ml_IN anuswara itself; passing the
		# string "ml_IN" as the default returned that five-character code as
		# if it were the sign.
		return language_anuswara.get(lang, language_anuswara["ml_IN"])