Skip to content

Instantly share code, notes, and snippets.

@hrishikeshrt
Last active February 9, 2022 20:05
Show Gist options
  • Save hrishikeshrt/e4f6b1d0a26618afb8fe2ed90efc7782 to your computer and use it in GitHub Desktop.
Save hrishikeshrt/e4f6b1d0a26618afb8fe2ed90efc7782 to your computer and use it in GitHub Desktop.
Transliteration CLI
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Transliteration Interface
"""
###############################################################################
# Package metadata.
__author__ = """Hrishikesh Terdalkar"""
# Version string; the REPL shell imports it for its banner and for the
# `version` command.
__version__ = '0.2.0'
# Timestamp recording when this file was created.
__created_on__ = "Sat Jan 29 20:04:40 2022"
###############################################################################
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Transliteration CLI
@author: Hrishikesh Terdalkar
"""
import argparse
from indic_transliteration import detect
from indic_transliteration.sanscript import SCHEMES, transliterate
# --------------------------------------------------------------------------- #
# Command line interface
#
# Reads text from the input file, transliterates it from the source scheme to
# the target scheme and writes the result to the output file.
#   * source 'detect' : guess the scheme from the input text
#   * target 'auto'   : 'iast' when the source is devanagari,
#                       otherwise 'devanagari'
# The letter case of the target name selects the case of the output:
# 'Iast' title-cases the result and 'IAST' upper-cases it.
# --------------------------------------------------------------------------- #
parser = argparse.ArgumentParser(
    description='Transliterate text to and from various schemes'
)
parser.add_argument('-i', '--input', help='Input File', default='/dev/stdin')
parser.add_argument('-o', '--output', help='Output File', default='/dev/stdout')
parser.add_argument('-f', '--from', help='Source Scheme', default='detect')
parser.add_argument('-t', '--to', help='Target Scheme', default='auto')

# `from` is a Python keyword, so the namespace is read as a dict instead of
# through attribute access.
args = vars(parser.parse_args())

infile = args['input']
outfile = args['output']
from_scheme = args['from']
to_scheme = args['to']

# Indic text must not depend on the locale's default encoding.
with open(infile, 'r', encoding='utf-8') as ifp:
    input_text = ifp.read()

if from_scheme == 'detect':
    from_scheme = detect.detect(input_text).lower()

if to_scheme == 'auto':
    # Compare case-insensitively so that `-f Devanagari` is not
    # "transliterated" from devanagari to devanagari.
    if from_scheme.lower() == 'devanagari':
        to_scheme = 'iast'
    else:
        to_scheme = 'devanagari'

if from_scheme.lower() not in SCHEMES:
    raise ValueError(f'Unknown source scheme "{from_scheme}"')

if to_scheme.lower() not in SCHEMES:
    # Report the offending *target* scheme (previously printed the source).
    raise ValueError(f'Unknown target scheme "{to_scheme}"')

output_text = transliterate(input_text, from_scheme.lower(), to_scheme.lower())

# The case of the requested target name selects the case of the output.
if to_scheme in ['Iast']:
    output_text = output_text.title()

if to_scheme in ['IAST']:
    output_text = output_text.upper()

with open(outfile, 'w', encoding='utf-8') as ofp:
    ofp.write(output_text)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Transliteration REPL Shell
"""
###############################################################################
import os
import cmd
import logging
try:
import readline
except ImportError:
readline = None
from pathlib import Path
from indic_transliteration import detect, sanscript
from indic_transliteration.sanscript import SCHEMES, transliterate
from tabulate import tabulate
try:
from . import __version__
except Exception:
from __init__ import __version__
###############################################################################
class BasicShell(cmd.Cmd):
    """Minimal command shell.

    Blank lines do nothing, lines starting with '!' are passed to the
    system shell, and `exit` or Ctrl + D ends the session.
    """

    def emptyline(self):
        # Deliberate no-op for an empty input line.
        return None

    def do_shell(self, commad):
        """Execute shell commands"""
        # cmd.Cmd dispatches any line beginning with '!' to this method.
        os.system(commad)

    def do_exit(self, arg):
        """Exit the shell"""
        print("Bye")
        # A true return value stops the command loop.
        return True

    # Ctrl + D is delivered as the "EOF" command; handle it like `exit`.
    do_EOF = do_exit
###############################################################################
class TransliterationShell(BasicShell):
    """REPL that transliterates each input line into the selected schemes.

    Any line that is not a recognised command is transliterated from the
    input scheme (auto-detected by default) into every configured output
    scheme, and the results are printed as a table.

    The letter case of an output scheme name selects the case of its
    output: a Title-case name (e.g. ``Iast``) title-cases the result and an
    UPPER-case name (e.g. ``IAST``) upper-cases it.
    """

    intro = f"indic-transliteration shell ({__version__})"
    desc = "Type any input to transliterate." \
           " (help or ? for list of options)"
    prompt = "(its) "

    def __init__(self, history_file='~/.its_history', history_size=1000):
        """
        Parameters
        ----------
        history_file : str
            Path of the persistent readline history file ('~' is expanded).
        history_size : int
            Maximum number of history entries kept when the file is written.
        """
        # Zero-argument super(): `super(self.__class__, self)` recurses
        # forever as soon as this class is subclassed.
        super().__init__()

        self.debug = False
        self.input_scheme = 'detect'
        self.output_schemes = [
            sanscript.DEVANAGARI,
            sanscript.IAST,
            sanscript.IAST.title(),
            sanscript.IAST.upper(),
            sanscript.ITRANS,
            sanscript.VELTHUIS,
            sanscript.WX,
            sanscript.SLP1,
            sanscript.HK
        ]

        # History
        self.history_file = Path(history_file).expanduser()
        self.history_size = history_size
        # cmdloop() may re-enter the base loop (and hence preloop) after a
        # KeyboardInterrupt; the history file must be read only once.
        self._history_loaded = False

        # Setup Logging
        self.logger = logging.getLogger(f'{self.__class__.__name__}')
        # The logger is shared per class name; attach the handler only once
        # so that several instances do not duplicate every message.
        if not self.logger.handlers:
            self.logger.addHandler(logging.StreamHandler())
        self.logger.setLevel(logging.INFO)

    # ----------------------------------------------------------------------- #
    # Persistent History

    def preloop(self):
        """Load the persistent history (once) before the loop starts"""
        if not readline or self._history_loaded:
            return
        if Path(self.history_file).exists():
            readline.read_history_file(self.history_file)
        self._history_loaded = True

    def postloop(self):
        """Write the (truncated) history back when the loop ends"""
        if readline:
            readline.set_history_length(self.history_size)
            readline.write_history_file(self.history_file)

    # ----------------------------------------------------------------------- #
    # Debug Mode

    def do_debug(self, arg):
        """Turn debug mode on/off"""
        if arg.lower() in ["true", "on", "yes"]:
            self.debug = True
            self.logger.setLevel(logging.DEBUG)
        if arg.lower() in ["false", "off", "no"]:
            self.debug = False
            self.logger.setLevel(logging.INFO)
        # Any other argument (including none) just reports the current state.
        print(f"Debug: {self.debug}")

    # ----------------------------------------------------------------------- #
    # Input/Output Transliteration Scheme

    def complete_input(self, text, line, begidx, endidx):
        """Tab-completion candidates for the `input` command"""
        valid_schemes = ['detect'] + list(SCHEMES)
        return [sch for sch in valid_schemes if sch.startswith(text)]

    def do_input(self, scheme):
        """Change the input transliteration scheme"""
        valid_schemes = ['detect'] + list(SCHEMES)
        if not scheme:
            print(f"Input scheme: {self.input_scheme}")
        else:
            if scheme.lower() not in valid_schemes:
                print(f"Invalid scheme. (valid schemes are {valid_schemes})")
            else:
                self.input_scheme = scheme.lower()
                print(f"Input scheme: {self.input_scheme}")

    # ----------------------------------------------------------------------- #

    def do_output(self, line):
        """Set the output transliteration schemes"""
        requested = line.split()
        # Names keep the case typed by the user: it selects the output case.
        valid = [word for word in requested if word.lower() in SCHEMES]
        invalid = [word for word in requested if word.lower() not in SCHEMES]

        if invalid:
            print(f"Ignoring invalid schemes: {invalid}")
        # Without any valid name keep the current selection instead of
        # silently emptying it (mirrors `input` called without arguments).
        if valid:
            self.output_schemes = valid
        print(f"Output schemes: {self.output_schemes}")

    # ----------------------------------------------------------------------- #

    def default(self, line):
        """Transliterate `line` into every output scheme and print a table"""
        if self.input_scheme == 'detect':
            input_scheme = detect.detect(line).lower()
        else:
            input_scheme = self.input_scheme
        self.logger.debug(f"Detected: {input_scheme}")

        output_text = {}
        for output_scheme in self.output_schemes:
            output_text[output_scheme] = transliterate(
                line,
                input_scheme.lower(),
                output_scheme.lower()
            )
            # The case of the scheme name selects the case of the output.
            if output_scheme == output_scheme.title():
                output_text[output_scheme] = output_text[output_scheme].title()
            if output_scheme == output_scheme.upper():
                output_text[output_scheme] = output_text[output_scheme].upper()

        print(tabulate(output_text.items(), tablefmt='fancy_grid'))

    # ----------------------------------------------------------------------- #

    def cmdloop(self, intro=None):
        """Run the REPL; Ctrl + C cancels the current line, not the shell"""
        if intro is None:
            intro = self.intro
        print(intro)
        print(self.desc)

        while True:
            try:
                # The banner was printed above; pass an empty intro so that
                # it is not repeated when the loop restarts after Ctrl + C.
                super().cmdloop(intro="")
                break
            except KeyboardInterrupt:
                print("\nKeyboardInterrupt")

    # ----------------------------------------------------------------------- #

    def do_version(self, text):
        """Print the shell version"""
        print(__version__)

    # ----------------------------------------------------------------------- #
###############################################################################
def main():
TransliterationShell().cmdloop()
if __name__ == '__main__':
main()
###############################################################################

Transliteration CLIs

Place all files in the same folder. Make shell.py and cli.py executable.

Run cli.py --help to see available options. Run shell.py from command line for REPL.

Example of REPL

indic-transliteration shell (0.2.0)
Type any input to transliterate. (help or ? for list of options)
(its) help

Documented commands (type help <topic>):
========================================
EOF  debug  exit  help  input  output  shell

Undocumented commands:
======================
version

(its) रामो राजमणिः
╒════════════╤════════════════════╕
│ devanagari │ रामो राजमणिः           │
├────────────┼────────────────────┤
│ iast       │ rāmo rājamaṇiḥ     │
├────────────┼────────────────────┤
│ Iast       │ Rāmo Rājamaṇiḥ     │
├────────────┼────────────────────┤
│ IAST       │ RĀMO RĀJAMAṆIḤ     │
├────────────┼────────────────────┤
│ itrans     │ rAmo rAjamaNiH     │
├────────────┼────────────────────┤
│ velthuis   │ raamo raajama.ni.h │
├────────────┼────────────────────┤
│ wx         │ rAmo rAjamaNiH     │
├────────────┼────────────────────┤
│ slp1       │ rAmo rAjamaRiH     │
├────────────┼────────────────────┤
│ hk         │ rAmo rAjamaNiH     │
╘════════════╧════════════════════╛
(its) input iast
Input scheme: iast
(its) output velthuis
Output schemes: ['velthuis']
(its)  rāmo rājamaṇiḥ 
╒══════════╤════════════════════╕
│ velthuis │ raamo raajama.ni.h │
╘══════════╧════════════════════╛
(its) input devanagari
Input scheme: devanagari
(its) output iast Iast IAST
Output schemes: ['iast', 'Iast', 'IAST']
(its) ॐ नमो भगवते रुद्राय
╒══════╤═══════════════════════════╕
│ iast │ oṃ namo bhagavate rudrāya │
├──────┼───────────────────────────┤
│ Iast │ Oṃ Namo Bhagavate Rudrāya │
├──────┼───────────────────────────┤
│ IAST │ OṂ NAMO BHAGAVATE RUDRĀYA │
╘══════╧═══════════════════════════╛
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment