Created
May 13, 2015 03:29
-
-
Save swanhtet1992/5b5cc07edf6fce1c8be3 to your computer and use it in GitHub Desktop.
Zawgyi <-> Unicode Converter written in Scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Zawgyi<>Unicode converter (Scala)
// Based on rules from Parabaik Myanmar Text Converter Copyright (C) 2014 Ngwe Tun (Solveware Solution)
// Copyright (C) 2014 Swan Htet Aung
/** | |
* | |
* @author SH (github.com/swanhtet1992) | |
*/ | |
/**
 * Minimal parser/validator for command lines made of `-name value` pairs.
 *
 * Parameters are declared with the chainable `parm` overloads; `req` and `rex`
 * adjust the parameter declared by the most recent `parm` call, e.g.
 * `pp.parm("-p1", "1").rex(".+").req(true)`. `validate` then checks a concrete
 * argument list against the declarations.
 *
 * @param help usage text embedded in the error message produced by `validate`
 */
class ParseParms(val help: String) {
  // name -> (default value, validation regex, required flag)
  private var parms = Map[String, (String, String, Boolean)]()
  // name declared by the latest parm(...) call; the target of req/rex
  private var cache: Option[String] = None

  /** Stores (or overwrites) a declaration and remembers it as the current one. */
  private def register(name: String, default: String, rex: String, req: Boolean): ParseParms = {
    parms += name -> ((default, rex, req))
    cache = Some(name)
    this
  }

  /** Declares an optional parameter with an empty default that accepts any single-line value. */
  def parm(name: String): ParseParms =
    register(name, "", "^.*$", false)

  /** Declares an optional parameter; the validation regex is derived from `default`. */
  def parm(name: String, default: String): ParseParms =
    register(name, default, defRex(default), false)

  /** Declares an optional parameter validated by the regex `rex`. */
  def parm(name: String, default: String, rex: String): ParseParms =
    register(name, default, rex, false)

  /** Declares a parameter with an explicit regex and required flag. */
  def parm(name: String, default: String, rex: String, req: Boolean): ParseParms =
    register(name, default, rex, req)

  /** Declares a parameter with an explicit required flag; the regex is derived from `default`. */
  def parm(name: String, default: String, req: Boolean): ParseParms =
    register(name, default, defRex(default), req)

  /** Sets the required flag of the most recently declared parameter (no-op if none). */
  def req(value: Boolean): ParseParms =
    updateCurrent { case (default, pattern, _) => (default, pattern, value) }

  /** Sets the validation regex of the most recently declared parameter (no-op if none). */
  def rex(value: String): ParseParms =
    updateCurrent { case (default, _, required) => (default, value, required) }

  /** Applies `change` to the declaration of the current parameter, if there is one. */
  private def updateCurrent(
      change: ((String, String, Boolean)) => (String, String, Boolean)): ParseParms = {
    for {
      name <- cache
      spec <- parms.get(name)
    } parms += name -> change(spec)
    this // enables chained calls
  }

  /** All-digit defaults imply an all-digit regex; anything else accepts any single-line value. */
  private def defRex(default: String): String =
    if (default.matches("^\\d+$")) "^\\d+$" else "^.*$"

  /**
   * Pairs up `-name value` arguments. An odd number of arguments cannot be
   * paired, in which case an empty Map is returned (so every required
   * parameter is then reported as missing). A repeated name keeps its last value.
   */
  private def genMap(args: List[String]): Map[String, String] =
    if (args.length % 2 != 0) Map.empty[String, String]
    else args.grouped(2).collect { case List(name, value) => name -> value }.toMap

  /** Names of required parameters that are absent from `args`. */
  private def testRequired(args: Map[String, String]): List[String] =
    parms.collect { case (name, (_, _, true)) if !args.contains(name) => name }.toList

  /** Names of supplied, declared parameters whose value fails its regex. Undeclared names are ignored. */
  private def validParms(args: Map[String, String]): List[String] =
    args.collect {
      case (name, value)
          if parms.get(name).exists { case (_, pattern, _) => !value.matches(pattern) } => name
    }.toList

  /** Every declared parameter mapped to its supplied value, or to its default when not supplied. */
  private def mergeParms(args: Map[String, String]): Map[String, String] =
    parms.map { case (name, (default, _, _)) => name -> args.getOrElse(name, default) }

  /**
   * Builds the error text. Each section is emitted only when its list is
   * non-empty, so the message no longer claims required parameters are missing
   * when the only problem is an invalid value.
   */
  private def mkString(l1: List[String], l2: List[String]): String = {
    val missingPart =
      if (l1.nonEmpty) "\n\trequired parms missing: " + l1.mkString(" ") else ""
    val invalidPart =
      if (l2.nonEmpty) "\n\tinvalid parms: " + l2.mkString(" ") + "\n" else ""
    "\nhelp: " + help + missingPart + invalidPart
  }

  /**
   * Validates `args` against the declared parameters.
   *
   * @param args flat argument list of the form `-name value -name value ...`
   * @return `(true, "", merged)` on success, where `merged` holds every declared
   *         parameter with its supplied or default value; otherwise
   *         `(false, message, Map())` where `message` lists the missing required
   *         and/or invalid parameters
   */
  def validate(args: List[String]): (Boolean, String, Map[String, String]) = {
    val argsMap = genMap(args)
    val missing = testRequired(argsMap)
    val invalid = validParms(argsMap)
    if (missing.isEmpty && invalid.isEmpty) (true, "", mergeParms(argsMap))
    else (false, mkString(missing, invalid), Map.empty[String, String])
  }
}
object Converter { | |
/**
 * Command-line entry point. Expects `-to zg|uni -path <file>.txt`; on valid
 * arguments the file is converted via `convertFile`, otherwise the validation
 * message (including the usage string) is printed.
 *
 * @param args raw command-line arguments as `-name value` pairs
 */
def main(args: Array[String]): Unit = {
  val helpString = " -to zg/uni -path file.txt"
  val pp = new ParseParms(helpString)
  pp.parm("-to", "zg").req(true)
    .parm("-path", "file.txt").rex("^.*\\.txt$").req(true)
  val (ok, message, options) = pp.validate(args.toList)
  if (ok) {
    // Look the options up by name; relying on the iteration order of the
    // result Map (values.head / values.last) breaks as soon as parameters
    // are added or declared in a different order.
    convertFile(options("-to"), options("-path"))
  } else {
    println(message)
  }
}
/**
 * Reads the whole file at `path`, converts its text and writes the result to
 * a fixed file in the working directory.
 *
 * @param to   `"zg"` writes `uni512zg1(text)` to `./zawgyi.txt`;
 *             `"uni"` writes `zg12uni51(text)` to `./unicode.txt`;
 *             any other value only prints a message
 * @param path file to read; read with the same default codec as before
 */
def convertFile(to: String, path: String): Unit = {
  import tools.nsc.io.File
  import io.Source
  val source = Source.fromFile(path)
  // Close the underlying file handle even if reading fails.
  val text = try source.mkString finally source.close()
  to match {
    case "zg"  => File("./zawgyi.txt").writeAll(uni512zg1(text))
    case "uni" => File("./unicode.txt").writeAll(zg12uni51(text))
    case _     => println("Please give me valid options")
  }
}
/**
 * Rewrites `input` by applying a fixed, ordered list of regex replacements
 * (`String.replaceAll`); every rule operates on the output of the previous
 * one, so the order of the table is significant. Judging by the name and by
 * the "zg" branch of `convertFile`, this is the Unicode -> Zawgyi direction.
 *
 * @param input text to convert
 * @return the text after all rules have been applied
 */
def uni512zg1(input: String): String = {
  // (regex pattern, replacement) pairs, applied top to bottom.
  val rules: List[(String, String)] = List(
    "\\u104e\\u1004\\u103a\\u1038" -> "\u104e",
    "\\u102b\\u103a" -> "\u105a",
    "\\u102d\\u1036" -> "\u108e",
    "\\u103f" -> "\u1086",
    // U+1037 becomes U+1094 or U+1095 depending on what precedes it
    "(?<=\\u102f)\\u1037" -> "\u1094",
    "(?<=\\u102f\\u1036)\\u1037" -> "\u1094",
    "(?<=\\u1030)\\u1037" -> "\u1094",
    "(?<=\\u1030\\u1036)\\u1037" -> "\u1094",
    "(?<=\\u1014)\\u1037" -> "\u1094",
    "(?<=\\u1014[\\u103a\\u1032])\\u1037" -> "\u1094",
    "(?<=\\u103b)\\u1037" -> "\u1095",
    "(?<=\\u103b[\\u1032\\u1036])\\u1037" -> "\u1095",
    "(?<=\\u103d)\\u1037" -> "\u1095",
    "(?<=\\u103d[\\u1032])\\u1037" -> "\u1095",
    // U+102F -> U+1033 after medials, stacked consonants and selected consonants
    "(?<=[\\u103b\\u103c\\u103d])\\u102f" -> "\u1033",
    "(?<=[\\u103b\\u103c\\u103d][\\u102d\\u1036])\\u102f" -> "\u1033",
    "(?<=(\\u1039[\\u1000-\\u1021]))\\u102f" -> "\u1033",
    "(?<=(\\u1039[\\u1000-\\u1021])[\\u102d\\u1036])\\u102f" -> "\u1033",
    "(?<=[\\u100a\\u100c\\u1020\\u1025\\u1029])\\u102f" -> "\u1033",
    "(?<=[\\u100a\\u100c\\u1020\\u1025\\u1029][\\u102d\\u1036])\\u102f" -> "\u1033",
    // U+1030 -> U+1034 in the corresponding contexts
    "(?<=[\\u103b\\u103c])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u103d])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u103e])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u102d\\u1036])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u103d][\\u103e])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u103d][\\u102d\\u1036])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u103e][\\u102d\\u1036])\\u1030" -> "\u1034",
    "(?<=[\\u103b\\u103c][\\u103d][\\u103e][\\u102d\\u1036])\\u1030" -> "\u1034",
    "(?<=(\\u1039[\\u1000-\\u1021]))\\u1030" -> "\u1034",
    "(?<=(\\u1039[\\u1000-\\u1021])[\\u102d\\u1036])\\u1030" -> "\u1034",
    "(?<=[\\u100a\\u100c\\u1020\\u1025\\u1029])\\u1030" -> "\u1034",
    "(?<=[\\u100a\\u100c\\u1020\\u1025\\u1029][\\u102d\\u1036])\\u1030" -> "\u1034",
    "(?<=\\u103c)\\u103e" -> "\u1087",
    "\\u1009(?=[\\u103a])" -> "\u1025",
    "\\u1009(?=\\u1039[\\u1000-\\u1021])" -> "\u1025",
    // move U+1031 in front of the consonant cluster, and U+103C in front of the consonant
    "([\\u1000-\\u1021\\u1029])((?:\\u1039[\\u1000-\\u1021])?)((?:[\\u103b-\\u103e\\u1087]*)?)\\u1031" -> "\u1031$1$2$3",
    "([\\u1000-\\u1021\\u1029])((?:\\u1039[\\u1000-\\u1021\\u1000-\\u1021])?)(\\u103c)" -> "$3$1$2",
    // U+1004 U+103A U+1039 -> U+1064, then moved behind the following consonant
    "\\u1004\\u103a\\u1039" -> "\u1064",
    "(\\u1064)((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u102d" -> "$2$3$4\u108b",
    "(\\u1064)((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u102e" -> "$2$3$4\u108c",
    "(\\u1064)((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u1036" -> "$2$3$4\u108d",
    "(\\u1064)((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])" -> "$2$3$4\u1064",
    "\\u100a(?=[\\u1039\\u102f\\u1030])" -> "\u106b",
    "\\u100a" -> "\u100a", // identity rule, kept from the original rule list
    "\\u101b(?=[\\u102f\\u1030])" -> "\u1090",
    "\\u101b" -> "\u101b", // identity rule, kept from the original rule list
    "\\u1014(?=[\\u1039\\u103d\\u103e\\u102f\\u1030])" -> "\u108f",
    "\\u1014" -> "\u1014", // identity rule, kept from the original rule list
    // U+1039 + consonant -> single code point
    "\\u1039\\u1000" -> "\u1060",
    "\\u1039\\u1001" -> "\u1061",
    "\\u1039\\u1002" -> "\u1062",
    "\\u1039\\u1003" -> "\u1063",
    "\\u1039\\u1005" -> "\u1065",
    "\\u1039\\u1006" -> "\u1066",
    "(?<=[\\u1001\\u1002\\u1004\\u1005\\u1007\\u1012\\u1013\\u108f\\u1015\\u1016\\u1017\\u1019\\u101d])\\u1066" -> "\u1067",
    "\\u1039\\u1007" -> "\u1068",
    "\\u1039\\u1008" -> "\u1069",
    "\\u1039\\u100f" -> "\u1070",
    "\\u1039\\u1010" -> "\u1071",
    "(?<=[\\u1001\\u1002\\u1004\\u1005\\u1007\\u1012\\u1013\\u108f\\u1015\\u1016\\u1017\\u1019\\u101d])\\u1071" -> "\u1072",
    "\\u1039\\u1011" -> "\u1073",
    "(?<=[\\u1001\\u1002\\u1004\\u1005\\u1007\\u1012\\u1013\\u108f\\u1015\\u1016\\u1017\\u1019\\u101d])\\u1073" -> "\u1074",
    "\\u1039\\u1012" -> "\u1075",
    "\\u1039\\u1013" -> "\u1076",
    "\\u1039\\u1014" -> "\u1077",
    "\\u1039\\u1015" -> "\u1078",
    "\\u1039\\u1016" -> "\u1079",
    "\\u1039\\u1017" -> "\u107a",
    "\\u1039\\u1018" -> "\u107b",
    "\\u1039\\u1019" -> "\u107c",
    "\\u1039\\u101c" -> "\u1085",
    "\\u100f\\u1039\\u100d" -> "\u1091",
    "\\u100b\\u1039\\u100c" -> "\u1092",
    "\\u1039\\u100c" -> "\u106d",
    "\\u100b\\u1039\\u100b" -> "\u1097",
    "\\u1039\\u100b" -> "\u106c",
    "\\u100e\\u1039\\u100d" -> "\u106f",
    "\\u100d\\u1039\\u100d" -> "\u106e",
    "\\u1009(?=\\u103a)" -> "\u1025",
    "\\u1025(?=[\\u1039\\u102f\\u1030])" -> "\u106a",
    "\\u1025" -> "\u1025", // identity rule, kept from the original rule list
    // shift U+103A..U+103E down by one code point; combined forms are handled first
    "\\u103a" -> "\u1039",
    "\\u103b\\u103d\\u103e" -> "\u107d\u108a",
    "\\u103d\\u103e" -> "\u108a",
    "\\u103b" -> "\u103a",
    "\\u103c" -> "\u103b",
    "\\u103d" -> "\u103c",
    "\\u103e" -> "\u103d",
    "\\u103a(?=[\\u103c\\u103d\\u108a])" -> "\u107d",
    "(?<=\\u100a(?:[\\u102d\\u102e\\u1036\\u108b\\u108c\\u108d\\u108e]))\\u103d" -> "\u1087",
    "(?<=\\u100a)\\u103d" -> "\u1087",
    // variants of the (already shifted) U+103B chosen by the following consonant and signs
    "\\u103b(?=[\\u1000\\u1003\\u1006\\u100f\\u1010\\u1011\\u1018\\u101a\\u101c\\u101e\\u101f\\u1021])" -> "\u107e",
    "\\u107e([\\u1000-\\u1021\\u108f])(?=[\\u102d\\u102e\\u1036\\u108b\\u108c\\u108d\\u108e])" -> "\u1080$1",
    "\\u107e([\\u1000-\\u1021\\u108f])(?=[\\u103c\\u108a])" -> "\u1082$1",
    "\\u103b([\\u1000-\\u1021\\u108f])(?=[\\u102d\\u102e\\u1036\\u108b\\u108c\\u108d\\u108e])" -> "\u107f$1",
    "\\u103b([\\u1000-\\u1021\\u108f])(?=[\\u103c\\u108a])" -> "\u1081$1",
    // second pass over U+1037 / U+1094 now that the surrounding code points have changed
    "(?<=\\u1014)\\u1037" -> "\u1094",
    "(?<=\\u1014[\\u103a\\u1032])\\u1037" -> "\u1094",
    "(?<=\\u1033)\\u1094" -> "\u1095",
    "(?<=\\u1033[\\u1036])\\u1094" -> "\u1095",
    "(?<=\\u1034)\\u1094" -> "\u1095",
    "(?<=\\u1034[\\u1036])\\u1094" -> "\u1095",
    "(?<=[\\u103c\\u103d\\u108a])\\u1037" -> "\u1095",
    "(?<=[\\u103c\\u103d\\u108a][\\u1032])\\u1037" -> "\u1095"
  )

  rules.foldLeft(input) { case (text, (pattern, replacement)) =>
    text.replaceAll(pattern, replacement)
  }
}
/**
 * Rewrites `input` by applying a fixed, ordered list of regex replacements
 * (`String.replaceAll`); every rule operates on the output of the previous
 * one, so the order of the table is significant. Judging by the name and by
 * the "uni" branch of `convertFile`, this is the Zawgyi -> Unicode direction.
 *
 * Two rules differ from the earlier revision of this table:
 *  - U+108A now expands to U+103D U+103E (it used to expand to U+103D U+103D,
 *    which contradicts `uni512zg1`, where U+103D U+103E is what produces U+108A);
 *  - the vowel-sign group in the "move stacked consonant forward" rule now
 *    captures both signs when two are present (a capturing group placed under
 *    `{1,2}` only retains its last repetition, so the first sign was dropped).
 *
 * @param input text to convert
 * @return the text after all rules have been applied
 */
def zg12uni51(input: String): String = {
  // (regex pattern, replacement) pairs, applied top to bottom.
  val rules: List[(String, String)] = List(
    "\\u106a" -> "\u1009",
    "\\u1025(?=[\\u1039\\u102c])" -> "\u1009",
    "\\u1025\\u102e" -> "\u1026",
    "\\u106b" -> "\u100a",
    "\\u1090" -> "\u101b",
    "\\u1040" -> "\u1040", // identity rule, kept from the original rule list
    "\\u108f" -> "\u1014",
    "\\u1012" -> "\u1012", // identity rule, kept from the original rule list
    "\\u1013" -> "\u1013", // identity rule, kept from the original rule list
    // shift U+1039..U+103D up by one code point and fold variant forms together
    "[\\u103d\\u1087]" -> "\u103e",
    "\\u103c" -> "\u103d",
    "[\\u103b\\u107e\\u107f\\u1080\\u1081\\u1082\\u1083\\u1084]" -> "\u103c",
    "[\\u103a\\u107d]" -> "\u103b",
    "\\u103d\\u103b" -> "\u103b\u103d",
    // fixed: inverse of the "\u103d\u103e" -> "\u108a" rule in uni512zg1
    "\\u108a" -> "\u103d\u103e",
    // NOTE(review): identity rule as written; possibly meant to reorder
    // U+103E U+103D into U+103D U+103E -- confirm against the upstream rule set.
    "\\u103d\\u103d" -> "\u103d\u103d",
    // U+1064 and its combined forms move in front of the consonant they follow
    "((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u1064" -> "\u1064$1$2$3",
    "((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108b" -> "\u1064$1$2$3\u102d",
    "((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108c" -> "\u1064$1$2$3\u102e",
    "((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108d" -> "\u1064$1$2$3\u1036",
    "\\u105a" -> "\u102b\u103a",
    "\\u108e" -> "\u102d\u1036",
    "\\u1033" -> "\u102f",
    "\\u1034" -> "\u1030",
    "\\u1088" -> "\u103d\u102f",
    "\\u1089" -> "\u103d\u1030",
    "\\u1039" -> "\u103a",
    "[\\u1094\\u1095]" -> "\u1037",
    // fixed: quantifier is inside the group so that both vowel signs survive
    "([\\u1000-\\u1021])([\\u102c\\u102d\\u102e\\u1032\\u1036]{1,2})([\\u1060\\u1061\\u1062\\u1063\\u1065\\u1066\\u1067\\u1068\\u1069\\u1070\\u1071\\u1072\\u1073\\u1074\\u1075\\u1076\\u1077\\u1078\\u1079\\u107a\\u107b\\u107c\\u1085])" -> "$1$3$2",
    "\\u1064" -> "\u1004\u103a\u1039",
    "\\u104e" -> "\u104e\u1004\u103a\u1038",
    "\\u1086" -> "\u103f",
    // single code points -> U+1039 + consonant
    "\\u1060" -> "\u1039\u1000",
    "\\u1061" -> "\u1039\u1001",
    "\\u1062" -> "\u1039\u1002",
    "\\u1063" -> "\u1039\u1003",
    "\\u1065" -> "\u1039\u1005",
    "[\\u1066\\u1067]" -> "\u1039\u1006",
    "\\u1068" -> "\u1039\u1007",
    "\\u1069" -> "\u1039\u1008",
    "\\u106c" -> "\u1039\u100b",
    "\\u1070" -> "\u1039\u100f",
    "[\\u1071\\u1072]" -> "\u1039\u1010",
    "[\\u1073\\u1074]" -> "\u1039\u1011",
    "\\u1075" -> "\u1039\u1012",
    "\\u1076" -> "\u1039\u1013",
    "\\u1077" -> "\u1039\u1014",
    "\\u1078" -> "\u1039\u1015",
    "\\u1079" -> "\u1039\u1016",
    "\\u107a" -> "\u1039\u1017",
    "\\u107b" -> "\u1039\u1018",
    "\\u107c" -> "\u1039\u1019",
    "\\u1085" -> "\u1039\u101c",
    "\\u106d" -> "\u1039\u100c",
    "\\u1091" -> "\u100f\u1039\u100d",
    "\\u1092" -> "\u100b\u1039\u100c",
    "\\u1097" -> "\u100b\u1039\u100b",
    "\\u106f" -> "\u100e\u1039\u100d",
    "\\u106e" -> "\u100d\u1039\u100d",
    // move U+103C behind the consonant (and its stacked consonant, if any)
    "(\\u103c)([\\u1000-\\u1021])((?:\\u1039[\\u1000-\\u1021])?)" -> "$2$3$1",
    // NOTE(review): the next three rules only mention U+103D and the last two
    // are identical; one of them may have been meant for U+103E -- confirm
    // against the upstream rule set before changing.
    "(\\u103d)(\\u103d)([\\u103b\\u103c])" -> "$3$2$1",
    "(\\u103d)([\\u103b\\u103c])" -> "$2$1",
    "(\\u103d)([\\u103b\\u103c])" -> "$2$1",
    // U+1040 / U+1047 next to letters are rewritten to U+101D / U+101B
    "(?<=([\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f]))(\\u1040)(?=\\s)?" -> "\u101d",
    "(?<=(\\u101d))(\\u1040)(?=\\s)?" -> "\u101d",
    "(?<=([\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f\\s]))(\\u1047)" -> "\u101b",
    "(\\u1047)(?=[\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f\\s])" -> "\u101b",
    // final reordering of the pieces of a syllable (see the group order in the replacement)
    "((?:\\u1031)?)([\\u1000-\\u1021])((?:\\u1039[\\u1000-\\u1021])?)((?:[\\u102d\\u102e\\u1032])?)([\\u1036\\u1037\\u1038]{0,2})([\\u103b-\\u103d]{0,3})((?:[\\u102f\\u1030])?)([\\u1036\\u1037\\u1038]{0,2})((?:[\\u102d\\u102e\\u1032])?)" -> "$2$3$6$1$4$9$7$5$8",
    "\\u1036\\u102f" -> "\u102f\u1036",
    "(\\u103a)(\\u1037)" -> "$2$1"
  )

  rules.foldLeft(input) { case (text, (pattern, replacement)) =>
    text.replaceAll(pattern, replacement)
  }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment