Skip to content

Instantly share code, notes, and snippets.

@willnode
Last active July 20, 2021 07:22
Show Gist options
  • Save willnode/f25f4c4fbd1c36da7ee7f9c496a901c7 to your computer and use it in GitHub Desktop.
Save willnode/f25f4c4fbd1c36da7ee7f9c496a901c7 to your computer and use it in GitHub Desktop.
// TEXSupRTLSupport is a helper script whose goal is to intelligently filter strings
// before they are processed by existing Arabic plugins in the Unity game engine.
// It was originally developed for my TEXDraw plugin and is released here without any changes.
// The script allows you to mix Arabic and English (bidirectional) text without any problem,
// along with the LaTeX syntax and commands that are implemented in TEXDraw.
// However, it's not guaranteed to work 100% in the regular Text component,
// because the code doesn't handle HTML tags, and the Text component itself
// doesn't handle reversed wrapping (see the PersianFixer git repository for the solution).
// Code implementation starts from ReplaceString ()
// (C) 2017 Wildan Mubarok, released under the MIT License
using UnityEngine;
using System.Collections.Generic;
using System.Text;
using System;
namespace TexDrawLib
{
[AddComponentMenu ("TEXDraw/Supplemets/TEXSup RTL Support")]
#if TEXDRAW_ARABICSUPPORT || TEXDRAW_PERSIANFIXER
[TEXSupHelpTip ("Reverse the character to Support RTL language with given algorithm")]
#else
[TEXSupHelpTip("Reverse the character to Support RTL language with given algorithm.\nEnable Arabic/Parsi integration (in Tools/TEXDraw) for more algorithm options")]
#endif
public class TEXSupRTLSupport : TEXDrawSupplementBase
{
// Selects which routine Reverse () uses to turn a run of RTL text into display order.
// Members other than CharacterReversal only exist when the matching scripting define is set.
public enum RTLAlgorithm
{
// Built-in fallback: characters of each line are emitted in reverse order
CharacterReversal = 0,
#if TEXDRAW_ARABICSUPPORT
// Delegates to ArabicSupport.ArabicFixer.Fix
ArabicSupportByAbdullahKonash = 1,
#endif
#if TEXDRAW_PERSIANFIXER
// Delegates to UnityPersianSupport.PersianFixer.FixText
PersianFixerByHamidMoghaddam = 2,
#endif
}
// How a character category (or the first character of a line) is classified during bidirectional detection.
public enum DetectionOption
{
// No fixed direction: IsIgnoredChar () reports characters of such a category as ignored
Depends = 0,
// Always left-to-right: neither IsRTLChar () nor IsIgnoredChar () matches the category
AsLTR = 1,
// Always right-to-left: IsRTLChar () reports characters of such a category as RTL
AsRTL = 2,
}
// Algorithm used by Reverse (); the default follows whichever integration define is enabled,
// falling back to plain character reversal when none is.
#if TEXDRAW_ARABICSUPPORT
public RTLAlgorithm m_Algorithm = RTLAlgorithm.ArabicSupportByAbdullahKonash;
#elif TEXDRAW_PERSIANFIXER
public RTLAlgorithm m_Algorithm = RTLAlgorithm.PersianFixerByHamidMoghaddam;
#else
public RTLAlgorithm m_Algorithm = RTLAlgorithm.CharacterReversal;
#endif
// When false, ReplaceString () hands the whole string to Reverse () and skips Parse () entirely
[Tooltip("Bidirectional detection allows the supplement to filter which part of text is necessary to be converted by individual RTL algorithm.\nIt also contains code that specific to TEXDraw behaviour")]
public bool m_Bidirectional = true;
// Any value other than Depends short-circuits IsFirstRecognizableCharIsRTL ()
[Header ("Bidirectional Detection Config")]
[Tooltip("Supplement always determine the common order by first recognizable character. Override this only if something wrong or you want more consistent control")]
public DetectionOption m_AssumeFirstChar = DetectionOption.Depends;
// Applies to characters above '\xFF'
[Tooltip("Assume any non-latin chars as ...")]
public DetectionOption m_DetectNonLatin = DetectionOption.AsRTL;
// Applies to characters matched by char.IsWhiteSpace
[Tooltip("Assume whitespace/non-printable chars as ...")]
public DetectionOption m_DetectWhiteSpace = DetectionOption.Depends;
// Applies to characters matched by char.IsDigit
[Tooltip("Assume number as ...")]
public DetectionOption m_DetectNumber = DetectionOption.AsLTR;
// Applies to characters matched by char.IsPunctuation, except those listed in InternalChars
[Tooltip("Assume punctuation as ...")]
public DetectionOption m_DetectPunctuation = DetectionOption.Depends;
// Scratch buffer reused by the CharacterReversal branch of Reverse ()
[NonSerialized]
StringBuilder m_builder = new StringBuilder ();
//[NonSerialized]
//StringBuilder m_build = new StringBuilder ();
// Override of the base supplement hook: converts the given text for RTL display.
// Null or empty input is returned untouched. With bidirectional detection enabled the
// text goes through Parse (); otherwise the whole string is handed to Reverse () as-is.
public override string ReplaceString (string original)
{
    bool nothingToDo = string.IsNullOrEmpty (original);
    if (nothingToDo) {
        return original;
    }

    return m_Bidirectional ? Parse (original) : Reverse (original);
}
// Splits the text into directional runs and writes them to a buffer in display order.
// RTL runs are passed through Reverse (); TEXDraw constructs (brace groups, backslash
// commands, script markers) are copied un-reversed and their contents are parsed recursively.
// Returns the converted string; null or empty input is returned unchanged.
// This function can be called recursively.
string Parse (string original)
{
    if (string.IsNullOrEmpty (original))
        return original;
    // Carriage returns are dropped so that '\n' is the only line break to handle
    original = original.Replace ("\r", "");
    var b = new StringBuilder ();
    // l = read cursor, e = start of the run that is not emitted yet, i = insertion index inside b
    int l = 0, e = 0, i = 0;
    // onRTL = direction of the current run, commonlyRTL = base direction of the current line
    bool onRTL = IsFirstRecognizableCharIsRTL (original, 0), commonlyRTL = onRTL;
    while (l < original.Length) {
        var c = original [l];
        var p = l == 0 ? '\0' : original [l - 1];
        var n = l == original.Length - 1 ? '\0' : original [l + 1];
        bool ignored = IsIgnoredChar (c);
        // If character is a new line
        if (c == '\n') {
            if (e != l) {
                // This means something is left behind. Lets clear-em-up
                Insert (b, original.Substring (e, l - e), onRTL, commonlyRTL, ref i);
                e = l;
            }
            b.Append ('\n');
            e = ++l;
            // The next line starts at the end of what has been written so far
            i = b.Length;
            if (l < original.Length)
                onRTL = commonlyRTL = IsFirstRecognizableCharIsRTL (original, l);
        }
        // If character is an opening brace
        else if (c == '{' && p != '\\' && n != '}' && n != '\0') {
            if (e != l) {
                Insert (b, original.Substring (e, l - e), onRTL, commonlyRTL, ref i);
                e = l;
            }
            var substring = TexFormulaParser.ReadGroup (original, ref l, '{', '}');
            e = l;
            // The group content is parsed on its own; the braces themselves are never reversed
            var parsed = "{" + Parse (substring) + "}";
            Insert (b, parsed, false, commonlyRTL, ref i);
        }
        // If character is a backslash (signals a command) (absolutely specific to TEXDraw behaviour)
        else if (c == '\\' && p != '\\' && char.IsLetter (n)) {
            if (e != l) {
                Insert (b, original.Substring (e, l - e), onRTL, commonlyRTL, ref i);
                e = l;
            }
            // From here on e points at the backslash of the command
            l++;
            var command = TexFormulaParser.LookForAWord (original, ref l);
            var param1 = string.Empty;
            var param2 = string.Empty;
            if (l >= original.Length) {
                // The command is the last token of the text. Emit it verbatim (never reversed),
                // exactly like a command in the middle of the text; a trailing direction
                // switch has nothing left to affect and is dropped like its mid-text form.
                if (command != "rtl" && command != "ltr")
                    Insert (b, original.Substring (e), false, commonlyRTL, ref i);
                e = original.Length;
                break;
            } else if (command.Contains ("frac")) {
                param1 = "{" + Parse (TexFormulaParser.ReadGroup (original, ref l, '{', '}')) + "}";
                param2 = "{" + Parse (TexFormulaParser.ReadGroup (original, ref l, '{', '}')) + "}";
            } else if (command == "rtl" || command == "ltr") {
                // Direction switch: consumed here and not written to the output
                if (e != (l - 4)) {
                    Insert (b, original.Substring (e, (l - 4) - e), onRTL, commonlyRTL, ref i);
                }
                SkipWhiteSpace (original, ref l);
                e = l;
                onRTL = commonlyRTL = command == "rtl";
                continue;
            } else if (TexFormulaParser.isCommandRegistered (command) || TEXPreference.main.GetFontIndexByID (command) >= 0) {
                if (original [l] == '[') {
                    // Only \root gets its optional argument parsed; others are copied as-is
                    if (command == "root")
                        param1 = "[" + Parse (TexFormulaParser.ReadGroup (original, ref l, '[', ']')) + "]";
                    else
                        param1 = "[" + (TexFormulaParser.ReadGroup (original, ref l, '[', ']')) + "]";
                }
                // NOTE(review): this condition is kept exactly as written; the intended
                // grouping of && and || here is unclear - confirm against TEXDraw's parser
                if (l < original.Length && original [l] != '{' || !command.Contains ("hold"))
                    param2 = "{" + Parse (TexFormulaParser.ReadGroup (original, ref l, '{', '}')) + "}";
            }
            var parsed = "\\" + command + param1 + param2;
            Insert (b, parsed, false, commonlyRTL, ref i);
            e = l;
        }
        // If character is a script sign (also specific to TEXDraw behaviour).
        // The parentheses matter: without them an escaped "\_" was still treated as a script sign.
        else if ((c == '_' || c == '^') && p != '\\') {
            if (e != l) {
                Insert (b, original.Substring (e, l - e), onRTL, false, ref i);
                e = l;
            }
            var iBackup = i;
            // Consume every consecutive script (e.g. x_a^b) so they stay attached in order
            while (c == '_' || c == '^') {
                l++;
                var parsed = Parse (TexFormulaParser.ReadScriptGroup (original, ref l));
                Insert (b, new string (c, 1) + parsed, onRTL, false, ref i);
                c = l >= original.Length ? '\0' : original [l];
            }
            e = l;
            // On an RTL line later runs must still be inserted before the base and its scripts
            if (commonlyRTL)
                i = iBackup;
        }
        // If character/space is different than current RTL mode.
        // Note the second operand reassigns c to the next character when it is evaluated.
        else if ((IsRTLChar (c) != onRTL && !ignored) ||
                 (ignored && l < original.Length - 1 && (onRTL ^ commonlyRTL)
                 && ((IsRTLChar (c = original [l + 1])) != onRTL))) {
            Insert (b, original.Substring (e, l - e), onRTL, commonlyRTL, ref i);
            onRTL = IsRTLChar (c);
            e = l++;
        } else
            l++;
    }
    // Flush whatever is left after the last boundary
    Insert (b, original.Substring (e), onRTL, commonlyRTL, ref i);
    return b.ToString ();
}
// Writes one run into the buffer at index i, passing it through Reverse () first when onRTL is set.
// On an LTR line (commonlyRTL false) the index is moved past the inserted text; on an RTL line
// it stays where it is, so the next run is placed in front of this one.
void Insert (StringBuilder b, string substring, bool onRTL, bool commonlyRTL, ref int i)
{
    int lengthBefore = b.Length;
    string text = substring;
    if (onRTL) {
        text = Reverse (substring);
    }
    b.Insert (i, text);

    if (commonlyRTL) {
        return;
    }
    // Advance by the measured growth rather than the input length: the RTL algorithm may
    // change the character count (e.g. two chars such as alif + lam condensed into one)
    int growth = b.Length - lengthBefore;
    i += growth;
}
// Moves position forward past any run of space characters (' ' only) in value.
// position is left unchanged when it is already at a non-space or at/after the end.
public static void SkipWhiteSpace (string value, ref int position)
{
    for (; position < value.Length; position++) {
        if (value [position] != ' ') {
            break;
        }
    }
}
// Decides the base direction of the text starting at index i.
// An explicit m_AssumeFirstChar setting wins; otherwise the text is scanned for the first
// character that is neither TEXDraw syntax nor an ignored category, and that character's
// direction is returned. Returns false when no such character exists.
bool IsFirstRecognizableCharIsRTL (string s, int i)
{
    if (m_AssumeFirstChar != DetectionOption.Depends) {
        return m_AssumeFirstChar == DetectionOption.AsRTL;
    }

    for (int cursor = i; cursor < s.Length;) {
        char current = s [cursor++];
        if (current == '\\') {
            // Skip the command name, plus its [optional] argument when one follows directly
            string word = TexFormulaParser.LookForAWord (s, ref cursor);
            bool optionFollows = word.Length > 1 && cursor < s.Length && s [cursor] == '[';
            if (optionFollows) {
                TexFormulaParser.ReadGroup (s, ref cursor, '[', ']');
            }
        } else if (!InternalChars.Contains (current) && !IsIgnoredChar (current)) {
            return IsRTLChar (current);
        }
    }
    return false;
}
// Characters that carry TEXDraw syntax; they are excluded from punctuation detection
static readonly HashSet<char> InternalChars = new HashSet<char> { '{', '}', '\\', '_', '^', '[', ']' };
// True when c falls into at least one character category that is configured as AsRTL.
// Punctuation listed in InternalChars never counts as punctuation here.
protected bool IsRTLChar (char c)
{
    const DetectionOption rtl = DetectionOption.AsRTL;

    if (m_DetectNonLatin == rtl && c > '\xFF') {
        return true;
    }
    if (m_DetectWhiteSpace == rtl && char.IsWhiteSpace (c)) {
        return true;
    }
    if (m_DetectNumber == rtl && char.IsDigit (c)) {
        return true;
    }
    return m_DetectPunctuation == rtl && char.IsPunctuation (c) && !InternalChars.Contains (c);
}
// True when c falls into at least one character category that is configured as Depends,
// i.e. a category with no fixed direction of its own.
// Punctuation listed in InternalChars never counts as punctuation here.
protected bool IsIgnoredChar (char c)
{
    const DetectionOption neutral = DetectionOption.Depends;

    if (m_DetectNonLatin == neutral && c > '\xFF') {
        return true;
    }
    if (m_DetectWhiteSpace == neutral && char.IsWhiteSpace (c)) {
        return true;
    }
    if (m_DetectNumber == neutral && char.IsDigit (c)) {
        return true;
    }
    return m_DetectPunctuation == neutral && char.IsPunctuation (c) && !InternalChars.Contains (c);
}
// Converts a run of RTL text into display order using the algorithm selected in m_Algorithm.
// CharacterReversal reverses the characters of each line separately and keeps the line
// breaks in place; the other algorithms delegate to the corresponding third-party fixer.
// An algorithm value with no matching case returns the text unchanged.
protected string Reverse (string original)
{
    switch (m_Algorithm) {
    case RTLAlgorithm.CharacterReversal:
        m_builder.Length = 0;
        int l = 0, e = 0;
        while (l < original.Length) {
            // e = first char of the line, l = index of its '\n' (or end of text)
            e = l;
            l = original.IndexOf ('\n', e);
            bool hasLineBreak = l != -1;
            if (!hasLineBreak)
                l = original.Length;
            for (int i = l; i-- > e;) {
                m_builder.Append (original [i]);
            }
            // Re-emit the separator; without this multi-line text collapsed into a single line
            if (hasLineBreak)
                m_builder.Append ('\n');
            l++;
        }
        return m_builder.ToString ();
#if TEXDRAW_ARABICSUPPORT
    case RTLAlgorithm.ArabicSupportByAbdullahKonash:
        // Arabic Support by Abdullah Konash
        // https://github.com/Konash/arabic-support-unity/
        return ArabicSupport.ArabicFixer.Fix (original, true, true);
#endif
#if TEXDRAW_PERSIANFIXER
    case RTLAlgorithm.PersianFixerByHamidMoghaddam:
        // Persian Fixer by Hamid Moghaddam
        // https://github.com/HamidMoghaddam/unitypersiansupport/
        return UnityPersianSupport.PersianFixer.FixText (original);
#endif
    default:
        return original;
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment