swanhtet1992/ZgDetector.java

## ZgDetector.java
package android.widget;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import android.text.TextUtils;

/**
 * Detects text that looks Zawgyi-encoded and rewrites it with an ordered list
 * of regular-expression substitutions.
 *
 * <p>Detection is a single regular expression ({@link #ZAWGYI_DETECT_PATTERN});
 * conversion only runs when that expression finds a match, otherwise the input
 * is returned untouched.
 *
 * @author SH
 */
public class ZgDetector {
    private static final Pattern ZAWGYI_DETECT_PATTERN = Pattern.compile(
        // A regular expression that matches if the text is Zawgyi-encoded.
        // Using the ranges 1033-1034 or 1060-1097 will report Shan, Karen,
        // etc. as Zawgyi.
        "[\u105a\u1060-\u1097]|" // Zawgyi characters outside Unicode range
            + "[\u1033\u1034]|" // These are Mon characters
            + "\u1031\u108f|"
            + "\u1031[\u103b-\u103e]|" // Medial right after U+1031
            + "[\u102b-\u1030\u1032]\u1031|" // Vowel sign right before U+1031
            + " \u1031| \u103b|" // Unexpected characters after a space
            + "^\u1031|^\u103b|\u1038\u103b|\u1038\u1031|"
            + "[\u102d\u102e\u1032]\u103b|\u1039[^\u1000-\u1021]|\u1039$"
            + "|\u1004\u1039[\u1001-\u102a\u103f\u104e]" // Missing ASAT in Kinzi
            // 1039 not before a consonant (the negated class previously began
            // with a literal "u1000" because the backslash was missing).
            + "|\u1039[^\u1000-\u102a\u103f\u104e]"
            // Out of order medials
            + "|\u103c\u103b|\u103d\u103b"
            + "|\u103e\u103b|\u103d\u103c"
            + "|\u103e\u103c|\u103e\u103d"
            // Bad medial combos
            + "|\u103b\u103c"
            // Out of order vowel signs
            + "|[\u102f\u1030\u102b\u102c][\u102d\u102e\u1032]"
            + "|[\u102b\u102c][\u102f\u102c]"
            // Digit before diacritic
            + "|[\u1040-\u1049][\u102b-\u103e\u102b-\u1030\u1032\u1036\u1037\u1038\u103a]"
            // Single digit 0, 7 at start
            + "|^[\u1040\u1047][^\u1040-\u1049]"
            // Second 1039 with bad followers
            + "|[\u1000-\u102a\u103f\u104e]\u1039[\u101a\u101b\u101d\u101f\u1022-\u103f]"
            // Other bad combos (a stray ']' used to be required after 102b).
            + "|\u103a\u103e"
            + "|\u1036\u102b"
            // multiple upper vowels
            + "|\u102d[\u102e\u1032]|\u102e[\u102d\u1032]|\u1032[\u102d\u102e]"
            // Multiple lower vowels
            + "|\u102f\u1030|\u1030\u102f"
            // Multiple A vowels
            + "|\u102b\u102c|\u102c\u102b"
            // Shan digits with vowels or medials or other signs
            + "|[\u1090-\u1099][\u102b-\u1030\u1032\u1037\u103a-\u103e]"
            // Isolated Shan digit
            + "|[\u1000-\u10f4][\u1090-\u1099][\u1000-\u104f]"
            + "|^[\u1090-\u1099][\u1000-\u102a\u103f\u104e\u104a\u104b]"
            + "|[\u1000-\u104f][\u1090-\u1099]$"
            // Diacritics with non-Burmese vowel signs
            + "|[\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074\u1082-\u108d"
            + "\u108f\u109a-\u109d]"
            + "[\u102b-\u103e]"
            // Consonant 103a + some vowel signs
            + "|[\u1000-\u102a]\u103a[\u102d\u102e\u1032]"
            // 1031 after other vowel signs
            + "|[\u102b-\u1030\u1032\u1036-\u1038\u103a]\u1031"
            // Using Shan combining characters with other languages.
            + "|[\u1087-\u108d][\u106e-\u1070\u1072-\u1074]"
            // Non-Burmese diacritics at start, following space, or following sections
            + "|^[\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074"
            + "\u1082-\u108d\u108f\u109a-\u109d]"
            + "|[\u0020\u104a\u104b][\u105e-\u1060\u1062-\u1064\u1067-\u106d"
            + "\u1071-\u1074\u1082-\u108d\u108f\u109a-\u109d]"
            // Wrong order with 1036
            + "|[\u1036\u103a][\u102d-\u1030\u1032]"
            // Odd stacking
            + "|[\u1025\u100a]\u1039"
            // More mixing of non-Burmese languages
            + "|[\u108e-\u108f][\u1050-\u108d]"
            // Bad diacritic combos (the opening '[' was missing and a ']' was
            // trailing, which turned this into a literal sequence).
            + "|[\u102d-\u1030\u1032\u1036-\u1037]\u1039"
            // Dot before subscripted consonant
            + "|[\u1000-\u102a\u103f\u104e]\u1037\u1039"
            // Odd subscript + vowel signs
            + "|[\u1000-\u102a\u103f\u104e]\u102c\u1039[\u1000-\u102a\u103f\u104e]"
            // Medials after vowels
            + "|[\u102b-\u1030\u1032][\u103b-\u103e]"
            // Medials
            + "|\u1032[\u103b-\u103e]"
            // Medial with 101b
            + "|\u101b\u103c"
            // Stacking too deeply: consonant 1039 consonant 1039 consonant
            + "|[\u1000-\u102a\u103f\u104e]\u1039[\u1000-\u102a\u103f\u104e]\u1039"
            + "[\u1000-\u102a\u103f\u104e]"
            // Stacking pattern consonant 1039 consonant 103a other vowel signs
            + "|[\u1000-\u102a\u103f\u104e]\u1039[\u1000-\u102a\u103f\u104e]"
            + "[\u102b\u1032\u103d]"
            // Odd stacking over u1021, u1019, and u1000
            + "|[\u1000\u1005\u100f\u1010\u1012\u1014\u1015\u1019\u101a]\u1039\u1021"
            + "|[\u1000\u1010]\u1039\u1019"
            + "|\u1004\u1039\u1000"
            + "|\u1015\u1039[\u101a\u101e]"
            + "|\u1000\u1039\u1001\u1036"
            + "|\u1039\u1011\u1032"
            // Vowel sign in wrong order
            + "|\u1037\u1032"
            + "|\u1036\u103b"
            // Duplicated vowel
            + "|\u102f\u102f"
        );

    /**
     * Conversion rules as {regex, replacement} pairs, applied strictly in this
     * order. Order matters: several rules consume code points that earlier
     * rules produce (for example U+103A is first turned into U+103B and only
     * afterwards is U+1039 turned into U+103A).
     */
    private static final String[][] ZG_TO_UNI_RULES = {
        // Single code point substitutions and medial renumbering.
        {"\\u106a", "\u1009"},
        {"\\u1025(?=[\\u1039\\u102c])", "\u1009"},
        {"\\u1025\\u102e", "\u1026"},
        {"\\u106b", "\u100a"},
        {"\\u1090", "\u101b"},
        {"\\u108f", "\u1014"},
        {"[\\u103d\\u1087]", "\u103e"},
        {"\\u103c", "\u103d"},
        {"[\\u103b\\u107e\\u107f\\u1080\\u1081\\u1082\\u1083\\u1084]", "\u103c"},
        {"[\\u103a\\u107d]", "\u103b"},
        {"\\u103d\\u103b", "\u103b\u103d"},
        {"\\u108a", "\u103d\u103e"},
        {"\\u103e\\u103d", "\u103d\u103e"},
        // Move U+1064 (and the combined forms U+108B..U+108D) in front of the
        // optional U+1031 / U+103C and the consonant it follows.
        {"((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u1064", "\u1064$1$2$3"},
        {"((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108b", "\u1064$1$2$3\u102d"},
        {"((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108c", "\u1064$1$2$3\u102e"},
        {"((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108d", "\u1064$1$2$3\u1036"},
        {"\\u105a", "\u102b\u103a"},
        {"\\u108e", "\u102d\u1036"},
        {"\\u1033", "\u102f"},
        {"\\u1034", "\u1030"},
        {"\\u1088", "\u103e\u102f"},
        {"\\u1089", "\u103e\u1030"},
        {"\\u1039", "\u103a"},
        {"[\\u1094\\u1095]", "\u1037"},
        // Pull a stacked-consonant code point in front of the vowel signs that precede it.
        {"([\\u1000-\\u1021])([\\u102c\\u102d\\u102e\\u1032\\u1036]){1,2}([\\u1060\\u1061\\u1062\\u1063\\u1065\\u1066\\u1067\\u1068\\u1069\\u1070\\u1071\\u1072\\u1073\\u1074\\u1075\\u1076\\u1077\\u1078\\u1079\\u107a\\u107b\\u107c\\u1085])", "$1$3$2"},
        {"\\u1064", "\u1004\u103a\u1039"},
        {"\\u104e", "\u104e\u1004\u103a\u1038"},
        {"\\u1086", "\u103f"},
        // Stacked-consonant code points become U+1039 followed by a consonant.
        {"\\u1060", "\u1039\u1000"},
        {"\\u1061", "\u1039\u1001"},
        {"\\u1062", "\u1039\u1002"},
        {"\\u1063", "\u1039\u1003"},
        {"\\u1065", "\u1039\u1005"},
        {"[\\u1066\\u1067]", "\u1039\u1006"},
        {"\\u1068", "\u1039\u1007"},
        {"\\u1069", "\u1039\u1008"},
        {"\\u106c", "\u1039\u100b"},
        {"\\u1070", "\u1039\u100f"},
        {"[\\u1071\\u1072]", "\u1039\u1010"},
        {"[\\u1073\\u1074]", "\u1039\u1011"},
        {"\\u1075", "\u1039\u1012"},
        {"\\u1076", "\u1039\u1013"},
        {"\\u1077", "\u1039\u1014"},
        {"\\u1078", "\u1039\u1015"},
        {"\\u1079", "\u1039\u1016"},
        {"\\u107a", "\u1039\u1017"},
        {"\\u107b", "\u1039\u1018"},
        {"\\u107c", "\u1039\u1019"},
        {"\\u1085", "\u1039\u101c"},
        {"\\u106d", "\u1039\u100c"},
        {"\\u1091", "\u100f\u1039\u100d"},
        {"\\u1092", "\u100b\u1039\u100c"},
        {"\\u1097", "\u100b\u1039\u100b"},
        {"\\u106f", "\u100e\u1039\u100d"},
        {"\\u106e", "\u100d\u1039\u100d"},
        // Move U+103C behind the consonant (and its optional stacked consonant).
        {"(\\u103c)([\\u1000-\\u1021])((?:\\u1039[\\u1000-\\u1021])?)", "$2$3$1"},
        // Put U+103B/U+103C in front of U+103D/U+103E. The first two rules
        // previously tested U+103D twice, so a U+103E followed by U+103B or
        // U+103C was never reordered.
        {"(\\u103e)(\\u103d)([\\u103b\\u103c])", "$3$2$1"},
        {"(\\u103e)([\\u103b\\u103c])", "$2$1"},
        {"(\\u103d)([\\u103b\\u103c])", "$2$1"},
        // U+1047 next to a letter, sign or whitespace is rewritten to U+101B.
        {"(?<=([\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f\\s]))(\\u1047)", "\u101b"},
        {"(\\u1047)(?=[\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f\\s])", "\u101b"},
        // Reorder one syllable: consonant, stack, medials, U+1031, upper vowels,
        // lower vowel, then the U+1036..U+1038 signs.
        {"((?:\\u1031)?)([\\u1000-\\u1021])((?:\\u1039[\\u1000-\\u1021])?)((?:[\\u102d\\u102e\\u1032])?)([\\u1036\\u1037\\u1038]{0,2})([\\u103b-\\u103e]{0,3})((?:[\\u102f\\u1030])?)([\\u1036\\u1037\\u1038]{0,2})((?:[\\u102d\\u102e\\u1032])?)", "$2$3$6$1$4$9$7$5$8"},
        {"\\u1036\\u102f", "\u102f\u1036"},
        {"(\\u103a)(\\u1037)", "$2$1"},
    };

    /** Rule regexes compiled once at class load instead of on every call. */
    private static final Pattern[] ZG_TO_UNI_PATTERNS = compileRules(ZG_TO_UNI_RULES);

    /** Compiles the regex half of every {regex, replacement} pair, keeping order. */
    private static Pattern[] compileRules(String[][] rules) {
        Pattern[] compiled = new Pattern[rules.length];
        for (int i = 0; i < rules.length; i++) {
            compiled[i] = Pattern.compile(rules[i][0]);
        }
        return compiled;
    }

    /**
     * Converts {@code input} without appending the original text.
     *
     * @param input text to inspect; may be {@code null}
     * @return the converted text, or {@code input} itself when it is
     *         {@code null} or the detection pattern does not match
     */
    public static CharSequence hint(CharSequence input) {
        return zg2uni(input, true); // true means not append the original text
    }

    /**
     * Converts {@code input} and appends the original text after a separator.
     *
     * @param input text to inspect; may be {@code null}
     * @return the converted text followed by {@code "========"} and the
     *         original, or {@code input} itself when it is {@code null} or the
     *         detection pattern does not match
     */
    public static CharSequence text(CharSequence input) {
        return zg2uni(input, false); // false means will append the original text
    }

    /**
     * Runs the detection pattern over {@code input} and, on a match, applies
     * every conversion rule in order.
     *
     * @param input     text to inspect; may be {@code null}
     * @param notAppend when {@code true} only the converted text is returned;
     *                  when {@code false} the result is the converted text,
     *                  the literal {@code "========"} and the original input
     *                  joined with {@code TextUtils.concat}
     * @return {@code input} unchanged when it is {@code null} or not detected
     *         as Zawgyi, otherwise the converted (and optionally appended) text
     */
    public static CharSequence zg2uni(CharSequence input, boolean notAppend) {
        if (input == null) {
            return input;
        }

        Matcher matcher = ZAWGYI_DETECT_PATTERN.matcher(input);
        if (!matcher.find()) {
            return input;
        }

        String output = input.toString();
        for (int i = 0; i < ZG_TO_UNI_PATTERNS.length; i++) {
            // Matcher.replaceAll interprets $n group references exactly like
            // String.replaceAll, so the replacement strings are used verbatim.
            output = ZG_TO_UNI_PATTERNS[i].matcher(output).replaceAll(ZG_TO_UNI_RULES[i][1]);
        }

        if (notAppend) {
            return output;
        }
        return TextUtils.concat(output, "========", input);
    }
}
	package android.widget;

	import java.util.regex.Matcher;
	import java.util.regex.Pattern;
	import android.text.TextUtils;

	/**
	*
	* @author SH
	*/
	public class ZgDetector {
	private static final Pattern ZAWGYI_DETECT_PATTERN = Pattern.compile(
	// A regular expression matched if text is Zawgyi encoding.
	// Using the ranges 1033-1034 or 1060-1097 will report Shan, Karen,
	// etc. as Zawgyi.
	"[\u105a\u1060-\u1097]\|" // Zawgyi characters outside Unicode range
	+ "[\u1033\u1034]\|" // These are Mon characters
	+ "\u1031\u108f\|"
	+ "\u1031[\u103b-\u103e]\|" // Medial right after \u1031
	+ "[\u102b-\u1030\u1032]\u1031\|" // Vowel sign right after before \u1031
	+ " \u1031\| \u103b\|" // Unexpected characters after a space
	+ "^\u1031\|^\u103b\|\u1038\u103b\|\u1038\u1031\|"
	+ "[\u102d\u102e\u1032]\u103b\|\u1039[^\u1000-\u1021]\|\u1039$"
	+ "\|\u1004\u1039[\u1001-\u102a\u103f\u104e]" // Missing ASAT in Kinzi
	+ "\|\u1039[^u1000-\u102a\u103f\u104e]" // 1039 not before a consonant
	// Out of order medials
	+ "\|\u103c\u103b\|\u103d\u103b"
	+ "\|\u103e\u103b\|\u103d\u103c"
	+ "\|\u103e\u103c\|\u103e\u103d"
	// Bad medial combos
	+ "\|\u103b\u103c"
	// Out of order vowel signs
	+ "\|[\u102f\u1030\u102b\u102c][\u102d\u102e\u1032]"
	+ "\|[\u102b\u102c][\u102f\u102c]"
	// Digit before diacritic
	+ "\|[\u1040-\u1049][\u102b-\u103e\u102b-\u1030\u1032\u1036\u1037\u1038\u103a]"
	// Single digit 0, 7 at start
	+ "\|^[\u1040\u1047][^\u1040-\u1049]"
	// Second 1039 with bad followers
	+ "\|[\u1000-\u102a\u103f\u104e]\u1039[\u101a\u101b\u101d\u101f\u1022-\u103f]"
	// Other bad combos.
	+ "\|\u103a\u103e"
	+ "\|\u1036\u102b]"
	// multiple upper vowels
	+ "\|\u102d[\u102e\u1032]\|\u102e[\u102d\u1032]\|\u1032[\u102d\u102e]"
	// Multiple lower vowels
	+ "\|\u102f\u1030\|\u1030\u102f"
	// Multiple A vowels
	+ "\|\u102b\u102c\|\u102c\u102b"
	// Shan digits with vowels or medials or other signs
	+ "\|[\u1090-\u1099][\u102b-\u1030\u1032\u1037\u103a-\u103e]"
	// Isolated Shan digit
	+ "\|[\u1000-\u10f4][\u1090-\u1099][\u1000-\u104f]"
	+ "\|^[\u1090-\u1099][\u1000-\u102a\u103f\u104e\u104a\u104b]"
	+ "\|[\u1000-\u104f][\u1090-\u1099]$"
	// Diacritics with non-Burmese vowel signs
	+ "\|[\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074\u1082-\u108d"
	+ "\u108f\u109a-\u109d]"
	+ "[\u102b-\u103e]"
	// Consonant 103a + some vowel signs
	+ "\|[\u1000-\u102a]\u103a[\u102d\u102e\u1032]"
	// 1031 after other vowel signs
	+ "\|[\u102b-\u1030\u1032\u1036-\u1038\u103a]\u1031"
	// Using Shan combining characters with other languages.
	+ "\|[\u1087-\u108d][\u106e-\u1070\u1072-\u1074]"
	// Non-Burmese diacritics at start, following space, or following sections
	+ "\|^[\u105e-\u1060\u1062-\u1064\u1067-\u106d\u1071-\u1074"
	+ "\u1082-\u108d\u108f\u109a-\u109d]"
	+ "\|[\u0020\u104a\u104b][\u105e-\u1060\u1062-\u1064\u1067-\u106d"
	+ "\u1071-\u1074\u1082-\u108d\u108f\u109a-\u109d]"
	// Wrong order with 1036
	+ "\|[\u1036\u103a][\u102d-\u1030\u1032]"
	// Odd stacking
	+ "\|[\u1025\u100a]\u1039"
	// More mixing of non-Burmese languages
	+ "\|[\u108e-\u108f][\u1050-\u108d]"
	// Bad diacritic combos.
	+ "\|\u102d-\u1030\u1032\u1036-\u1037]\u1039]"
	// Dot before subscripted consonant
	+ "\|[\u1000-\u102a\u103f\u104e]\u1037\u1039"
	// Odd subscript + vowel signs
	+ "\|[\u1000-\u102a\u103f\u104e]\u102c\u1039[\u1000-\u102a\u103f\u104e]"
	// Medials after vowels
	+ "\|[\u102b-\u1030\u1032][\u103b-\u103e]"
	// Medials
	+ "\|\u1032[\u103b-\u103e]"
	// Medial with 101b
	+ "\|\u101b\u103c"
	// Stacking too deeply: consonant 1039 consonant 1039 consonant
	+ "\|[\u1000-\u102a\u103f\u104e]\u1039[\u1000-\u102a\u103f\u104e]\u1039"
	+ "[\u1000-\u102a\u103f\u104e]"
	// Stacking pattern consonant 1039 consonant 103a other vowel signs
	+ "\|[\u1000-\u102a\u103f\u104e]\u1039[\u1000-\u102a\u103f\u104e]"
	+ "[\u102b\u1032\u103d]"
	// Odd stacking over u1021, u1019, and u1000
	+ "\|[\u1000\u1005\u100f\u1010\u1012\u1014\u1015\u1019\u101a]\u1039\u1021"
	+ "\|[\u1000\u1010]\u1039\u1019"
	+ "\|\u1004\u1039\u1000"
	+ "\|\u1015\u1039[\u101a\u101e]"
	+ "\|\u1000\u1039\u1001\u1036"
	+ "\|\u1039\u1011\u1032"
	// Vowel sign in wrong order
	+ "\|\u1037\u1032"
	+ "\|\u1036\u103b"
	// Duplicated vowel
	+ "\|\u102f\u102f"
	);

	public static CharSequence hint(CharSequence input){
	return zg2uni(input, true); // true means not append the original text
	}

	public static CharSequence text(CharSequence input) {
	return zg2uni(input, false); // false means will append the original text
	}

	public static CharSequence zg2uni(CharSequence input, boolean notAppend) {
	if(input == null)
	return input;

	Matcher matcher = ZAWGYI_DETECT_PATTERN.matcher(input);

	if(matcher.find()){

	String output = input.toString();

	output = output.replaceAll("\\u106a", "\u1009");
	output = output.replaceAll("\\u1025(?=[\\u1039\\u102c])", "\u1009");
	output = output.replaceAll("\\u1025\\u102e", "\u1026");
	output = output.replaceAll("\\u106b", "\u100a");
	output = output.replaceAll("\\u1090", "\u101b");
	output = output.replaceAll("\\u1040", "\u1040");
	output = output.replaceAll("\\u108f", "\u1014");
	output = output.replaceAll("\\u1012", "\u1012");
	output = output.replaceAll("\\u1013", "\u1013");
	output = output.replaceAll("[\\u103d\\u1087]", "\u103e");
	output = output.replaceAll("\\u103c", "\u103d");
	output = output.replaceAll("[\\u103b\\u107e\\u107f\\u1080\\u1081\\u1082\\u1083\\u1084]", "\u103c");
	output = output.replaceAll("[\\u103a\\u107d]", "\u103b");
	output = output.replaceAll("\\u103d\\u103b", "\u103b\u103d");
	output = output.replaceAll("\\u108a","\u103d\u103e");
	output = output.replaceAll("\\u103e\\u103d", "\u103d\u103e");
	output = output.replaceAll("((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u1064", "\u1064$1$2$3");
	output = output.replaceAll("((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108b", "\u1064$1$2$3\u102d");
	output = output.replaceAll("((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108c", "\u1064$1$2$3\u102e");
	output = output.replaceAll("((?:\\u1031)?)((?:\\u103c)?)([\\u1000-\\u1021])\\u108d", "\u1064$1$2$3\u1036");
	output = output.replaceAll("\\u105a", "\u102b\u103a");
	output = output.replaceAll("\\u108e", "\u102d\u1036");
	output = output.replaceAll("\\u1033", "\u102f");
	output = output.replaceAll("\\u1034", "\u1030");
	output = output.replaceAll("\\u1088", "\u103e\u102f");
	output = output.replaceAll("\\u1089", "\u103e\u1030");
	output = output.replaceAll("\\u1039", "\u103a");
	output = output.replaceAll("[\\u1094\\u1095]", "\u1037");
	output = output.replaceAll("([\\u1000-\\u1021])([\\u102c\\u102d\\u102e\\u1032\\u1036]){1,2}([\\u1060\\u1061\\u1062\\u1063\\u1065\\u1066\\u1067\\u1068\\u1069\\u1070\\u1071\\u1072\\u1073\\u1074\\u1075\\u1076\\u1077\\u1078\\u1079\\u107a\\u107b\\u107c\\u1085])", "$1$3$2");
	output = output.replaceAll("\\u1064", "\u1004\u103a\u1039");
	output = output.replaceAll("\\u104e", "\u104e\u1004\u103a\u1038");
	output = output.replaceAll("\\u1086", "\u103f");
	output = output.replaceAll("\\u1060", "\u1039\u1000");
	output = output.replaceAll("\\u1061", "\u1039\u1001");
	output = output.replaceAll("\\u1062", "\u1039\u1002");
	output = output.replaceAll("\\u1063", "\u1039\u1003");
	output = output.replaceAll("\\u1065", "\u1039\u1005");
	output = output.replaceAll("[\\u1066\\u1067]", "\u1039\u1006");
	output = output.replaceAll("\\u1068", "\u1039\u1007");
	output = output.replaceAll("\\u1069", "\u1039\u1008");
	output = output.replaceAll("\\u106c", "\u1039\u100b");
	output = output.replaceAll("\\u1070", "\u1039\u100f");
	output = output.replaceAll("[\\u1071\\u1072]", "\u1039\u1010");
	output = output.replaceAll("[\\u1073\\u1074]", "\u1039\u1011");
	output = output.replaceAll("\\u1075", "\u1039\u1012");
	output = output.replaceAll("\\u1076", "\u1039\u1013");
	output = output.replaceAll("\\u1077", "\u1039\u1014");
	output = output.replaceAll("\\u1078", "\u1039\u1015");
	output = output.replaceAll("\\u1079", "\u1039\u1016");
	output = output.replaceAll("\\u107a", "\u1039\u1017");
	output = output.replaceAll("\\u107b", "\u1039\u1018");
	output = output.replaceAll("\\u107c", "\u1039\u1019");
	output = output.replaceAll("\\u1085", "\u1039\u101c");
	output = output.replaceAll("\\u106d", "\u1039\u100c");
	output = output.replaceAll("\\u1091", "\u100f\u1039\u100d");
	output = output.replaceAll("\\u1092", "\u100b\u1039\u100c");
	output = output.replaceAll("\\u1097", "\u100b\u1039\u100b");
	output = output.replaceAll("\\u106f", "\u100e\u1039\u100d");
	output = output.replaceAll("\\u106e", "\u100d\u1039\u100d");
	output = output.replaceAll("(\\u103c)([\\u1000-\\u1021])((?:\\u1039[\\u1000-\\u1021])?)", "$2$3$1");
	output = output.replaceAll("(\\u103d)(\\u103d)([\\u103b\\u103c])", "$3$2$1");
	output = output.replaceAll("(\\u103d)([\\u103b\\u103c])", "$2$1");
	output = output.replaceAll("(\\u103d)([\\u103b\\u103c])", "$2$1");
	output = output.replaceAll("(?<=([\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f\\s]))(\\u1047)", "\u101b");
	output = output.replaceAll("(\\u1047)(?=[\\u1000-\\u101c\\u101e-\\u102a\\u102c\\u102e-\\u103d\\u104c-\\u109f\\s])", "\u101b");
	output = output.replaceAll("((?:\\u1031)?)([\\u1000-\\u1021])((?:\\u1039[\\u1000-\\u1021])?)((?:[\\u102d\\u102e\\u1032])?)([\\u1036\\u1037\\u1038]{0,2})([\\u103b-\\u103e]{0,3})((?:[\\u102f\\u1030])?)([\\u1036\\u1037\\u1038]{0,2})((?:[\\u102d\\u102e\\u1032])?)", "$2$3$6$1$4$9$7$5$8");
	output = output.replaceAll("\\u1036\\u102f", "\u102f\u1036");
	output = output.replaceAll("(\\u103a)(\\u1037)", "$2$1");

	if(notAppend)
	return output;

	CharSequence appendOutput = TextUtils.concat(output, "========", input);
	return appendOutput;
	}else{
	return input;
	}
	}
	}