Created
September 13, 2021 13:20
-
-
Save avrebarra/f4597440465c722df8e5dca2853f594b to your computer and use it in GitHub Desktop.
convert latin to pegon
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Vowels that receive a leading alif when they start a word.
// "a" is not listed; it already maps to alif on its own (see mapper), so
// prefixing another alif would presumably double it.
const vocal_list = ["i", "u", "e", "o"];

// Characters copied to the output unchanged. They also count as word
// boundaries for the leading-alif rule.
const ignore_list = [" ", "-", "+", "=", "(", ")"];

// Latin sequence -> Pegon/Arabic glyph. Several Latin letters share one glyph
// in this table (e.g. "g" and "k", or "f", "p" and "v").
const mapper = {
  ng: "ع",
  ny: "ۑ",
  0: "٠",
  1: "١",
  2: "٢",
  3: "٣",
  4: "٤",
  5: "٥",
  6: "٦",
  7: "٧",
  8: "٨",
  9: "٩",
  a: "ا",
  b: "ب",
  c: "چ",
  d: "د",
  e: "ي",
  f: "ف",
  g: "ك",
  h: "ه",
  i: "ي",
  j: "ج",
  k: "ك",
  l: "ل",
  m: "م",
  n: "ن",
  o: "و",
  p: "ف",
  q: "ق",
  r: "ر",
  s: "س",
  t: "ت",
  u: "و",
  v: "ف",
  w: "و",
  x: "كس",
  y: "ي",
  z: "ز",
};

// Keys ordered longest-first so digraphs ("ng", "ny") are always tried before
// their single-letter prefix ("n"), independent of object key order.
const mapper_keys = Object.keys(mapper).sort((a, b) => b.length - a.length);

// Only Latin letters take part in doubled-letter collapsing; digits and
// punctuation must be kept verbatim ("100" must not lose a zero).
const latin_letter = /[a-z]/;

/**
 * Transliterate lower-case Latin text into Pegon script.
 *
 * Rules, applied left to right:
 *  - characters in `ignore_list` are copied unchanged;
 *  - a vowel from `vocal_list` at the start of the text, or directly after an
 *    ignored character, is preceded by an alif;
 *  - the longest `mapper` key matching at the current position is replaced by
 *    its glyph; characters with no mapping are copied unchanged;
 *  - a doubled non-vowel Latin letter (e.g. "ss") is collapsed to a single
 *    glyph. No shaddah mark is emitted.
 *
 * @param {string} text - Latin text to convert.
 * @returns {string} The transliterated text.
 */
function latinToPegon(text) {
  let result = "";
  let pos = 0;

  while (pos < text.length) {
    const ch = text[pos];

    // Separators pass straight through.
    if (ignore_list.includes(ch)) {
      result += ch;
      pos += 1;
      continue;
    }

    // Word-initial vowel: add the carrier alif first.
    const at_word_start = pos === 0 || ignore_list.includes(text[pos - 1]);
    if (at_word_start && vocal_list.includes(ch)) {
      result += "ا";
    }

    // Check for a doubled letter before advancing, while `pos` still points
    // at the first of the pair.
    const is_doubled_letter =
      latin_letter.test(ch) &&
      !vocal_list.includes(ch) &&
      text[pos + 1] === ch;

    const key = mapper_keys.find((k) => text.startsWith(k, pos));
    if (key === undefined) {
      result += ch;
      pos += 1;
    } else {
      result += mapper[key];
      pos += key.length;
    }

    // Skip the second letter of the pair.
    if (is_doubled_letter) {
      pos += 1;
    }
  }

  return result;
}

const input =
  "aku adalah seorang kapten di tanah canda tawa erasmus/arasmus (iyairamoussa). tanah nyanyian orang-orang terpandang.";
const output = latinToPegon(input);

console.log(output);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment