Last active
October 4, 2024 13:44
-
-
Save jjkavalam/460c5398739a4bcf3d2c4159df8edca6 to your computer and use it in GitHub Desktop.
Convert Thiruvachanam font to Unicode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Malayalam sign virama (chandrakkala); joins consonants into a conjunct.
const Virama = "\u0d4d";

/**
 * Lookup table from a Thiruvachanam font code unit (the numeric value of a
 * character in the font-encoded text) to the Unicode string it stands for.
 *
 * Code units that do not appear here are passed through unchanged by map().
 * The prefix signs 0x73, 0x74 and 0x7B are intentionally absent: they need
 * reordering and are handled in convertThiruvachanamToUnicode().
 */
const DirectMapping = {
  // Independent vowels
  0x41: 'അ',
  0x42: 'ആ',
  0x43: 'ഇ',
  0x44: 'ഉ',
  0x45: 'ഋ',
  0x46: 'എ',
  0x47: 'ഏ',
  0x48: 'ഒ',
  // Consonants
  0x49: 'ക',
  0x4A: 'ഖ',
  0x4B: 'ഗ',
  0x4C: 'ഘ',
  0x4D: 'ങ',
  0x4E: 'ച',
  0x4F: 'ഛ',
  0x50: 'ജ',
  0x51: 'ഝ',
  0x52: 'ഞ',
  0x53: 'ട',
  0x54: 'ഠ',
  0x55: 'ഡ',
  0x56: 'ഢ',
  0x57: 'ണ',
  0x58: 'ത',
  0x59: 'ഥ',
  0x5A: 'ദ',
  0x5B: 'ധ',
  0x5C: 'ന',
  0x5D: 'പ',
  0x5E: 'ഫ',
  0x5F: 'ബ',
  0x60: 'ഭ',
  0x61: 'മ',
  0x62: 'യ',
  0x63: 'ര',
  0x64: 'റ',
  0x65: 'ല',
  0x66: 'ള',
  0x67: 'ഴ',
  0x68: 'വ',
  0x69: 'ശ',
  0x6A: 'ഷ',
  0x6B: 'സ',
  0x6C: 'ഹ',
  // Dependent vowel signs and other marks that follow their base character
  0x6D: 'ാ',
  0x6E: 'ി',
  0x6F: 'ീ',
  0x70: 'ു',
  0x71: 'ൂ',
  0x72: 'ൃ',
  0x75: 'ൗ',
  0x76: Virama,
  0x77: 'ം',
  0x78: 'ഃ',
  // Subjoined consonant signs
  0x79: Virama + "\u0d2f", // virama + ya
  0x7A: Virama + "\u0d35", // virama + va
  // NOTE(review): sequence kept exactly as in the original table
  // (virama + ya + virama + anusvara) -- confirm against the font.
  0x7C: Virama + "\u0d2f" + Virama + "\u0d02",
  // Conjuncts, chillus and consonant+vowel ligatures
  0xA1: 'ക്ക',
  0xA2: 'ക്ല',
  0xA3: 'ക്ഷ',
  0xA4: 'ഗ്ഗ',
  0xA5: 'ഗ്ല',
  0xA6: 'ങ്ക',
  0xA7: 'ങ്ങ',
  0xA8: 'ച്ച',
  0xA9: 'ഞ്ച',
  0xAA: 'ഞ്ഞ',
  0xAB: 'ട്ട',
  0xAC: 'ൺ',
  0xAD: 'ണ്ട',
  0xAE: 'ണ്ണ',
  0xAF: 'ത്ത',
  // ta + virama + tha; the original entry lacked the virama, unlike every
  // other conjunct in this table.
  0xB0: "\u0d24" + Virama + "\u0d25",
  0xB1: 'ദ്ദ',
  0xB2: 'ദ്ധ',
  0xB3: 'ൻ',
  0xB4: 'ന്ത',
  0xB5: 'ന്ദ',
  0xB6: 'ന്ന',
  0xB7: 'ന്മ',
  0xB8: 'പ്പ',
  0xB9: 'പ്ല',
  0xBA: 'ബ്ബ',
  0xBB: 'ബ്ല',
  0xBC: 'മ്പ',
  0xBD: 'മ്മ',
  0xBE: 'മ്ല',
  0xBF: 'യ്യ',
  0xC0: 'ർ',
  0xC1: 'റ്റ',
  0xC2: 'ൽ',
  0xC3: 'ല്ല',
  0xC4: 'ൾ',
  0xC5: 'ള്ള',
  0xC6: 'വ്വ',
  0xC7: 'ശ്ല',
  0xC8: 'ശ്ശ',
  0xC9: 'സ്ല',
  0xCA: 'സ്സ',
  0xCB: 'ഹ്ല',
  0xCC: 'സ്റ്റ',
  0xCD: 'ഡ്ഡ',
  0xCE: 'ക്ട',
  0xCF: 'ബ്ധ',
  0xD0: 'ബ്ദ',
  0xD1: 'ച്ഛ',
  0xD2: 'പ്മ',
  // ha + virama + na; the original entry lacked the virama, unlike every
  // other conjunct in this table.
  0xD3: "\u0d39" + Virama + "\u0d28",
  0xD4: 'ന്ധ',
  0xD5: 'ത്സ',
  0xD6: 'ജ്ജ',
  0xD7: 'ണ്മ',
  0xD8: 'സ്ഥ',
  0xD9: 'ന്ഥ',
  0xDA: 'ജ്ഞ',
  0xDB: 'ത്ഭ',
  0xDC: 'ഗ്മ',
  0xDD: 'ശ്ച',
  0xDE: 'ണ്ഡ',
  0xDF: 'ത്മ',
  0xE0: 'ക്ത',
  0xE1: 'ഗ്ന',
  0xE2: 'ന്റ',
  0xE3: 'ഷ്ട',
  0xE4: 'റ്റ',
  0xE5: 'ല്പ',
  0xE6: 'കു',
  0xE7: 'ക്കു',
  0xE8: 'ങ്കു',
  0xE9: 'ണു',
  0xEA: 'രു',
  0xEB: 'നു',
  0xEC: 'ന്നു',
  0xED: 'യ്ക്കു',
  0xEE: 'ൻ',
  0xEF: 'ണ്ട',
};
/**
 * Translates a single font code unit into Unicode text.
 *
 * @param {number} c - UTF-16 code unit taken from the font-encoded input
 *   (typically the result of String.prototype.charCodeAt).
 * @returns {string} the DirectMapping entry for `c` when one exists; an empty
 *   string when `c` is NaN; otherwise the character with code `c`, unchanged.
 */
function map(c) {
  // charCodeAt() yields NaN past the end of the string, and
  // String.fromCharCode(NaN) would produce a NUL character.
  if (Number.isNaN(c)) {
    return "";
  }
  if (Object.prototype.hasOwnProperty.call(DirectMapping, c)) {
    return DirectMapping[c];
  }
  return String.fromCharCode(c);
}
/**
 * Converts text encoded for the Thiruvachanam font into Unicode Malayalam.
 *
 * The font stores the signs for e / ee / ai (0x73, 0x74, 0x73 0x73) and the
 * ra sign (0x7B) BEFORE the consonant they belong to, whereas Unicode stores
 * them after it. This function reorders those sequences, merges multi-part
 * vowels, and sends everything else through map().
 *
 * @param {string} input - raw input encoded using Thiruvachanam font
 * @param {string} nl - (optional) string sequence used to represent a newline
 *   in the input; each occurrence is emitted as "\n"
 * @returns {string} the converted Unicode text
 */
function convertThiruvachanamToUnicode(input, nl = null) {
  const N = input.length;

  // Mapped text for the code unit at `index`, or "" when `index` is past the
  // end of the input. Without this guard a prefix sign at the very end of the
  // input would read NaN and leak a NUL character into the output.
  const mapAt = (index) => (index < N ? map(input.charCodeAt(index)) : "");

  let result = "";
  for (let i = 0; i < N; i++) {
    if (nl && input.substring(i, i + nl.length) === nl) {
      result += "\n";
      // newline detected; skip ahead (the loop's i++ consumes the last unit)
      i += nl.length - 1;
      continue;
    }

    // Look-ahead window; entries past the end of the input are NaN and
    // therefore never equal any of the codes tested below.
    const c1 = input.charCodeAt(i);
    const c2 = input.charCodeAt(i + 1);
    const c3 = input.charCodeAt(i + 2);
    const c4 = input.charCodeAt(i + 3);

    // special cases: two characters combine into one independent vowel
    if (c1 === 0x43 && c2 === 0x75) {
      result += "ഈ";
      i++;
      continue;
    }
    if (c1 === 0x44 && c2 === 0x75) {
      result += "ഊ";
      i++;
      continue;
    }
    if (c1 === 0x73 && c2 === 0x46) {
      result += "ഐ";
      i++;
      continue;
    }
    if (c1 === 0x48 && c2 === 0x6D) {
      result += "ഓ";
      i++;
      continue;
    }
    if (c1 === 0x48 && c2 === 0x75) {
      result += "ഔ";
      i++;
      continue;
    }

    // 4 character special combos where 0x7B (Virama + "\u0d30") is involved.
    // Longer patterns are tested before their shorter prefixes.
    if (c1 === 0x73 && c2 === 0x7B && c4 === 0x6D) {
      // e.g. വ്രൊ
      result += mapAt(i + 2) + Virama + "\u0d30" + "\u0d4A";
      i += 3;
      continue;
    }
    if (c1 === 0x74 && c2 === 0x7B && c4 === 0x6D) {
      // e.g. വ്രോ
      result += mapAt(i + 2) + Virama + "\u0d30" + "\u0d4B";
      i += 3;
      continue;
    }
    if (c1 === 0x73 && c2 === 0x73 && c3 === 0x7B) {
      // e.g. വ്രൈ
      result += mapAt(i + 3) + Virama + "\u0d30" + "\u0d48";
      i += 3;
      continue;
    }
    if (c1 === 0x73 && c2 === 0x7B) {
      // e.g. വ്രെ
      result += mapAt(i + 2) + Virama + "\u0d30" + "\u0d46";
      i += 2;
      continue;
    }
    if (c1 === 0x74 && c2 === 0x7B) {
      // e.g. വ്രേ
      result += mapAt(i + 2) + Virama + "\u0d30" + "\u0d47";
      i += 2;
      continue;
    }

    // special cases: reordering of prefix signs around a single base character
    if (c1 === 0x73 && c3 === 0x6D) {
      // e.g. വൊ
      result += mapAt(i + 1) + "\u0d4A";
      i += 2;
      continue;
    }
    if (c1 === 0x74 && c3 === 0x6D) {
      // e.g. വോ
      result += mapAt(i + 1) + "\u0d4B";
      i += 2;
      continue;
    }
    if (c1 === 0x73 && c2 === 0x73) {
      // e.g. വൈ
      result += mapAt(i + 2) + "\u0d48";
      i += 2;
      continue;
    }
    if (c1 === 0x7B) {
      // e.g. വ്ര
      result += mapAt(i + 1) + Virama + "\u0d30";
      i += 1;
      continue;
    }
    if (c1 === 0x73) {
      // e.g. വെ
      result += mapAt(i + 1) + "\u0d46";
      i += 1;
      continue;
    }
    if (c1 === 0x74) {
      // e.g. വേ
      result += mapAt(i + 1) + "\u0d47";
      i += 1;
      continue;
    }

    // simple direct mapping
    result += mapAt(i);
  }
  return result;
}
module.exports = { | |
convertThiruvachanamToUnicode | |
}; | |
// തിരുവചനം | |
// console.log(convertThiruvachanamToUnicode("XncphN\\w")); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment