Skip to content

Instantly share code, notes, and snippets.

@jjkavalam
Last active October 4, 2024 13:44
Show Gist options
  • Save jjkavalam/460c5398739a4bcf3d2c4159df8edca6 to your computer and use it in GitHub Desktop.
Save jjkavalam/460c5398739a4bcf3d2c4159df8edca6 to your computer and use it in GitHub Desktop.
Convert Thiruvachanam font to Unicode
// U+0D4D MALAYALAM SIGN VIRAMA: joins consonants into a conjunct.
const Virama = "\u0d4d";

/**
 * Lookup table from a Thiruvachanam font character code to the Unicode
 * Malayalam text that the glyph stands for.
 *
 * Codes 0x73, 0x74 and 0x7B are intentionally absent: they are written before
 * the character they attach to and are reordered by the converter instead.
 */
const DirectMapping = {
  // Independent vowels
  0x41: 'അ',
  0x42: 'ആ',
  0x43: 'ഇ',
  0x44: 'ഉ',
  0x45: 'ഋ',
  0x46: 'എ',
  0x47: 'ഏ',
  0x48: 'ഒ',
  // Consonants
  0x49: 'ക',
  0x4A: 'ഖ',
  0x4B: 'ഗ',
  0x4C: 'ഘ',
  0x4D: 'ങ',
  0x4E: 'ച',
  0x4F: 'ഛ',
  0x50: 'ജ',
  0x51: 'ഝ',
  0x52: 'ഞ',
  0x53: 'ട',
  0x54: 'ഠ',
  0x55: 'ഡ',
  0x56: 'ഢ',
  0x57: 'ണ',
  0x58: 'ത',
  0x59: 'ഥ',
  0x5A: 'ദ',
  0x5B: 'ധ',
  0x5C: 'ന',
  0x5D: 'പ',
  0x5E: 'ഫ',
  0x5F: 'ബ',
  0x60: 'ഭ',
  0x61: 'മ',
  0x62: 'യ',
  0x63: 'ര',
  0x64: 'റ',
  0x65: 'ല',
  0x66: 'ള',
  0x67: 'ഴ',
  0x68: 'വ',
  0x69: 'ശ',
  0x6A: 'ഷ',
  0x6B: 'സ',
  0x6C: 'ഹ',
  // Dependent vowel signs and other marks
  0x6D: 'ാ',
  0x6E: 'ി',
  0x6F: 'ീ',
  0x70: 'ു',
  0x71: 'ൂ',
  0x72: 'ൃ',
  0x75: 'ൗ',
  0x76: '്',
  0x77: 'ം',
  0x78: 'ഃ',
  // Signs that follow the base character: virama + consonant
  0x79: Virama + "\u0d2f", // virama + YA
  0x7A: Virama + "\u0d35", // virama + VA
  // virama + YA + virama + anusvara.
  // NOTE(review): a virama directly before the anusvara looks unusual - confirm against the font.
  0x7C: Virama + "\u0d2f" + Virama + "\u0d02",
  // Conjuncts, chillu letters and consonant + u ligatures
  0xA1: 'ക്ക',
  0xA2: 'ക്ല',
  0xA3: 'ക്ഷ',
  0xA4: 'ഗ്ഗ',
  0xA5: 'ഗ്ല',
  0xA6: 'ങ്ക',
  0xA7: 'ങ്ങ',
  0xA8: 'ച്ച',
  0xA9: 'ഞ്ച',
  0xAA: 'ഞ്ഞ',
  0xAB: 'ട്ട',
  0xAC: 'ൺ',
  0xAD: 'ണ്ട',
  0xAE: 'ണ്ണ',
  0xAF: 'ത്ത',
  // Built with the Virama constant so the joiner between the two consonants is explicit.
  0xB0: 'ത' + Virama + 'ഥ',
  0xB1: 'ദ്ദ',
  0xB2: 'ദ്ധ',
  0xB3: 'ൻ',
  0xB4: 'ന്ത',
  0xB5: 'ന്ദ',
  0xB6: 'ന്ന',
  0xB7: 'ന്മ',
  0xB8: 'പ്പ',
  0xB9: 'പ്ല',
  0xBA: 'ബ്ബ',
  0xBB: 'ബ്ല',
  0xBC: 'മ്പ',
  0xBD: 'മ്മ',
  0xBE: 'മ്ല',
  0xBF: 'യ്യ',
  0xC0: 'ർ',
  0xC1: 'റ്റ',
  0xC2: 'ൽ',
  0xC3: 'ല്ല',
  0xC4: 'ൾ',
  0xC5: 'ള്ള',
  0xC6: 'വ്വ',
  0xC7: 'ശ്ല',
  0xC8: 'ശ്ശ',
  0xC9: 'സ്ല',
  0xCA: 'സ്സ',
  0xCB: 'ഹ്ല',
  0xCC: 'സ്റ്റ',
  0xCD: 'ഡ്ഡ',
  0xCE: 'ക്ട',
  0xCF: 'ബ്ധ',
  0xD0: 'ബ്ദ',
  0xD1: 'ച്ഛ',
  0xD2: 'പ്മ',
  // Built with the Virama constant so the joiner between the two consonants is explicit.
  0xD3: 'ഹ' + Virama + 'ന',
  0xD4: 'ന്ധ',
  0xD5: 'ത്സ',
  0xD6: 'ജ്ജ',
  0xD7: 'ണ്മ',
  0xD8: 'സ്ഥ',
  0xD9: 'ന്ഥ',
  0xDA: 'ജ്ഞ',
  0xDB: 'ത്ഭ',
  0xDC: 'ഗ്മ',
  0xDD: 'ശ്ച',
  0xDE: 'ണ്ഡ',
  0xDF: 'ത്മ',
  0xE0: 'ക്ത',
  0xE1: 'ഗ്ന',
  0xE2: 'ന്റ',
  0xE3: 'ഷ്ട',
  0xE4: 'റ്റ',
  0xE5: 'ല്പ',
  0xE6: 'കു',
  0xE7: 'ക്കു',
  0xE8: 'ങ്കു',
  0xE9: 'ണു',
  0xEA: 'രു',
  0xEB: 'നു',
  0xEC: 'ന്നു',
  0xED: 'യ്ക്കു',
  0xEE: 'ൻ',
  0xEF: 'ണ്ട',
};
/**
 * Translates one Thiruvachanam character code into Unicode text.
 *
 * @param {number} c - character code taken from the input string
 * @returns {string} the `DirectMapping` entry for `c` when there is one,
 *   otherwise the character that has code `c` itself
 */
function map(c) {
  const mapped = DirectMapping[c];
  return mapped ? mapped : String.fromCharCode(c);
}
/**
 * Parses input and converts it to unicode.
 *
 * Most character codes are translated one-to-one through `map`. Three codes
 * are written BEFORE the character they attach to in the input and must be
 * emitted AFTER it in the output, so they are handled by looking ahead:
 *   - 0x73 -> U+0D46 (two in a row -> U+0D48)
 *   - 0x74 -> U+0D47
 *   - 0x7B -> virama + U+0D30
 * When code 0x6D follows the base character, 0x73/0x74 combine with it into
 * U+0D4A/U+0D4B. A few two-code sequences also collapse into a single
 * independent vowel.
 *
 * If one of the reordering codes is cut off by the end of the input, only the
 * sign itself is emitted (no placeholder character is produced for the
 * missing base).
 *
 * @param {string} input - raw input encoded using Thiruvachanam font
 * @param {string} nl - (optional) string sequence used to represent a newline in the input
 * @returns {string} the converted text; each occurrence of `nl` becomes "\n"
 */
function convertThiruvachanamToUnicode(input, nl = null) {
  // Sign emitted after the base character for code 0x7B.
  const raSign = Virama + "\u0d30";
  // charCodeAt yields NaN past the end of the input, and String.fromCharCode(NaN)
  // is "\u0000"; a missing base character must therefore contribute nothing.
  const base = (code) => (Number.isNaN(code) ? "" : map(code));

  const N = input.length;
  let result = "";
  for (let i = 0; i < N; i++) {
    if (nl && input.substring(i, i + nl.length) === nl) {
      result += "\n";
      // newline detected; skip ahead (the loop's i++ consumes its last character)
      i += nl.length - 1;
      continue;
    }

    const c1 = input.charCodeAt(i);
    const c2 = input.charCodeAt(i + 1);
    const c3 = input.charCodeAt(i + 2);
    const c4 = input.charCodeAt(i + 3);

    // In every branch `i` is advanced by (codes consumed - 1); the loop's own
    // i++ accounts for the remaining one. Branch order matters: longer
    // patterns must be tested before their shorter prefixes.

    // special cases: two characters combine into one
    if (c1 === 0x43 && c2 === 0x75) {
      result += "ഈ";
      i += 1;
    } else if (c1 === 0x44 && c2 === 0x75) {
      result += "ഊ";
      i += 1;
    } else if (c1 === 0x73 && c2 === 0x46) {
      result += "ഐ";
      i += 1;
    } else if (c1 === 0x48 && c2 === 0x6D) {
      result += "ഓ";
      i += 1;
    } else if (c1 === 0x48 && c2 === 0x75) {
      result += "ഔ";
      i += 1;
    }
    // special combos where 0x7B (Virama + "\u0d30") is involved
    else if (c1 === 0x73 && c2 === 0x7B && c4 === 0x6D) {
      // e.g. വ്രൊ
      result += base(c3) + raSign + "\u0d4A";
      i += 3;
    } else if (c1 === 0x74 && c2 === 0x7B && c4 === 0x6D) {
      // e.g. വ്രോ
      result += base(c3) + raSign + "\u0d4B";
      i += 3;
    } else if (c1 === 0x73 && c2 === 0x73 && c3 === 0x7B) {
      // e.g. വ്രൈ
      result += base(c4) + raSign + "\u0d48";
      i += 3;
    } else if (c1 === 0x73 && c2 === 0x7B) {
      // e.g. വ്രെ
      result += base(c3) + raSign + "\u0d46";
      i += 2;
    } else if (c1 === 0x74 && c2 === 0x7B) {
      // e.g. വ്രേ
      result += base(c3) + raSign + "\u0d47";
      i += 2;
    }
    // special cases: reordering
    else if (c1 === 0x73 && c3 === 0x6D) {
      // e.g. വൊ
      result += base(c2) + "\u0d4A";
      i += 2;
    } else if (c1 === 0x74 && c3 === 0x6D) {
      // e.g. വോ
      result += base(c2) + "\u0d4B";
      i += 2;
    } else if (c1 === 0x73 && c2 === 0x73) {
      // e.g. വൈ
      result += base(c3) + "\u0d48";
      i += 2;
    } else if (c1 === 0x7B) {
      // e.g. വ്ര
      result += base(c2) + raSign;
      i += 1;
    } else if (c1 === 0x73) {
      // e.g. വെ
      result += base(c2) + "\u0d46";
      i += 1;
    } else if (c1 === 0x74) {
      // e.g. വേ
      result += base(c2) + "\u0d47";
      i += 1;
    } else {
      // simple direct mapping
      result += map(c1);
    }
  }
  return result;
}
module.exports = {
convertThiruvachanamToUnicode
};
// Example usage (expected output: തിരുവചനം):
// console.log(convertThiruvachanamToUnicode("XncphN\\w"));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment