jjkavalam/thiruvachanamToUnicode.js

## thiruvachanamToUnicode.js
// Malayalam sign virama (chandrakkala); joins consonants into a conjunct.
const Virama = "\u0d4d";

/**
 * Lookup table from a character code of the font-encoded input to the
 * Unicode text that the glyph stands for. Codes that have no entry here are
 * handled by the caller.
 */
const DirectMapping = {
    // independent vowels
    0x41: 'അ',
    0x42: 'ആ',
    0x43: 'ഇ',
    0x44: 'ഉ',
    0x45: 'ഋ',
    0x46: 'എ',
    0x47: 'ഏ',
    0x48: 'ഒ',

    // consonants
    0x49: 'ക',
    0x4A: 'ഖ',
    0x4B: 'ഗ',
    0x4C: 'ഘ',
    0x4D: 'ങ',
    0x4E: 'ച',
    0x4F: 'ഛ',
    0x50: 'ജ',
    0x51: 'ഝ',
    0x52: 'ഞ',
    0x53: 'ട',
    0x54: 'ഠ',
    0x55: 'ഡ',
    0x56: 'ഢ',
    0x57: 'ണ',
    0x58: 'ത',
    0x59: 'ഥ',
    0x5A: 'ദ',
    0x5B: 'ധ',
    0x5C: 'ന',
    0x5D: 'പ',
    0x5E: 'ഫ',
    0x5F: 'ബ',
    0x60: 'ഭ',
    0x61: 'മ',
    0x62: 'യ',
    0x63: 'ര',
    0x64: 'റ',
    0x65: 'ല',
    0x66: 'ള',
    0x67: 'ഴ',
    0x68: 'വ',
    0x69: 'ശ',
    0x6A: 'ഷ',
    0x6B: 'സ',
    0x6C: 'ഹ',

    // dependent vowel signs and other marks
    0x6D: 'ാ',
    0x6E: 'ി',
    0x6F: 'ീ',
    0x70: 'ു',
    0x71: 'ൂ',
    0x72: 'ൃ',
    0x75: 'ൗ',
    0x76: '്',
    0x77: 'ം',
    0x78: 'ഃ',

    // conjunct glyphs, chillus and consonant + vowel-sign ligatures
    0xA1: 'ക്ക',
    0xA2: 'ക്ല',
    0xA3: 'ക്ഷ',
    0xA4: 'ഗ്ഗ',
    0xA5: 'ഗ്ല',
    0xA6: 'ങ്ക',
    0xA7: 'ങ്ങ',
    0xA8: 'ച്ച',
    0xA9: 'ഞ്ച',
    0xAA: 'ഞ്ഞ',
    0xAB: 'ട്ട',
    0xAC: 'ൺ',
    0xAD: 'ണ്ട',
    0xAE: 'ണ്ണ',
    0xAF: 'ത്ത',
    // Was 'ത' + 'ഥ' without the virama, which decoded as two separate letters.
    0xB0: 'ത' + Virama + 'ഥ',
    0xB1: 'ദ്ദ',
    0xB2: 'ദ്ധ',
    0xB3: 'ൻ',
    0xB4: 'ന്ത',
    0xB5: 'ന്ദ',
    0xB6: 'ന്ന',
    0xB7: 'ന്മ',
    0xB8: 'പ്പ',
    0xB9: 'പ്ല',
    0xBA: 'ബ്ബ',
    0xBB: 'ബ്ല',
    0xBC: 'മ്പ',
    0xBD: 'മ്മ',
    0xBE: 'മ്ല',
    0xBF: 'യ്യ',
    0xC0: 'ർ',
    0xC1: 'റ്റ',
    0xC2: 'ൽ',
    0xC3: 'ല്ല',
    0xC4: 'ൾ',
    0xC5: 'ള്ള',
    0xC6: 'വ്വ',
    0xC7: 'ശ്ല',
    0xC8: 'ശ്ശ',
    0xC9: 'സ്ല',
    0xCA: 'സ്സ',
    0xCB: 'ഹ്ല',
    0xCC: 'സ്റ്റ',
    0xCD: 'ഡ്ഡ',
    0xCE: 'ക്ട',
    0xCF: 'ബ്ധ',
    0xD0: 'ബ്ദ',
    0xD1: 'ച്ഛ',
    0xD2: 'പ്മ',
    // Was 'ഹ' + 'ന' without the virama, which decoded as two separate letters.
    0xD3: 'ഹ' + Virama + 'ന',
    0xD4: 'ന്ധ',
    0xD5: 'ത്സ',
    0xD6: 'ജ്ജ',
    0xD7: 'ണ്മ',
    0xD8: 'സ്ഥ',
    0xD9: 'ന്ഥ',
    0xDA: 'ജ്ഞ',
    0xDB: 'ത്ഭ',
    0xDC: 'ഗ്മ',
    0xDD: 'ശ്ച',
    0xDE: 'ണ്ഡ',
    0xDF: 'ത്മ',
    0xE0: 'ക്ത',
    0xE1: 'ഗ്ന',
    0xE2: 'ന്റ',
    0xE3: 'ഷ്ട',
    0xE4: 'റ്റ',
    0xE5: 'ല്പ',
    0xE6: 'കു',
    0xE7: 'ക്കു',
    0xE8: 'ങ്കു',
    0xE9: 'ണു',
    0xEA: 'രു',
    0xEB: 'നു',
    0xEC: 'ന്നു',
    0xED: 'യ്ക്കു',
    0xEE: 'ൻ',
    0xEF: 'ണ്ട',

    // post-base conjunct signs (the names below use "v" as the sample consonant)
    0x79: Virama + "\u0d2f", // vya
    0x7A: Virama + "\u0d35", // vva
    // NOTE(review): the virama placed between ya and the anusvara looks
    // unintended for "vyam" - confirm against the font's glyph table.
    0x7C: Virama + "\u0d2f" + Virama + "\u0d02", // vyam
};


/**
 * Translates one character code of the font-encoded input into Unicode text.
 *
 * @param {number} c - UTF-16 code unit, as returned by String.prototype.charCodeAt
 * @returns {string} the DirectMapping entry for c, or the character with code
 *     c when the table has no entry, or "" when c is NaN
 */
function map(c) {
    // charCodeAt() returns NaN for an index past the end of the string, and
    // String.fromCharCode(NaN) would turn that into a stray "\u0000".
    if (Number.isNaN(c)) {
        return "";
    }
    const mapped = DirectMapping[c];
    return typeof mapped === "string" ? mapped : String.fromCharCode(c);
}

/**
 * Parses input and converts it to unicode.
 *
 * The input is in visual order: the signs drawn to the left of a consonant
 * (0x73 െ, 0x74 േ, a doubled 0x73 for ൈ, and 0x7B ്ര) come before it. Each
 * such cluster is re-emitted in logical order as
 * consonant + ്ര + ്യ/്വ + vowel sign.
 *
 * @param {string} input - raw input encoded using Thiruvachanam font
 * @param {string} nl - (optional) string sequence used to represent a newline in the input
 * @returns {string} the converted text; every occurrence of nl becomes "\n"
 */
function convertThiruvachanamToUnicode(input, nl = null) {
    const SIGN_AA = 0x6D; // ാ
    const SIGN_E = 0x73;  // െ (typed before its consonant)
    const SIGN_EE = 0x74; // േ (typed before its consonant)
    const SIGN_YA = 0x79; // ്യ (typed after its consonant)
    const SIGN_VA = 0x7A; // ്വ (typed after its consonant)
    const SIGN_RA = 0x7B; // ്ര (typed before its consonant)

    // Independent vowels that the font spells with two glyphs, keyed by the
    // raw input pair.
    const independentVowels = new Map([
        ["Cu", "ഈ"], // 0x43 0x75
        ["Du", "ഊ"], // 0x44 0x75
        ["sF", "ഐ"], // 0x73 0x46
        ["Hm", "ഓ"], // 0x48 0x6D
        ["Hu", "ഔ"], // 0x48 0x75
    ]);

    const N = input.length;
    let result = "";
    let i = 0;
    while (i < N) {
        if (nl && input.substring(i, i + nl.length) === nl) {
            // newline detected; skip past the whole marker
            result += "\n";
            i += nl.length;
            continue;
        }

        // special cases: two characters combine into one
        const vowel = independentVowels.get(input.substring(i, i + 2));
        if (vowel !== undefined) {
            result += vowel;
            i += 2;
            continue;
        }

        const c1 = input.charCodeAt(i);
        if (c1 !== SIGN_E && c1 !== SIGN_EE && c1 !== SIGN_RA) {
            // simple direct mapping
            result += map(c1);
            i += 1;
            continue;
        }

        // Reordering: [െ | െെ | േ] [്ര] consonant [്യ | ്വ]* [ാ]
        let j = i;
        let vowelSign = "";
        if (c1 === SIGN_E) {
            if (input.charCodeAt(i + 1) === SIGN_E) {
                vowelSign = "\u0d48"; // e.g. വൈ
                j += 2;
            } else {
                vowelSign = "\u0d46"; // e.g. വെ
                j += 1;
            }
        } else if (c1 === SIGN_EE) {
            vowelSign = "\u0d47"; // e.g. വേ
            j += 1;
        }

        let conjuncts = "";
        if (input.charCodeAt(j) === SIGN_RA) {
            conjuncts = Virama + "\u0d30"; // e.g. വ്ര
            j += 1;
        }

        // A cluster cut off by the end of the input has no consonant; emit
        // only the signs instead of asking map() for a non-existent character.
        let base = "";
        if (j < N) {
            base = map(input.charCodeAt(j));
            j += 1;
        }

        // Conjunct signs typed after the consonant must still come before the
        // vowel sign in the output, e.g. ത്യേ rather than തേ്യ.
        let next = input.charCodeAt(j);
        while (next === SIGN_YA || next === SIGN_VA) {
            conjuncts += map(next);
            j += 1;
            next = input.charCodeAt(j);
        }

        // െ / േ followed by a trailing ാ are the two halves of ൊ / ോ.
        if (next === SIGN_AA) {
            if (vowelSign === "\u0d46") {
                vowelSign = "\u0d4A"; // e.g. വൊ
                j += 1;
            } else if (vowelSign === "\u0d47") {
                vowelSign = "\u0d4B"; // e.g. വോ
                j += 1;
            }
        }

        result += base + conjuncts + vowelSign;
        i = j;
    }
    return result;
}

module.exports = {
    convertThiruvachanamToUnicode
};

// Usage example; the call below prints "തിരുവചനം":
// console.log(convertThiruvachanamToUnicode("XncphN\\w"));
	// Malayalam sign virama (chandrakkala); joins consonants into a conjunct.
	const Virama = "\u0d4d";

	/**
	 * Lookup table from a character code of the font-encoded input to the
	 * Unicode text that the glyph stands for. Codes that have no entry here are
	 * handled by the caller.
	 */
	const DirectMapping = {
		// independent vowels
		0x41: 'അ',
		0x42: 'ആ',
		0x43: 'ഇ',
		0x44: 'ഉ',
		0x45: 'ഋ',
		0x46: 'എ',
		0x47: 'ഏ',
		0x48: 'ഒ',

		// consonants
		0x49: 'ക',
		0x4A: 'ഖ',
		0x4B: 'ഗ',
		0x4C: 'ഘ',
		0x4D: 'ങ',
		0x4E: 'ച',
		0x4F: 'ഛ',
		0x50: 'ജ',
		0x51: 'ഝ',
		0x52: 'ഞ',
		0x53: 'ട',
		0x54: 'ഠ',
		0x55: 'ഡ',
		0x56: 'ഢ',
		0x57: 'ണ',
		0x58: 'ത',
		0x59: 'ഥ',
		0x5A: 'ദ',
		0x5B: 'ധ',
		0x5C: 'ന',
		0x5D: 'പ',
		0x5E: 'ഫ',
		0x5F: 'ബ',
		0x60: 'ഭ',
		0x61: 'മ',
		0x62: 'യ',
		0x63: 'ര',
		0x64: 'റ',
		0x65: 'ല',
		0x66: 'ള',
		0x67: 'ഴ',
		0x68: 'വ',
		0x69: 'ശ',
		0x6A: 'ഷ',
		0x6B: 'സ',
		0x6C: 'ഹ',

		// dependent vowel signs and other marks
		0x6D: 'ാ',
		0x6E: 'ി',
		0x6F: 'ീ',
		0x70: 'ു',
		0x71: 'ൂ',
		0x72: 'ൃ',
		0x75: 'ൗ',
		0x76: '്',
		0x77: 'ം',
		0x78: 'ഃ',

		// conjunct glyphs, chillus and consonant + vowel-sign ligatures
		0xA1: 'ക്ക',
		0xA2: 'ക്ല',
		0xA3: 'ക്ഷ',
		0xA4: 'ഗ്ഗ',
		0xA5: 'ഗ്ല',
		0xA6: 'ങ്ക',
		0xA7: 'ങ്ങ',
		0xA8: 'ച്ച',
		0xA9: 'ഞ്ച',
		0xAA: 'ഞ്ഞ',
		0xAB: 'ട്ട',
		0xAC: 'ൺ',
		0xAD: 'ണ്ട',
		0xAE: 'ണ്ണ',
		0xAF: 'ത്ത',
		// Was 'ത' + 'ഥ' without the virama, which decoded as two separate letters.
		0xB0: 'ത' + Virama + 'ഥ',
		0xB1: 'ദ്ദ',
		0xB2: 'ദ്ധ',
		0xB3: 'ൻ',
		0xB4: 'ന്ത',
		0xB5: 'ന്ദ',
		0xB6: 'ന്ന',
		0xB7: 'ന്മ',
		0xB8: 'പ്പ',
		0xB9: 'പ്ല',
		0xBA: 'ബ്ബ',
		0xBB: 'ബ്ല',
		0xBC: 'മ്പ',
		0xBD: 'മ്മ',
		0xBE: 'മ്ല',
		0xBF: 'യ്യ',
		0xC0: 'ർ',
		0xC1: 'റ്റ',
		0xC2: 'ൽ',
		0xC3: 'ല്ല',
		0xC4: 'ൾ',
		0xC5: 'ള്ള',
		0xC6: 'വ്വ',
		0xC7: 'ശ്ല',
		0xC8: 'ശ്ശ',
		0xC9: 'സ്ല',
		0xCA: 'സ്സ',
		0xCB: 'ഹ്ല',
		0xCC: 'സ്റ്റ',
		0xCD: 'ഡ്ഡ',
		0xCE: 'ക്ട',
		0xCF: 'ബ്ധ',
		0xD0: 'ബ്ദ',
		0xD1: 'ച്ഛ',
		0xD2: 'പ്മ',
		// Was 'ഹ' + 'ന' without the virama, which decoded as two separate letters.
		0xD3: 'ഹ' + Virama + 'ന',
		0xD4: 'ന്ധ',
		0xD5: 'ത്സ',
		0xD6: 'ജ്ജ',
		0xD7: 'ണ്മ',
		0xD8: 'സ്ഥ',
		0xD9: 'ന്ഥ',
		0xDA: 'ജ്ഞ',
		0xDB: 'ത്ഭ',
		0xDC: 'ഗ്മ',
		0xDD: 'ശ്ച',
		0xDE: 'ണ്ഡ',
		0xDF: 'ത്മ',
		0xE0: 'ക്ത',
		0xE1: 'ഗ്ന',
		0xE2: 'ന്റ',
		0xE3: 'ഷ്ട',
		0xE4: 'റ്റ',
		0xE5: 'ല്പ',
		0xE6: 'കു',
		0xE7: 'ക്കു',
		0xE8: 'ങ്കു',
		0xE9: 'ണു',
		0xEA: 'രു',
		0xEB: 'നു',
		0xEC: 'ന്നു',
		0xED: 'യ്ക്കു',
		0xEE: 'ൻ',
		0xEF: 'ണ്ട',

		// post-base conjunct signs (the names below use "v" as the sample consonant)
		0x79: Virama + "\u0d2f", // vya
		0x7A: Virama + "\u0d35", // vva
		// NOTE(review): the virama placed between ya and the anusvara looks
		// unintended for "vyam" - confirm against the font's glyph table.
		0x7C: Virama + "\u0d2f" + Virama + "\u0d02", // vyam
	};


	/**
	 * Translates one character code of the font-encoded input into Unicode text.
	 *
	 * @param {number} c - UTF-16 code unit, as returned by String.prototype.charCodeAt
	 * @returns {string} the DirectMapping entry for c, or the character with code
	 *     c when the table has no entry, or "" when c is NaN
	 */
	function map(c) {
		// charCodeAt() returns NaN for an index past the end of the string, and
		// String.fromCharCode(NaN) would turn that into a stray "\u0000".
		if (Number.isNaN(c)) {
			return "";
		}
		const mapped = DirectMapping[c];
		return typeof mapped === "string" ? mapped : String.fromCharCode(c);
	}

	/**
	 * Parses input and converts it to unicode.
	 *
	 * The input is in visual order: the signs drawn to the left of a consonant
	 * (0x73 െ, 0x74 േ, a doubled 0x73 for ൈ, and 0x7B ്ര) come before it. Each
	 * such cluster is re-emitted in logical order as
	 * consonant + ്ര + ്യ/്വ + vowel sign.
	 *
	 * @param {string} input - raw input encoded using Thiruvachanam font
	 * @param {string} nl - (optional) string sequence used to represent a newline in the input
	 * @returns {string} the converted text; every occurrence of nl becomes "\n"
	 */
	function convertThiruvachanamToUnicode(input, nl = null) {
		const SIGN_AA = 0x6D; // ാ
		const SIGN_E = 0x73;  // െ (typed before its consonant)
		const SIGN_EE = 0x74; // േ (typed before its consonant)
		const SIGN_YA = 0x79; // ്യ (typed after its consonant)
		const SIGN_VA = 0x7A; // ്വ (typed after its consonant)
		const SIGN_RA = 0x7B; // ്ര (typed before its consonant)

		// Independent vowels that the font spells with two glyphs, keyed by the
		// raw input pair.
		const independentVowels = new Map([
			["Cu", "ഈ"], // 0x43 0x75
			["Du", "ഊ"], // 0x44 0x75
			["sF", "ഐ"], // 0x73 0x46
			["Hm", "ഓ"], // 0x48 0x6D
			["Hu", "ഔ"], // 0x48 0x75
		]);

		const N = input.length;
		let result = "";
		let i = 0;
		while (i < N) {
			if (nl && input.substring(i, i + nl.length) === nl) {
				// newline detected; skip past the whole marker
				result += "\n";
				i += nl.length;
				continue;
			}

			// special cases: two characters combine into one
			const vowel = independentVowels.get(input.substring(i, i + 2));
			if (vowel !== undefined) {
				result += vowel;
				i += 2;
				continue;
			}

			const c1 = input.charCodeAt(i);
			if (c1 !== SIGN_E && c1 !== SIGN_EE && c1 !== SIGN_RA) {
				// simple direct mapping
				result += map(c1);
				i += 1;
				continue;
			}

			// Reordering: [െ | െെ | േ] [്ര] consonant [്യ | ്വ]* [ാ]
			let j = i;
			let vowelSign = "";
			if (c1 === SIGN_E) {
				if (input.charCodeAt(i + 1) === SIGN_E) {
					vowelSign = "\u0d48"; // e.g. വൈ
					j += 2;
				} else {
					vowelSign = "\u0d46"; // e.g. വെ
					j += 1;
				}
			} else if (c1 === SIGN_EE) {
				vowelSign = "\u0d47"; // e.g. വേ
				j += 1;
			}

			let conjuncts = "";
			if (input.charCodeAt(j) === SIGN_RA) {
				conjuncts = Virama + "\u0d30"; // e.g. വ്ര
				j += 1;
			}

			// A cluster cut off by the end of the input has no consonant; emit
			// only the signs instead of asking map() for a non-existent character.
			let base = "";
			if (j < N) {
				base = map(input.charCodeAt(j));
				j += 1;
			}

			// Conjunct signs typed after the consonant must still come before the
			// vowel sign in the output, e.g. ത്യേ rather than തേ്യ.
			let next = input.charCodeAt(j);
			while (next === SIGN_YA || next === SIGN_VA) {
				conjuncts += map(next);
				j += 1;
				next = input.charCodeAt(j);
			}

			// െ / േ followed by a trailing ാ are the two halves of ൊ / ോ.
			if (next === SIGN_AA) {
				if (vowelSign === "\u0d46") {
					vowelSign = "\u0d4A"; // e.g. വൊ
					j += 1;
				} else if (vowelSign === "\u0d47") {
					vowelSign = "\u0d4B"; // e.g. വോ
					j += 1;
				}
			}

			result += base + conjuncts + vowelSign;
			i = j;
		}
		return result;
	}

	module.exports = {
	convertThiruvachanamToUnicode
	};

	// Usage example; the call below prints "തിരുവചനം":
	// console.log(convertThiruvachanamToUnicode("XncphN\\w"));