-
-
Save akprasad/cd0d3df4f51de2add99748cadc3c47bd to your computer and use it in GitHub Desktop.
comparison script between vidyut-prakriya and ashtadhyayi.com (kartari tinantas)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import json
from collections import Counter
from dataclasses import dataclass

from indic_transliteration import sanscript
@dataclass
class Tinanta:
    """One conjugated verb form together with the dhatu and lakara it belongs to."""

    # First component of the dhatu's "gana.number" identifier.
    gana: int
    # Second component of the dhatu's "gana.number" identifier.
    number: int
    # Lakara label such as "lat", "lit" or "vidhi-lin".
    lakara: str
    # The form itself. The ashtadhyayi.com loader stores SLP1 text here;
    # presumably the vidyut CSV is SLP1 as well -- TODO confirm.
    text: str
def load_vidyut_padas(path: str = "tinantas-basic-kartari.csv") -> list[Tinanta]:
    """Load the padas produced by vidyut-prakriya from a CSV file.

    Each row must provide the columns ``gana``, ``number``, ``lakara`` and
    ``padas``; ``padas`` holds one or more forms separated by ``|``.

    Args:
        path: CSV file to read. Defaults to the file name this script has
            always used, resolved against the current working directory.

    Returns:
        One ``Tinanta`` per non-empty form, in file order.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a required column is missing.
        ValueError: if ``gana`` or ``number`` is not an integer.
    """
    print("Loading vidyut padas")
    padas = []
    # newline="" is what the csv module asks for, and the explicit encoding
    # keeps the result independent of the platform's locale.
    with open(path, encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            gana = int(row["gana"])
            number = int(row["number"])
            lakara = row["lakara"]
            for pada in row["padas"].split("|"):
                # An empty cell splits to [""]; skip blanks (the
                # ashtadhyayi.com loader skips them too) so they never
                # surface as spurious mismatches.
                if not pada:
                    continue
                padas.append(
                    Tinanta(gana=gana, number=number, lakara=lakara, text=pada)
                )
    return padas
def load_acom_padas(path: str = "dhatu/dhatuforms.txt") -> list[Tinanta]:
    """Load the padas published by ashtadhyayi.com from its JSON dump.

    The file is expected to be a JSON object mapping ``"gana.number"`` keys to
    an object that maps a raw lakara name (e.g. ``"plat"``) to a Devanagari
    string of forms separated by ``;`` and ``,``.

    Args:
        path: JSON file to read. Defaults to the path this script has always
            used, resolved against the current working directory.

    Returns:
        One ``Tinanta`` per non-empty form, with the text transliterated to
        SLP1 and the lakara mapped to the short labels in ``la_map``.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if a lakara name is not present in ``la_map``.
        ValueError: if a key is not of the form ``"<int>.<int>"``.
    """
    print("Loading ashtadhyayi.com padas")
    # Raw names come in "p..." / "a..." pairs that map to the same label
    # (presumably parasmaipada / atmanepada -- TODO confirm).
    la_map = {
        "plat": "lat",
        "alat": "lat",
        "plit": "lit",
        "alit": "lit",
        "plut": "lut",
        "alut": "lut",
        "plrut": "lrt",
        "alrut": "lrt",
        "plot": "lot",
        "alot": "lot",
        "plang": "lan",
        "alang": "lan",
        "pvidhiling": "vidhi-lin",
        "avidhiling": "vidhi-lin",
        "pashirling": "ashir-lin",
        "aashirling": "ashir-lin",
        "plung": "lun",
        "alung": "lun",
        "plrung": "lrn",
        "alrung": "lrn",
    }
    # The data is Devanagari, so decode explicitly as UTF-8 instead of relying
    # on the locale's default encoding.
    with open(path, encoding="utf-8") as f:
        data = json.load(f)

    padas = []
    for key, dhatu_padas in data.items():
        gana_str, number_str = key.split(".")
        gana = int(gana_str)
        number = int(number_str)
        for raw_lakara, raw_la_padas in dhatu_padas.items():
            lakara = la_map[raw_lakara]
            # Transliterate the whole string once; the ";" and "," separators
            # are then used to split the SLP1 result.
            raw_la_padas_slp1 = sanscript.transliterate(
                raw_la_padas, "devanagari", "slp1"
            )
            for cell in raw_la_padas_slp1.split(";"):
                for pada in cell.split(","):
                    if pada:
                        padas.append(
                            Tinanta(
                                gana=gana,
                                number=number,
                                lakara=lakara,
                                text=pada,
                            )
                        )
    return padas
def _to_dhatu_map(padas: "list[Tinanta]") -> dict[tuple[int, int], set[str]]:
    """Group pada texts by dhatu.

    Args:
        padas: forms to group; only ``gana``, ``number`` and ``text`` are read.

    Returns:
        A dict keyed by ``(gana, number)`` whose values are the sets of pada
        texts for that dhatu. The lakara is ignored, so forms from every
        lakara of a dhatu share one set. A dhatu whose padas were all skipped
        still gets an (empty) entry.
    """
    ret: dict[tuple[int, int], set[str]] = {}
    for p in padas:
        # Register the dhatu before filtering so it is present even when every
        # one of its padas is skipped below.
        texts = ret.setdefault((p.gana, p.number), set())
        # Leave out padas that end in "d" for ease of comparison. The whole
        # pada is skipped; the trailing "d" is not stripped. (Presumably these
        # are optional variants of forms ending in "t" -- TODO confirm.)
        if not p.text.endswith("d"):
            texts.add(p.text)
    return ret
def compare(vidyut_padas: list[Tinanta], acom_padas: list[Tinanta]):
    """Print a per-dhatu comparison of the two sets of padas.

    Both inputs are grouped by dhatu with ``_to_dhatu_map``. Every dhatu found
    in the vidyut data is then classified as missing from ashtadhyayi.com, an
    exact match, or a mismatch. For a mismatch, the forms found only in vidyut
    and the forms found only in ashtadhyayi.com are printed, in that order.
    A summary ``Counter`` is printed at the end.

    Dhatus that appear only in the ashtadhyayi.com data are not reported.

    Args:
        vidyut_padas: forms generated by vidyut-prakriya.
        acom_padas: forms taken from ashtadhyayi.com.
    """
    # group by dhatu
    vidyut_map = _to_dhatu_map(vidyut_padas)
    acom_map = _to_dhatu_map(acom_padas)

    c = Counter()
    for key, v_padas in vidyut_map.items():
        a_padas = acom_map.get(key)
        if a_padas is None:
            c["Vidyut key missing in ashtadhyayi.com"] += 1
            continue
        if v_padas == a_padas:
            c["Dhatu exact match"] += 1
            continue

        c["Dhatu mismatch"] += 1
        gana, number = key
        print(f"{gana}.{number} mismatch:")
        # Sorted so the report is identical from run to run; a bare set prints
        # in hash order, which changes with string hash randomization.
        print(" ", sorted(v_padas - a_padas))
        print(" ", sorted(a_padas - v_padas))
        print()

    print("Counters:")
    print(c)
def main():
    """Load both datasets and print the comparison report."""
    vidyut_padas = load_vidyut_padas()
    acom_padas = load_acom_padas()
    compare(vidyut_padas, acom_padas)


# Guarded so that importing this module does not start reading the data files.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment