# A Python script for verifying that a Prakrit gāthā is metrically correct.
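# -*- coding: utf-8 -*-
""" Usage: python3 verify_gatha.py FILENAME """
""" Results in BASENAME.log (metrical data, with any errors listed
inline), BASENAME-syllables-line1.csv and BASENAME-syllables-line2.csv
(syllable-count tallies), where BASENAME is FILENAME without its
extension. """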
""" Take a file in UTF-8 encoding, in the ISO-15919 transliteration
scheme, and try to scan its verses and match them against the
canonical pattern of the Prakrit gāthā. If there are any errors,
record them in the log file. """
""" I have explicitly not done anything about transliteration.
sanscript.py works wonderfully, so I will start on the assumption
that the file uses the ISO-15919 transliteration scheme. As a
consequence, this script uses a short subroutine to replace aspirates
with the capitalized versions thereof (e.g., ḍh with Ḍ) for ease of
processing. """
""" For the NUMBERING, the following convention is followed:
FIRST LINE
SECOND LINE - NUMBER
Thus the script looks for ARABIC NUMERALS and interprets them
as the verse number, and interprets the line in which they occur
as the second line in a two-line verse. """
import collections
import csv
import os
import re
import sys
# --- Module-level state shared by the functions below -----------------------

if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit('Usage: python3 verify_gatha.py FILENAME')

# The input is documented as UTF-8 and the log contains diacritics, so the
# encoding is stated explicitly rather than left to the platform default.
f = open(sys.argv[1], 'r', encoding='utf-8')
logFile = open(os.path.splitext(sys.argv[1])[0] + '.log', 'w', encoding='utf-8')

log = ''            # text accumulated for the log file
errorCount = 0      # number of errors reported through writeError()
totalsyllables = 0  # running total of syllables counted by scan()

digits = re.compile(r'(\d+)')
vowels = re.compile(r'[aiuïüāĩũãīūeoēō]')
longvowels = re.compile(r'[āīūēō]')

# Tallies of syllables-per-line, keyed by the count as a string.
firstsylls = dict()
secondsylls = dict()

# Maps the verse number (as matched in the text) to a two-element list
# holding the first and second line of the verse.
# An OrderedDict is used so that verses are processed in the order in which
# they were read, independent of the ordering guarantees of plain dicts.
verses = collections.OrderedDict()
def replaceAspirates(string):
    """Return *string* with each two-character aspirate collapsed to one letter.

    Every aspirate digraph (e.g. ``kh``, ``ḍh``) is replaced by the
    capitalized form of its non-aspirated consonant (``K``, ``Ḍ``), so that
    each consonant occupies exactly one character during scanning.  An
    explicit mapping is used instead of ``upper()`` so that the letters with
    diacritics (ṭ, ḍ) are handled uniformly.
    """
    aspirates = {
        "kh": "K", "gh": "G", "ch": "C", "jh": "J",
        "ṭh": "Ṭ", "ḍh": "Ḍ", "th": "T", "dh": "D",
        "ph": "P", "bh": "B",
    }
    # The nasal + h clusters ("ṇh", "nh", "mh") are deliberately left alone:
    # they will often make the preceding syllable long, so they must keep
    # counting as two consonants.
    output = string
    for digraph, single in aspirates.items():
        output = output.replace(digraph, single)
    return output
def restoreAspirates(string):
    """Return *string* with capitalized aspirate placeholders expanded again.

    This is the inverse of the aspirate replacement: each capital letter that
    stands for an aspirate (``K``, ``Ḍ``, ...) is turned back into its
    two-character form (``kh``, ``ḍh``, ...).  All other characters are left
    untouched.
    """
    # "J" and "T" previously expanded to "j" and "t", silently dropping the
    # aspiration; every entry now mirrors the forward mapping exactly.
    aspirates = {
        "K": "kh", "G": "gh", "C": "ch", "J": "jh",
        "Ṭ": "ṭh", "Ḍ": "ḍh", "T": "th", "D": "dh",
        "P": "ph", "B": "bh",
    }
    output = string
    for single, digraph in aspirates.items():
        output = output.replace(single, digraph)
    return output
def writeError(verse, line, ganano, gana):
    """Record one metrical error in the module-level log.

    Increments the global ``errorCount`` and appends a numbered error entry
    to the global ``log`` string.

    verse  -- the verse number
    line   -- the line number within the verse
    ganano -- the position of the offending gaṇa within the line
    gana   -- the pattern that was found for that gaṇa

    Arguments are converted with str(), so both strings and numbers are
    accepted.
    """
    global errorCount
    global log
    errorCount += 1
    heading = '\nERROR NUMBER ' + str(errorCount) + ':\n'
    detail = ('Gaṇa number ' + str(ganano) + ' in line ' + str(line)
              + ' of verse ' + str(verse)
              + ' has the incorrect form ' + str(gana) + '.\n\n')
    log = log + heading + detail
def _gana_rule(position, line):
    """Return (moras_required, allowed_patterns) for one gaṇa.

    position -- 1-based position of the gaṇa within the line
    line     -- 1 or 2 (first or second line of the verse)
    """
    if position > 8:
        # A line has only eight gaṇas: no pattern is acceptable here, so the
        # first syllable found past the eighth gaṇa is reported as an error.
        return 1, ()
    if position % 2 == 1:
        # Odd gaṇas: any four-mora form except LGL.
        return 4, ('GG', 'GLL', 'LLG', 'LLLL')
    if position == 6 and line == 2:
        # Sixth gaṇa of the second line: a single light syllable.
        return 1, ('L',)
    if position == 6 and line == 1:
        # Sixth gaṇa of the first line: LGL or LLLL only.
        return 4, ('LGL', 'LLLL')
    if position == 8:
        # Last gaṇa of either line: a single heavy syllable.
        return 2, ('G',)
    # Remaining even gaṇas: any four-mora form.
    return 4, ('LGL', 'LLLL', 'GG', 'LLG', 'GLL')


def _syllable_weight(string, index):
    """Return (text, weight) for the syllable whose vowel is string[index].

    weight is 'G' (heavy, two moras) or 'L' (light, one mora); text is the
    portion of the syllable that is written to the log for this vowel.
    """
    vowel = string[index]
    if longvowels.match(vowel):
        # An inherently long vowel always makes the syllable heavy.
        return vowel, 'G'
    following = string[index + 1:index + 3]
    if not following:
        # Nothing follows: the last syllable of the line counts as heavy.
        return vowel, 'G'
    nextletter = following[0]
    if vowels.match(nextletter):
        # V V = L
        return vowel, 'L'
    if nextletter == 'ṁ':
        # V ṁ = G
        return vowel + 'ṁ', 'G'
    if len(following) == 2 and vowels.match(following[1]):
        # V CV = L
        return vowel, 'L'
    # V CC, or V C at the very end of the line = G
    return vowel + nextletter, 'G'


def scan(string, line, reference, original):
    """Scan one line of a verse and check it against the gāthā pattern.

    string    -- the line to scan, with aspirates collapsed to single
                 letters and spaces and hyphens removed
    line      -- 1 for the first line of the verse, 2 for the second
    reference -- the verse number, as a string
    original  -- the original text of the line, used only for the log

    The line is divided into gaṇas; each completed gaṇa is checked against
    the forms allowed at its position.  The first offending gaṇa is reported
    through writeError() and ends the scan of this line.  A line that ends
    before its eighth gaṇa is complete is reported as well.

    Nothing is returned.  The function appends to the global ``log``, adds
    the number of syllables counted to ``totalsyllables``, and updates the
    ``firstsylls`` / ``secondsylls`` tallies.  When the scan stops early at
    an error, the syllable count covers only the part that was scanned.
    """
    global log
    global firstsylls
    global secondsylls
    global totalsyllables

    ganas = []         # patterns of the gaṇas completed so far
    ganapattern = ''   # pattern of the gaṇa in progress (in Gs and Ls)
    ganatext = ''      # text of the gaṇa in progress
    linepattern = ''   # bracketed patterns of all completed gaṇas
    linebyganas = ''   # bracketed text of all completed gaṇas
    nosyllables = 0
    failed = False     # True once a gaṇa has been reported as incorrect

    log = log + 'Verse '+reference+', line '+str(line)+': '+original+'\n'

    lastindex = len(string) - 1
    for index, thisletter in enumerate(string):
        if not vowels.match(thisletter):
            # A consonant never changes the pattern; it is only copied into
            # the text when it opens a syllable (i.e. a vowel follows).
            if index == lastindex:
                break
            if vowels.match(string[index + 1]):
                ganatext = ganatext + restoreAspirates(thisletter)
            continue

        nosyllables += 1
        text, weight = _syllable_weight(string, index)
        ganatext = ganatext + text
        ganapattern = ganapattern + weight

        # now check to see if the gaṇa is complete
        position = len(ganas) + 1
        required, allowed = _gana_rule(position, line)
        moras = ganapattern.count('L') + 2 * ganapattern.count('G')
        if moras < required:
            continue
        if ganapattern not in allowed:
            writeError(reference, str(line), str(position), ganapattern)
            failed = True
            break
        ganas.append(ganapattern)
        linebyganas = linebyganas + '[' + ganatext + ']'
        linepattern = linepattern + '[' + ganapattern + ']'
        ganapattern = ''
        ganatext = ''

    if not failed and len(ganas) < 8:
        # The line ran out before all eight gaṇas were complete; report the
        # unfinished (or entirely missing) gaṇa instead of passing silently.
        writeError(reference, str(line), str(len(ganas) + 1),
                   ganapattern or '(missing)')

    log = log+' '+linebyganas+'\n'
    log = log+' '+linepattern+'\n'
    log = log+' number of syllables: '+str(nosyllables)+'\n'

    if line == 1:
        tally = firstsylls
    elif line == 2:
        tally = secondsylls
    else:
        tally = None
    if tally is not None:
        key = str(nosyllables)
        tally[key] = tally.get(key, 0) + 1
    totalsyllables += nosyllables
# --- Read the verses ---------------------------------------------------------
# Per the numbering convention, a line containing Arabic numerals is the
# second line of a verse and the closest preceding non-blank line is its
# first line.  Tracking the previous line (rather than reading in fixed pairs)
# keeps the pairing correct when blank or heading lines occur in the file.
pendingLine = ''
for rawLine in f:
    if not rawLine.strip():
        continue
    match = digits.search(rawLine)
    if not match:
        pendingLine = rawLine
        continue
    firstline = pendingLine.lower().strip()
    secondline = digits.sub('', rawLine).lower().strip()
    secondline = secondline.replace(' -', '')
    verses[match.group(0)] = [firstline, secondline]
    pendingLine = ''
f.close()

# --- Scan every verse --------------------------------------------------------
for number, verse in verses.items():
    # Aspirates must be collapsed before spaces are removed, so that a "t"
    # ending one word and an "h" starting the next are not merged.
    firstline = replaceAspirates(verse[0]).replace(' ', '').replace('-', '')
    secondline = replaceAspirates(verse[1]).replace(' ', '').replace('-', '')
    # scan takes four parameters: the string to scan (with spaces and hyphens
    # removed), the line number (1 or 2), the number of the verse, and the
    # original text of the line (with hyphens and spaces still present)
    scan(firstline, 1, number, verse[0])
    scan(secondline, 2, number, verse[1])

# --- Write the log -----------------------------------------------------------
# errorCount is an integer; comparing it with the string '0' never matched.
if errorCount == 0:
    logFile.write('No metrical errors detected.\n')
    print('No metrical errors detected.')
else:
    logFile.write(str(errorCount)+' errors detected. See ERROR lines below for details.\n')
    print(str(errorCount)+' errors detected. See the log file for details.')
logFile.write(str(totalsyllables)+' syllables scanned.\n\n')
logFile.write(log)
logFile.close()

print(firstsylls.keys())
print(firstsylls)

# --- Write the syllable-count tallies ---------------------------------------
# The keys are syllable counts stored as strings, so they are sorted
# numerically; both files get the same header row.
basename = os.path.splitext(sys.argv[1])[0]
for suffix, tally in (('-syllables-line1.csv', firstsylls),
                      ('-syllables-line2.csv', secondsylls)):
    with open(basename + suffix, 'w', encoding='utf-8') as csvoutput:
        csvoutput.write("No. of syllables,Instances\n")
        for key in sorted(tally, key=int):
            csvoutput.write(key+","+str(tally[key])+"\n")