# A Python script for parsing Tamil verse into metrical units
# -*- coding: utf-8 -*-
""" Usage: python3 versify.py FILENAME """
""" Results in FILENAME.log (errors and statistics)
and FILENAME.json (a JSON file of metrically parsed text) """
""" This program expects the text to be in the format
represented by the GRETIL Kuṟuntokai
(http://gretil.sub.uni-goettingen.de/gretil/4_drav/tamil/pm/pm110__u.htm)
namely: the heading of each
poem comes on a line such as:
103. neytal - talaivi kūṟṟu
then it is followed by a space, then each of the lines
of the poem, and then
-vāyilāṉ tēvaṉār.
which gives the author. """
import collections
import json
import os
import re
import sys
# Input text (first command-line argument) plus the two output files, which are
# named after the input with its extension replaced by .log and .json.
# The encoding is given explicitly: the text is full of non-ASCII diacritics and
# the JSON is written with ensure_ascii=False, so relying on the platform
# default encoding corrupts or rejects the data on non-UTF-8 locales.
f = open(sys.argv[1], 'r', encoding='utf-8')
logFile = open(os.path.splitext(sys.argv[1])[0] + '.log', 'w', encoding='utf-8')
jsonFile = open(os.path.splitext(sys.argv[1])[0] + '.json', 'w', encoding='utf-8')

# Accumulated error messages and running counters, updated by the scan functions.
log = ''
totalSyllables = 0
discardedSyllables = 0
errorCount = 0

authorline = re.compile(r'^-{1}([^-].*)') # group 1 is the author
titleline = re.compile(r'(\d+)\. ([^\s]+) - (.*)') # group 1 is the number, 2 is the tiṇai, and 3 is ?

# Y and W are the single-letter stand-ins for the digraphs "ai" and "au"
# (see replaceDigraphs / restoreDigraphs).
vowels = re.compile(r'[aiuāĩũãīūeoēōYW]')
# NOTE(review): longvowels is not referenced anywhere else in the visible source.
longvowels = re.compile(r'[āīūēō]')
# a syllable is light when it ends in a short vowel (or ai/au)
light = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ])*([aiueoYW])$')
# for the first syllable in a cīr, count ai and au as heavy (not otherwise)
initiallight = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ])*([aiueo])$')
# a syllable ending in short u, as required for nērpu / niraipu
cu = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ])*(u)$')

# every parsed poem, in input order
poems = []
def replaceDigraphs(string):
    """Encode the digraphs "ai" and "au" as the single letters "Y" and "W".

    Working with one character per vowel lets the syllabifier and the
    weight regexes treat a diphthong as a single vowel.
    """
    return string.replace("ai", "Y").replace("au", "W")
def restoreDigraphs(string):
    """Expand the stand-in letters "Y" and "W" back to "ai" and "au"."""
    restored = string
    for letter, digraph in (("Y", "ai"), ("W", "au")):
        restored = restored.replace(letter, digraph)
    return restored
def convertTxtToJson(textfile):
    """Read poems from an open text file and append them to the global `poems`.

    A line matching `titleline` supplies the metadata "number", "landscape"
    and "direction"; a line matching `authorline` supplies "author" and
    closes the current poem.  Every other non-blank line seen after a title
    line is stored, stripped and with full stops removed, as a line of the
    poem.  Each finished poem is an OrderedDict with the keys "lines" and
    "metadata".
    """
    global poems
    record = collections.OrderedDict()
    meta = collections.OrderedDict()
    verses = []
    # readline() returns '' only at end of file
    for raw in iter(textfile.readline, ''):
        title = titleline.search(raw)
        author = authorline.search(raw)
        if title:
            meta["number"], meta["landscape"], meta["direction"] = title.group(1, 2, 3)
        if author:
            # the author line terminates the poem: store it and start afresh
            meta["author"] = author.group(1).replace('.', '')
            record["lines"] = verses
            record["metadata"] = meta
            poems.append(record)
            record = collections.OrderedDict()
            meta = collections.OrderedDict()
            verses = []
        if title or author:
            continue
        # text before the first title line is ignored, as are blank lines
        text = raw.strip()
        if text and "number" in meta:
            verses.append(text.replace('.', ''))
def errorMessage(cir,cirnumber,poemnumber,linenumber):
    """Record one unparsable cīr in the global log and bump `errorCount`.

    `cir` is the cīr text in digraph-encoded form; `cirnumber`, `poemnumber`
    and `linenumber` are strings that locate it in the text.
    """
    global log, errorCount
    errorCount += 1
    entry = ''.join([
        'Error ', str(errorCount),
        ': Poem no. ', poemnumber,
        ', line ', linenumber,
        ', cīr no. ', cirnumber,
        ': ', restoreDigraphs(cir),
        '\n',
    ])
    log = log + entry
# A word-final consonant followed by a vowel-initial word; the consonant is
# moved across the space so that it becomes the onset of the following word.
sandhiJunction = re.compile('([kṅcñṭṇtnpmyrlvḷḻṟṉ]) ([aāiīuūeēoōYW])')


def _weight(isLight):
    """Return the quantity letter of one syllable: "L" if light, otherwise "G"."""
    return "L" if isLight else "G"


def _acai(kind, quantity, *parts):
    """Build one acai record from its digraph-encoded syllables."""
    return {
        "syllables": [restoreDigraphs(part) for part in parts],
        "type": kind,
        "quantity": quantity,
    }


def _parseCir(syllables):
    """Split one cīr of two to five syllables into at most two acais.

    Returns `(acais, ok)`.  `acais` is always a two-element list; a slot that
    could not be determined is left as an empty dict.  `ok` is False when the
    syllable weights fit none of the patterns handled here.
    """
    count = len(syllables)
    # ai and au count as heavy only in the first syllable of a cīr
    lights = [bool(initiallight.search(syllables[0]))]
    lights += [bool(light.search(syllable)) for syllable in syllables[1:]]
    endsInU = [bool(cu.search(syllable)) for syllable in syllables]
    acais = [{}, {}]
    ok = True

    if count == 2:
        # two syllables: it must be nēr-nēr
        acais[0] = _acai("nēr", _weight(lights[0]), syllables[0])
        acais[1] = _acai("nēr", _weight(lights[1]), syllables[1])

    elif count == 3:
        # nērpu-nēr is identical to nēr-nirai, so only nēr-nirai,
        # nirai-nēr and nēr-nērpu are entertained
        if lights[0]:
            # a light first syllable opens a nirai; the third is a nēr
            acais[0] = _acai("nirai", "L" + _weight(lights[1]), *syllables[0:2])
            acais[1] = _acai("nēr", _weight(lights[2]), syllables[2])
        else:
            acais[0] = _acai("nēr", "G", syllables[0])
            # after a nēracai, only a nirai or a nērpu can follow
            if lights[1]:
                acais[1] = _acai("nirai", "L" + _weight(lights[2]), *syllables[1:3])
            elif endsInU[2]:
                acais[1] = _acai("nērpu", "Gu", *syllables[1:3])
            else:
                ok = False

    elif count == 4:
        if lights[0]:
            # light first syllable: nirai (niraipu-nēr is treated as nirai-nirai)
            acais[0] = _acai("nirai", "L" + _weight(lights[1]), *syllables[0:2])
            if lights[2]:
                acais[1] = _acai("nirai", "L" + _weight(lights[3]), *syllables[2:4])
            elif endsInU[3]:
                acais[1] = _acai("nērpu", "Gu", *syllables[2:4])
            else:
                ok = False
        elif not lights[1]:
            # heavy first syllable: every accepted pattern needs a light second
            ok = False
        elif lights[3] and endsInU[3]:
            # fourth syllable is light and Cu: nēr-niraipu
            acais[0] = _acai("nēr", "G", syllables[0])
            acais[1] = _acai("niraipu", "L" + _weight(lights[2]) + "u", *syllables[1:4])
        elif endsInU[1]:
            # otherwise nērpu-nirai, which needs a Cu second and a light third
            acais[0] = _acai("nērpu", "Gu", *syllables[0:2])
            if lights[2]:
                acais[1] = _acai("nirai", "L" + _weight(lights[3]), *syllables[2:4])
            else:
                ok = False
        else:
            ok = False

    elif count == 5:
        # the options are either niraipu-nirai or nirai-niraipu
        if not lights[0]:
            # a heavy first syllable fits neither
            ok = False
        elif lights[4] and endsInU[4]:
            # final syllable is Cu: nirai-niraipu, which needs a light third
            if lights[2]:
                acais[0] = _acai("nirai", "L" + _weight(lights[1]), *syllables[0:2])
                acais[1] = _acai("niraipu", "L" + _weight(lights[3]) + "u", *syllables[2:5])
            else:
                ok = False
        elif endsInU[2]:
            # third syllable is Cu: niraipu-nirai
            acais[0] = _acai("niraipu", "L" + _weight(lights[1]) + "u", *syllables[0:3])
            # NOTE(review): the fourth syllable is only required to be light
            # when the fifth is heavy; kept exactly as in the original logic.
            if lights[4] or lights[3]:
                acais[1] = _acai("nirai", "L" + _weight(lights[4]), *syllables[3:5])
            else:
                ok = False
        else:
            ok = False

    return acais, ok


def scanLine(line,poemnumber,linenumber):
    """Scan one line of verse into cīrs and acais.

    Returns an array of cīrs; each cīr is an array of two acais, and each
    acai is a dictionary holding the syllables of the text ("syllables"),
    the prosodic values L and G ("quantity") and the technical term
    ("type").  An acai that could not be determined is an empty dictionary.

    `poemnumber` and `linenumber` are strings used only for error messages.

    Side effects: updates the global counters `totalSyllables` and
    `discardedSyllables`, and logs unparsable cīrs through `errorMessage`.
    Cīrs with fewer than two or more than six syllables are skipped without
    an error; a six-syllable cīr is logged as an error and left out of the
    result.
    """
    global totalSyllables
    global discardedSyllables
    line = replaceDigraphs(line)
    # Resyllabify EVERY consonant+space+vowel junction in the line.  The
    # previous code searched once and replaced only that one match's text,
    # so junctions with a different consonant or vowel were left untouched.
    line = sandhiJunction.sub(r' \1\2', line)
    newcirs = []
    for index, cir in enumerate(line.split(' ')):
        syllables = syllabize(cir)
        count = len(syllables)
        # NOTE(review): each cīr adds len(syllables)+1 to the counters, as in
        # the original code; confirm that the extra 1 is intended.
        totalSyllables += count + 1
        if count < 2 or count > 6:
            continue
        # this will ONLY work if there are a maximum of two acais per cīr
        if count == 6:
            acais, parsed = None, False
        else:
            acais, parsed = _parseCir(syllables)
        if not parsed:
            errorMessage(cir, str(index + 1), poemnumber, linenumber)
            discardedSyllables += count + 1
        if acais is not None:
            newcirs.append(acais)
    return newcirs
def syllabize(string):
    """Return the array of syllables of one word (digraph-encoded).

    Every vowel starts a syllable.  The syllable takes at most one preceding
    consonant as its onset, and a coda chosen by looking at up to three
    letters after the vowel.  Any character that does not match `vowels`
    is treated as a consonant.
    """
    syllables = []
    for index, letter in enumerate(string):
        if not vowels.match(letter):
            continue
        # check to see if there is an onset
        # NB in tamil we only need to check for ONE onset.
        # The index > 0 guard matters: string[-1] is the LAST letter, not an
        # IndexError, so without it a vowel-initial word ending in a
        # consonant would take that final consonant as its onset.
        onset = ''
        if index > 0 and not vowels.match(string[index - 1]):
            onset = string[index - 1]
        # check to see if there is a coda
        tail = string[index + 1:index + 4]
        coda = ''
        if tail and not vowels.match(tail[0]):
            if len(tail) == 1:
                coda = tail         # C*VC at the end of the word
            elif vowels.match(tail[1]):
                coda = ''           # C*V-CV
            elif len(tail) == 2:
                coda = tail         # C*VCC at the end of the word
            elif vowels.match(tail[2]):
                coda = tail[0]      # C*VC-CV
            else:
                coda = tail[:2]     # C*VCC.C(V)
        syllables.append(onset + letter + coda)
    return syllables
def scanLines(poem,index):
    """Scan every line of `poem` and store the result under poem["scansion"].

    `index` is the position of `poem` in the global `poems` list; the
    updated poem is written back to that slot.
    """
    global poems
    scansion = []
    # The loop variable must not be called `index`: that shadowed the
    # parameter, so the write-back below landed on the slot of the last line
    # number and overwrote an unrelated poem.
    for lineno, line in enumerate(poem["lines"], start=1):
        scansion.append(scanLine(line, poem["metadata"]["number"], str(lineno)))
    poem["scansion"] = scansion
    poems[index] = poem
def statistics():
    """Tally the acais of every scanned poem and return the report as text.

    Reads poem["scansion"] of each entry in the global `poems`.  The first
    acai of a cīr is counted as the strong position and the second as the
    weak position.  Percentages are rounded to three decimals; a percentage
    whose denominator is zero is reported as 0.0 instead of raising
    ZeroDivisionError.
    """

    def emptyTally():
        return {
            "nēr": { "G": 0, "L": 0 },
            "nirai": { "LG": 0, "LL": 0 },
            "nērpu": { "Gu": 0 },
            "niraipu": { "LGu": 0, "LLu": 0 }
        }

    def percent(part, whole):
        if not whole:
            return 0.0
        return round((part/whole)*100,3)

    def heading(label, count, whole):
        return label+': '+str(count)+' or '+str(percent(count, whole))+'%'

    def entry(label, count, whole):
        return '- '+label+' ('+str(count)+' or '+str(percent(count, whole))+'%)'

    def section(tally):
        # Every percentage of a section uses that section's own totals; the
        # weak Gu line previously divided by the strong nēr total.
        totalner = tally["nēr"]["L"] + tally["nēr"]["G"] + tally["nērpu"]["Gu"]
        totalnirai = (tally["nirai"]["LL"] + tally["nirai"]["LG"]
                      + tally["niraipu"]["LLu"] + tally["niraipu"]["LGu"])
        total = totalner + totalnirai
        return [
            heading('Nēr (including nērpu)', totalner, total),
            entry('G', tally["nēr"]["G"], totalner),
            entry('L', tally["nēr"]["L"], totalner),
            entry('Gu', tally["nērpu"]["Gu"], totalner),
            heading('Nirai (including niraipu)', totalnirai, total),
            entry('LL', tally["nirai"]["LL"], totalnirai),
            entry('LG', tally["nirai"]["LG"], totalnirai),
            entry('LLu', tally["niraipu"]["LLu"], totalnirai),
            entry('LGu', tally["niraipu"]["LGu"], totalnirai),
        ]

    strong = emptyTally()
    weak = emptyTally()
    quantities = ["G", "L", "Gu", "LL", "LG", "LLu", "LGu"]
    # cirdata[first][second]: number of cīrs with that pair of quantities
    cirdata = {first: {second: 0 for second in quantities} for first in quantities}
    lstrong = { "1": 0, "2": 0, "3": 0, "4": 0, "5": 0 }

    for poem in poems:
        for line in poem["scansion"]:
            for cirno, cir in enumerate(line):
                if len(cir) < 1:
                    print('indexerror for line '+str(line))
                    continue
                first = cir[0]
                firstKnown = "type" in first and "quantity" in first
                if firstKnown:
                    strong[first["type"]][first["quantity"]] += 1
                    position = str(cirno+1)
                    # only the first five positions are reported; a later
                    # cīr must not raise KeyError
                    if first["quantity"] == "L" and position in lstrong:
                        lstrong[position] += 1
                if len(cir) < 2:
                    print('indexerror for line '+str(line))
                    continue
                second = cir[1]
                if "type" in second and "quantity" in second:
                    weak[second["type"]][second["quantity"]] += 1
                    if firstKnown:
                        cirdata[first["quantity"]][second["quantity"]] += 1

    report = ['', 'Split in strong positions:']
    report += section(strong)
    report.append('Split in weak positions:')
    report += section(weak)
    report += [
        'Number of nēr-acai with a L in strong position throughout the line:',
        '- First cīr: '+str(lstrong["1"]),
        '- Second cīr: '+str(lstrong["2"]),
        '- Third cīr: '+str(lstrong["3"]),
        '- Fourth cīr: '+str(lstrong["4"]),
        '- Fifth cīr: '+str(lstrong["5"]),
        'Some conditional probabilities for a cīr:',
        json.dumps(cirdata,indent=4,sort_keys=True,ensure_ascii=False),
    ]
    return '\n'.join(report)
# Parse the input into poems, scan every line of every poem, then write the
# log (summary, error messages, statistics) and the JSON output.
convertTxtToJson(f)
f.close()  # the input has been read completely
for index, poem in enumerate(poems):
    scanLines(poem,index)
logFile.write("Scanned "+str(len(poems))+" poems, with "+str(totalSyllables)+" syllables.\n")
if discardedSyllables > 1:
    percent = round((discardedSyllables/totalSyllables)*100,3)
    logFile.write('''I was unable to parse '''+str(discardedSyllables)+''' syllables ('''+str(percent)+'''%).
Possible reasons for failure:
- The text is incorrect.
- CVR should be counted as light (the parser counts it as heavy).\n\n''')
logFile.write(log)
logFile.write(statistics())
logFile.close()
jsonFile.write(json.dumps(poems, indent=4, sort_keys=True, ensure_ascii=False))
# close explicitly so the JSON is flushed to disk rather than left to
# interpreter shutdown
jsonFile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment