Last active
April 6, 2019 21:36
-
-
Save aso2101/83d418147908bcfa6a295e6c913b6ad5 to your computer and use it in GitHub Desktop.
A python script for parsing Tamil verse into metrical units
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" Usage: python3 versify.py FILENAME """ | |
""" Results in FILENAME.log (errors and statistics) | |
and FILENAME.json (a json file of metrically parsed text) """ | |
""" This program expects the text to be in the format | |
represented by the GRETIL Kuṟuntokai | |
(http://gretil.sub.uni-goettingen.de/gretil/4_drav/tamil/pm/pm110__u.htm) | |
namely: the | |
poem comes on a line such as: | |
103. neytal - talaivi kūṟṟu | |
then it is followed by a space, then each of the lines | |
of the poem, and then | |
-vāyilāṉ tēvaṉār. | |
which gives the author. """ | |
import sys | |
import os | |
import re | |
import json | |
import collections | |
# The single command-line argument is the text file to scan.
if len(sys.argv) < 2:
    sys.exit('Usage: python3 versify.py FILENAME')

# The input text and both outputs contain transliteration diacritics, so the
# encoding is pinned to UTF-8 instead of relying on the platform default.
outputBase = os.path.splitext(sys.argv[1])[0]
f = open(sys.argv[1], 'r', encoding='utf-8')
logFile = open(outputBase + '.log', 'w', encoding='utf-8')
jsonFile = open(outputBase + '.json', 'w', encoding='utf-8')

# Module-level accumulators, updated by scanLine() and errorMessage().
log = ''
totalSyllables = 0
discardedSyllables = 0
errorCount = 0

# A line that starts with exactly one hyphen; group 1 is the author.
authorline = re.compile(r'^-{1}([^-].*)')
# group 1 is the poem number, group 2 the tiṇai; group 3 is stored by
# convertTxtToJson() under the "direction" key.
titleline = re.compile(r'(\d+)\. ([^\s]+) - (.*)')
# Single characters treated as vowels; Y and W are the one-letter stand-ins
# that replaceDigraphs() substitutes for "ai" and "au".
vowels = re.compile(r'[aiuāĩũãīūeoēōYW]')
longvowels = re.compile(r'[āīūēō]')
# A syllable ending in one of the listed vowels (Y/W included) counts as light.
light = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ])*([aiueoYW])$')
# for the first syllable in a cīr, count ai and au as heavy (not otherwise)
initiallight = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ])*([aiueo])$')
# A syllable ending in "u": the shape required of the extra final syllable
# of a nērpu or niraipu.
cu = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ])*(u)$')

# Filled by convertTxtToJson(); each entry gains a "scansion" in scanLines().
poems = []
def replaceDigraphs(string):
    """Return *string* with the diphthongs "ai" and "au" collapsed to the
    single placeholder letters "Y" and "W" respectively."""
    collapsed = string.replace("ai", "Y")
    return collapsed.replace("au", "W")
def restoreDigraphs(string):
    """Return *string* with the placeholder letters "Y" and "W" expanded
    back to the diphthongs "ai" and "au" respectively."""
    expanded = string.replace("Y", "ai")
    return expanded.replace("W", "au")
def convertTxtToJson(textfile):
    """Read poems from *textfile* and append them to the global ``poems``.

    *textfile* is any iterable of text lines (normally the open input file).
    A line matching ``titleline`` records the poem's number, landscape and
    direction. A line matching ``authorline`` records the author and closes
    the current poem. Every other non-blank line seen after a title line is
    stored as a verse line, stripped and with full stops removed.

    Each poem is an OrderedDict with the keys "lines" and "metadata".

    NOTE(review): a poem is only stored when its author line is reached, so
    verse lines after the last author line are dropped.
    """
    global poems
    metadata = collections.OrderedDict()
    lines = []
    for currentLine in textfile:
        numbermatch = titleline.search(currentLine)
        authormatch = authorline.search(currentLine)
        if numbermatch:  # title line: start collecting metadata
            metadata["number"] = numbermatch.group(1)
            metadata["landscape"] = numbermatch.group(2)
            metadata["direction"] = numbermatch.group(3)
        if authormatch:  # author line: the poem is complete
            metadata["author"] = authormatch.group(1).replace('.', '')
            poem = collections.OrderedDict()
            poem["lines"] = lines
            poem["metadata"] = metadata
            poems.append(poem)
            # start afresh for the next poem
            metadata = collections.OrderedDict()
            lines = []
        if authormatch is None and numbermatch is None:
            # ordinary text only counts as verse once a title has been seen
            text = currentLine.strip()
            if text and "number" in metadata:
                lines.append(text.replace('.', ''))
def errorMessage(cir,cirnumber,poemnumber,linenumber):
    """Record one unparseable cīr.

    Increments the global error counter and appends a line to the global
    ``log`` giving the poem, line and cīr numbers (all passed as strings)
    and the cīr text with its digraphs restored.
    """
    global log
    global errorCount
    errorCount += 1
    entry = ''.join((
        'Error ', str(errorCount),
        ': Poem no. ', poemnumber,
        ', line ', linenumber,
        ', cīr no. ', cirnumber,
        ': ', restoreDigraphs(cir),
        '\n',
    ))
    log = log + entry
# A word-final consonant followed by a vowel-initial word: the consonant is
# moved across the space so that it becomes the onset of the next word.
_sandhi = re.compile(r'([kṅcñṭṇtnpmyrlvḷḻṟṉ]) ([aāiīuūeēoōYW])')


def _acai(kind, syllables, quantity):
    """Build one acai record from its type, its syllables and its quantity."""
    return {
        "syllables": [restoreDigraphs(s) for s in syllables],
        "type": kind,
        "quantity": quantity,
    }


def _isLight(syllable):
    """True if a non-initial syllable counts as light."""
    return light.search(syllable) is not None


def _isInitialLight(syllable):
    """True if a cīr-initial syllable counts as light (ai/au are heavy here)."""
    return initiallight.search(syllable) is not None


def _isCu(syllable):
    """True if the syllable ends in "u", as nērpu and niraipu require."""
    return cu.search(syllable) is not None


def _scanTwo(s):
    """Two syllables: always nēr-nēr."""
    first = "L" if _isInitialLight(s[0]) else "G"
    second = "L" if _isLight(s[1]) else "G"
    return [_acai("nēr", s[:1], first), _acai("nēr", s[1:], second)], True


def _scanThree(s):
    """Three syllables: nirai-nēr, nēr-nirai or nēr-nērpu.

    nērpu-nēr is identical to nēr-nirai, so only the latter is entertained.
    """
    secondlight = _isLight(s[1])
    thirdlight = _isLight(s[2])
    # if the first is light, the first acai must be nirai and the third
    # syllable makes up a nēr
    if _isInitialLight(s[0]):
        return [
            _acai("nirai", s[:2], "LL" if secondlight else "LG"),
            _acai("nēr", s[2:], "L" if thirdlight else "G"),
        ], True
    # a heavy first syllable is taken to be a nēr on its own
    first = _acai("nēr", s[:1], "G")
    # after a nēr, only a nirai (light second) or a nērpu can follow
    if secondlight:
        return [first, _acai("nirai", s[1:], "LL" if thirdlight else "LG")], True
    if _isCu(s[2]):
        return [first, _acai("nērpu", s[1:], "Gu")], True
    return [first, {}], False


def _scanFour(s):
    """Four syllables.

    Possibilities:
    - nērpu-nirai (GL-LX)
    - nēr-niraipu (G-LXL)
    - nirai-nirai (LX-LX)
    - nirai-nērpu (LX-GL)
    niraipu-nēr (LXL-G) is treated as nirai-nirai.
    """
    secondlight = _isLight(s[1])
    thirdlight = _isLight(s[2])
    fourthlight = _isLight(s[3])
    if _isInitialLight(s[0]):
        first = _acai("nirai", s[:2], "LL" if secondlight else "LG")
        # from an initial nirai, a light third syllable starts another nirai
        if thirdlight:
            return [first, _acai("nirai", s[2:], "LL" if fourthlight else "LG")], True
        # a heavy third syllable ought to start a nērpu
        if _isCu(s[3]):
            return [first, _acai("nērpu", s[2:], "Gu")], True
        return [first, {}], False
    # heavy first syllable: either nēr-niraipu or nērpu-nirai, and in both
    # the second syllable must be light
    if not secondlight:
        return [{}, {}], False
    # a light fourth syllable ending in u: nēr-niraipu
    if fourthlight and _isCu(s[3]):
        return [
            _acai("nēr", s[:1], "G"),
            _acai("niraipu", s[1:], "LLu" if thirdlight else "LGu"),
        ], True
    # otherwise nērpu-nirai, which needs Cu in second place...
    if not _isCu(s[1]):
        return [{}, {}], False
    first = _acai("nērpu", s[:2], "Gu")
    # ...and a light third syllable to open the nirai
    if not thirdlight:
        return [first, {}], False
    return [first, _acai("nirai", s[2:], "LL" if fourthlight else "LG")], True


def _scanFive(s):
    """Five syllables: nirai-niraipu or niraipu-nirai."""
    secondlight = _isLight(s[1])
    thirdlight = _isLight(s[2])
    fourthlight = _isLight(s[3])
    fifthlight = _isLight(s[4])
    # if the first syllable is heavy, then we have a problem
    if not _isInitialLight(s[0]):
        return [{}, {}], False
    # a final light syllable ending in u: nirai-niraipu
    if fifthlight and _isCu(s[4]):
        if not thirdlight:
            return [{}, {}], False
        return [
            _acai("nirai", s[:2], "LL" if secondlight else "LG"),
            _acai("niraipu", s[2:], "LLu" if fourthlight else "LGu"),
        ], True
    # otherwise niraipu-nirai, so the third syllable must be Cu
    if not _isCu(s[2]):
        return [{}, {}], False
    first = _acai("niraipu", s[:3], "LLu" if secondlight else "LGu")
    # a nirai has to open with a light syllable
    if not fourthlight:
        return [first, {}], False
    return [first, _acai("nirai", s[3:], "LL" if fifthlight else "LG")], True


# cīr length in syllables -> scanner; other lengths are not scanned
_scanners = {2: _scanTwo, 3: _scanThree, 4: _scanFour, 5: _scanFive}


def scanLine(line,poemnumber,linenumber):
    """Scan one verse line into cīrs and acais.

    Returns a list of cīrs. Each cīr is a two-element list of acais, and each
    acai is a dictionary holding the syllables of the text, the prosodic
    value ("quantity", made of L and G) and the technical term ("type").
    An acai that could not be determined is left as an empty dictionary.
    This only works for a maximum of two acais per cīr.

    *poemnumber* and *linenumber* are strings used in error messages.

    Side effects: adds to the global syllable counters and reports each
    unparseable cīr through errorMessage().
    """
    global totalSyllables
    global discardedSyllables
    # resolve sandhi at every word boundary in the line, not just the first
    line = _sandhi.sub(r' \1\2', replaceDigraphs(line))
    newcirs = []
    # split() without an argument ignores repeated spaces, so the cīr
    # numbers in error messages are not shifted by empty strings
    for index, cir in enumerate(line.split()):
        syllables = syllabize(cir)
        count = len(syllables)
        totalSyllables += count
        scanner = _scanners.get(count)
        if scanner is None:
            # six or more syllables cannot form two acais
            if count >= 6:
                errorMessage(cir,str(index+1),poemnumber,linenumber)
                discardedSyllables += count
            # NOTE(review): a cīr with fewer than two syllables is left out
            # of the result without an error, as before; confirm whether it
            # should be reported instead.
            continue
        acais, parsed = scanner(syllables)
        if not parsed:
            errorMessage(cir,str(index+1),poemnumber,linenumber)
            discardedSyllables += count
        newcirs.append(acais)
    return newcirs
def syllabize(string):
    """Split *string* (one cīr, digraphs already replaced) into syllables.

    Returns a list of strings, one per vowel found. Each syllable is made of
    at most one onset consonant, the vowel, and a coda of at most two
    consonants. Any character that ``vowels`` does not match is treated as a
    consonant.

    For example "avaṉ" gives ["a", "vaṉ"]. A word-initial vowel has no
    onset, so the lookup of the preceding letter is skipped at index 0
    (``string[-1]`` would otherwise wrap around to the last letter).
    """
    syllables = []
    length = len(string)
    for index, thisletter in enumerate(string):
        if not vowels.match(thisletter):
            continue
        # check to see if there is an onset
        # NB in tamil we only need to check for ONE onset.
        onset = ''
        if index > 0 and not vowels.match(string[index-1]):
            onset = string[index-1]
        # collect the run of consonants that follows the vowel
        end = index + 1
        while end < length and not vowels.match(string[end]):
            end += 1
        coda = string[index+1:end]
        # if another vowel follows, it claims the last consonant as its onset
        if end < length:
            coda = coda[:-1]
        # a coda keeps at most two consonants (C*VCC)
        syllables.append(onset + thisletter + coda[:2])
    return syllables
def scanLines(poem,index):
    """Scan every line of *poem* and store the result under "scansion".

    *index* is the position of the poem in the global ``poems`` list; the
    scanned poem is written back to that slot. Lines are numbered from 1 in
    error messages. The line counter has its own name so that it cannot
    overwrite *index*.
    """
    global poems
    number = poem["metadata"]["number"]
    poem["scansion"] = [
        scanLine(line, number, str(lineno))
        for lineno, line in enumerate(poem["lines"], start=1)
    ]
    poems[index] = poem
def statistics():
    """Return a text report on the scansion stored in the global ``poems``.

    The first acai of a cīr is counted as the "strong" position and the
    second as the "weak" position; an acai left empty by the scanner is not
    counted. The report gives counts and percentages by type and quantity
    for both positions, the number of light nēr acais opening each of the
    first five cīrs of a line, and a JSON table of first-acai quantity
    against second-acai quantity. A percentage whose denominator is zero is
    reported as 0.0.
    """
    def emptyTally():
        return {
            "nēr": { "G": 0, "L": 0 },
            "nirai": { "LG": 0, "LL": 0 },
            "nērpu": { "Gu": 0 },
            "niraipu": { "LGu": 0, "LLu": 0 }
        }

    def percent(part, whole):
        # an empty category must not abort the whole report
        if not whole:
            return '0.0'
        return str(round((part/whole)*100,3))

    def share(count, whole):
        return str(count) + ' or ' + percent(count, whole) + '%'

    strong = emptyTally()
    weak = emptyTally()
    quantities = ("G", "L", "Gu", "LL", "LG", "LLu", "LGu")
    cirdata = { first: { second: 0 for second in quantities } for first in quantities }
    lstrong = { "1": 0, "2": 0, "3": 0, "4": 0, "5": 0 }
    for poem in poems:
        for line in poem["scansion"]:
            for cirno, cir in enumerate(line):
                if len(cir) < 1:
                    print('indexerror for line '+str(line))
                    continue
                first = cir[0]
                if not ("type" in first and "quantity" in first):
                    continue
                strong[first["type"]][first["quantity"]] += 1
                position = str(cirno+1)
                # the report only has slots for the first five cīrs
                if first["quantity"] == "L" and position in lstrong:
                    lstrong[position] += 1
                if len(cir) < 2:
                    print('indexerror for line '+str(line))
                    continue
                second = cir[1]
                if "type" in second and "quantity" in second:
                    weak[second["type"]][second["quantity"]] += 1
                    cirdata[first["quantity"]][second["quantity"]] += 1
    totalstrongner = strong["nēr"]["L"] + strong["nēr"]["G"] + strong["nērpu"]["Gu"]
    totalstrongnirai = strong["nirai"]["LL"] + strong["nirai"]["LG"] + strong["niraipu"]["LLu"] + strong["niraipu"]["LGu"]
    totalstrong = totalstrongner + totalstrongnirai
    totalweakner = weak["nēr"]["L"] + weak["nēr"]["G"] + weak["nērpu"]["Gu"]
    totalweaknirai = weak["nirai"]["LL"] + weak["nirai"]["LG"] + weak["niraipu"]["LLu"] + weak["niraipu"]["LGu"]
    totalweak = totalweakner + totalweaknirai
    report = [
        '',
        'Split in strong positions:',
        'Nēr (including nērpu): ' + share(totalstrongner, totalstrong),
        '- G (' + share(strong["nēr"]["G"], totalstrongner) + ')',
        '- L (' + share(strong["nēr"]["L"], totalstrongner) + ')',
        '- Gu (' + share(strong["nērpu"]["Gu"], totalstrongner) + ')',
        'Nirai (including niraipu): ' + share(totalstrongnirai, totalstrong),
        '- LL (' + share(strong["nirai"]["LL"], totalstrongnirai) + ')',
        '- LG (' + share(strong["nirai"]["LG"], totalstrongnirai) + ')',
        '- LLu (' + share(strong["niraipu"]["LLu"], totalstrongnirai) + ')',
        '- LGu (' + share(strong["niraipu"]["LGu"], totalstrongnirai) + ')',
        'Split in weak positions:',
        'Nēr (including nērpu): ' + share(totalweakner, totalweak),
        '- G (' + share(weak["nēr"]["G"], totalweakner) + ')',
        '- L (' + share(weak["nēr"]["L"], totalweakner) + ')',
        # weak Gu is a share of the weak nēr total, like the two rows above
        '- Gu (' + share(weak["nērpu"]["Gu"], totalweakner) + ')',
        'Nirai (including niraipu): ' + share(totalweaknirai, totalweak),
        '- LL (' + share(weak["nirai"]["LL"], totalweaknirai) + ')',
        '- LG (' + share(weak["nirai"]["LG"], totalweaknirai) + ')',
        '- LLu (' + share(weak["niraipu"]["LLu"], totalweaknirai) + ')',
        '- LGu (' + share(weak["niraipu"]["LGu"], totalweaknirai) + ')',
        'Number of nēr-acai with a L in strong position throughout the line:',
        '- First cīr: ' + str(lstrong["1"]),
        '- Second cīr: ' + str(lstrong["2"]),
        '- Third cīr: ' + str(lstrong["3"]),
        '- Fourth cīr: ' + str(lstrong["4"]),
        '- Fifth cīr: ' + str(lstrong["5"]),
        'Some conditional probabilities for a cīr:',
        json.dumps(cirdata,indent=4,sort_keys=True,ensure_ascii=False),
    ]
    return '\n'.join(report)
# Driver: parse the text, scan every poem, then write the log and the JSON.
# The try/finally makes sure all three files are closed even if a step fails.
try:
    convertTxtToJson(f)
    for index, poem in enumerate(poems):
        scanLines(poem,index)
    logFile.write("Scanned "+str(len(poems))+" poems, with "+str(totalSyllables)+" syllables.\n")
    # report failures as soon as any syllable was discarded
    if discardedSyllables > 0:
        percent = round((discardedSyllables/totalSyllables)*100,3)
        logFile.write('''I was unable to parse '''+str(discardedSyllables)+''' syllables ('''+str(percent)+'''%).
Possible reasons for failure:
- The text is incorrect.
- CVR should be counted as light (the parser counts it as heavy).\n\n''')
    logFile.write(log)
    logFile.write(statistics())
    jsonFile.write(json.dumps(poems, indent=4, sort_keys=True, ensure_ascii=False))
finally:
    f.close()
    logFile.close()
    jsonFile.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment