Spebby/wordclassification.py

## wordclassification.py
#!/usr/bin/env python3
"""
Analyses words based on their syllables and their length.

Text is read from standard input. Every word is looked up in a table of
word types (adjective, adverb, noun, verb) and in a syllable table; the
script then prints how many words fall into each type and how many words
have each syllable count, as counts, percentages and star bars.
"""

import sys
import re
import math

WORDNET_PATH = "/srv/datasets/wordnet.sorted.txt"
SYLLABLES_PATH = "/srv/datasets/syllables.txt"

# any run of characters other than lowercase letters and apostrophes
# separates two words
_WORD_SPLIT = re.compile(r"[^a-z']+")
_KINDS = ("adj", "adv", "noun", "verb")


def _load_word_types(path):
    """Assemble a table mapping each word type to the set of its words.

    Each line of the file is tab-separated: the first column holds one or
    more words joined by ";", the second column names the type.

    Raises:
        KeyError: if a line names a type other than adj/adv/noun/verb.
        IndexError: if a line has no second column.
    """
    types = {kind: set() for kind in _KINDS}
    with open(path) as data:
        for line in data:
            fields = line.split("\t")
            # strip so that a newline directly after the type column
            # cannot turn a valid type into an unknown key
            kind = fields[1].strip()
            types[kind].update(fields[0].lower().split(";"))
    return types


def _load_syllable_table(path):
    """Assemble a table mapping each word to its number of syllables.

    Each line holds one word with ";" between its syllables. When a word
    is listed more than once, the largest count is kept.
    """
    table = {}
    with open(path) as data:
        for line in data:
            word = line.replace(";", "").rstrip("\n")
            table[word] = max(len(line.split(";")), table.get(word, 0))
    return table


def _analyse(lines, types, syll_table):
    """Count words, word types and syllable counts over *lines*.

    Returns a tuple ``(total, counts, syll_counts)``: the number of words
    seen, a dict of per-type hits and a dict mapping a syllable count to
    the number of words having it.
    """
    adjectives, adverbs, nouns, verbs = (types[kind] for kind in _KINDS)
    counts = dict.fromkeys(_KINDS, 0)
    syll_counts = {}
    total = 0
    for line in lines:
        for token in _WORD_SPLIT.split(line.lower()):
            word = token.strip("'")
            if not word:
                # splitting leaves empty strings at the line edges, and a
                # token made only of apostrophes is not a word either
                continue
            total += 1

            # this is several times faster than a more extendable for loop.
            # A word may belong to several types and is counted in each.
            if word in adjectives:
                counts["adj"] += 1
            if word in adverbs:
                counts["adv"] += 1
            if word in nouns:
                counts["noun"] += 1
            if word in verbs:
                counts["verb"] += 1

            syllables = syll_table.get(word)
            if syllables is None:
                # unknown word: estimate one syllable per five letters
                syllables = math.ceil(len(word) / 5)
            syll_counts[syllables] = syll_counts.get(syllables, 0) + 1
    return total, counts, syll_counts


def _report(total, counts, syll_counts, out):
    """Print the word-type table and the syllable histogram to *out*."""

    def percent(part):
        # with no words at all every share is reported as 0 %
        return part / total * 100 if total else 0.0

    print(f"words   {total} \n", file=out)
    for kind in _KINDS:
        share = percent(counts[kind])
        print(f"{kind:<8}{counts[kind]} {share:.3f}% {'*' * round(share)}", file=out)
    print(file=out)

    # sort syllable counts based on key names
    for key in sorted(syll_counts):
        share = percent(syll_counts[key])
        print(f"{key}-syll  {syll_counts[key]} {share:.3f}% {'*' * round(share)}", file=out)


def main(stdin=None, out=None, wordnet_path=WORDNET_PATH, syllables_path=SYLLABLES_PATH):
    """Classify the words read from *stdin* and print the report to *out*.

    Args:
        stdin: iterable of text lines; defaults to ``sys.stdin``.
        out: writable text stream; defaults to ``sys.stdout``.
        wordnet_path: path of the word-type table.
        syllables_path: path of the syllable table.
    """
    stdin = sys.stdin if stdin is None else stdin
    out = sys.stdout if out is None else out
    types = _load_word_types(wordnet_path)
    syll_table = _load_syllable_table(syllables_path)
    total, counts, syll_counts = _analyse(stdin, types, syll_table)
    _report(total, counts, syll_counts, out)


if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
"""
Analyses words based on their syllables and their length.

Text is read from standard input. Every word is looked up in a table of
word types (adjective, adverb, noun, verb) and in a syllable table; the
script then prints how many words fall into each type and how many words
have each syllable count, as counts, percentages and star bars.
"""

import sys
import re
import math

WORDNET_PATH = "/srv/datasets/wordnet.sorted.txt"
SYLLABLES_PATH = "/srv/datasets/syllables.txt"

# any run of characters other than lowercase letters and apostrophes
# separates two words
_WORD_SPLIT = re.compile(r"[^a-z']+")
_KINDS = ("adj", "adv", "noun", "verb")


def _load_word_types(path):
    """Assemble a table mapping each word type to the set of its words.

    Each line of the file is tab-separated: the first column holds one or
    more words joined by ";", the second column names the type.

    Raises:
        KeyError: if a line names a type other than adj/adv/noun/verb.
        IndexError: if a line has no second column.
    """
    types = {kind: set() for kind in _KINDS}
    with open(path) as data:
        for line in data:
            fields = line.split("\t")
            # strip so that a newline directly after the type column
            # cannot turn a valid type into an unknown key
            kind = fields[1].strip()
            types[kind].update(fields[0].lower().split(";"))
    return types


def _load_syllable_table(path):
    """Assemble a table mapping each word to its number of syllables.

    Each line holds one word with ";" between its syllables. When a word
    is listed more than once, the largest count is kept.
    """
    table = {}
    with open(path) as data:
        for line in data:
            word = line.replace(";", "").rstrip("\n")
            table[word] = max(len(line.split(";")), table.get(word, 0))
    return table


def _analyse(lines, types, syll_table):
    """Count words, word types and syllable counts over *lines*.

    Returns a tuple ``(total, counts, syll_counts)``: the number of words
    seen, a dict of per-type hits and a dict mapping a syllable count to
    the number of words having it.
    """
    adjectives, adverbs, nouns, verbs = (types[kind] for kind in _KINDS)
    counts = dict.fromkeys(_KINDS, 0)
    syll_counts = {}
    total = 0
    for line in lines:
        for token in _WORD_SPLIT.split(line.lower()):
            word = token.strip("'")
            if not word:
                # splitting leaves empty strings at the line edges, and a
                # token made only of apostrophes is not a word either
                continue
            total += 1

            # this is several times faster than a more extendable for loop.
            # A word may belong to several types and is counted in each.
            if word in adjectives:
                counts["adj"] += 1
            if word in adverbs:
                counts["adv"] += 1
            if word in nouns:
                counts["noun"] += 1
            if word in verbs:
                counts["verb"] += 1

            syllables = syll_table.get(word)
            if syllables is None:
                # unknown word: estimate one syllable per five letters
                syllables = math.ceil(len(word) / 5)
            syll_counts[syllables] = syll_counts.get(syllables, 0) + 1
    return total, counts, syll_counts


def _report(total, counts, syll_counts, out):
    """Print the word-type table and the syllable histogram to *out*."""

    def percent(part):
        # with no words at all every share is reported as 0 %
        return part / total * 100 if total else 0.0

    print(f"words   {total} \n", file=out)
    for kind in _KINDS:
        share = percent(counts[kind])
        # the bar is one star per whole percentage point
        print(f"{kind:<8}{counts[kind]} {share:.3f}% {'*' * round(share)}", file=out)
    print(file=out)

    # sort syllable counts based on key names
    for key in sorted(syll_counts):
        share = percent(syll_counts[key])
        print(f"{key}-syll  {syll_counts[key]} {share:.3f}% {'*' * round(share)}", file=out)


def main(stdin=None, out=None, wordnet_path=WORDNET_PATH, syllables_path=SYLLABLES_PATH):
    """Classify the words read from *stdin* and print the report to *out*.

    Args:
        stdin: iterable of text lines; defaults to ``sys.stdin``.
        out: writable text stream; defaults to ``sys.stdout``.
        wordnet_path: path of the word-type table.
        syllables_path: path of the syllable table.
    """
    stdin = sys.stdin if stdin is None else stdin
    out = sys.stdout if out is None else out
    types = _load_word_types(wordnet_path)
    syll_table = _load_syllable_table(syllables_path)
    total, counts, syll_counts = _analyse(stdin, types, syll_table)
    _report(total, counts, syll_counts, out)


if __name__ == "__main__":
    main()