Skip to content

Instantly share code, notes, and snippets.

@Spebby
Last active November 23, 2022 21:43
Show Gist options
  • Save Spebby/8a83a787d30a6f8a19969f1291e65387 to your computer and use it in GitHub Desktop.
Save Spebby/8a83a787d30a6f8a19969f1291e65387 to your computer and use it in GitHub Desktop.
Analyses words based on their syllables and their length.
#!/usr/bin/env python3
"""
Analyses words based on their syllables and their length.
"""
import sys
import re
import math
# syllCounts: syllable count -> how many input words have that many syllables.
# syllTable:  word -> syllable count, loaded from the syllable dataset.
syllCounts = {}
syllTable = {}
# One membership set per part of speech, reachable through `types` by the
# tag used in the wordnet dataset.
adj = set()
adv = set()
nou = set()
ver = set()
types = dict(adj=adj, adv=adv, noun=nou, verb=ver)
# Number of input words found in each part-of-speech set.
counts = dict.fromkeys(types, 0)
# Fill the part-of-speech sets: each line of the wordnet dataset contributes
# the words in its first tab-separated column to the set named by its second.
with open("/srv/datasets/wordnet.sorted.txt") as data:
    for line in data:
        # Split once; column 0 is a ";"-separated word list, column 1 the tag.
        fields = line.split("\t")
        # Strip the tag so it still matches a key of `types` when it is the
        # last column on the line and therefore carries the trailing newline.
        tag = fields[1].strip()
        # Words are stored lowercased because the input text is lowercased
        # before lookup. An unknown tag still raises KeyError, as before.
        types[tag].update(fields[0].lower().split(";"))
# Build the word -> syllable-count table. For each line of the dataset the
# word is the line with its ";" separators removed, and its syllable count is
# the number of ";"-separated pieces. When the same word appears more than
# once, the largest count wins.
with open("/srv/datasets/syllables.txt") as data:
    for entry in data:
        pieces = entry.split(";")
        spelled = "".join(pieces).rstrip("\n")
        syllTable[spelled] = max(syllTable.get(spelled, 0), len(pieces))
# Read text from standard input and tally every word by part of speech
# (`counts`) and by syllable count (`syllCounts`). `wordLength` ends up as
# the total number of words seen.
wordLength = 0
for line in sys.stdin:
    # Any run of characters other than lowercase letters and apostrophes
    # separates two words.
    for word in re.split(r"[^a-z']+", line.lower()):
        # Apostrophes are kept inside a word but removed from its edges.
        word = word.strip("'")
        if not word:
            # Empty pieces (e.g. at line ends, or tokens that were only
            # apostrophes) are not words and are not counted.
            continue
        wordLength += 1
        # Explicit checks instead of a loop over `types`: this is several
        # times faster than the more extensible form. The checks are
        # independent because a word can belong to several parts of speech.
        if word in types["adj"]:
            counts["adj"] += 1
        if word in types["adv"]:
            counts["adv"] += 1
        if word in types["noun"]:
            counts["noun"] += 1
        if word in types["verb"]:
            counts["verb"] += 1
        # Words missing from the syllable table are estimated at one
        # syllable per five letters, rounded up.
        syllables = syllTable.get(word)
        if syllables is None:
            syllables = math.ceil(len(word) / 5)
        syllCounts[syllables] = syllCounts.get(syllables, 0) + 1
def _percent(part, whole):
    """Return part as a percentage of whole, or 0.0 when whole is zero."""
    return part / whole * 100 if whole else 0.0


# Report the total word count, then one row per part of speech and one row
# per syllable count. Each row shows the tally, its share of all words, and a
# bar of one "*" per (rounded) percent. Empty input yields all-zero rows
# instead of a ZeroDivisionError.
print(f"words {wordLength} \n")
for kind in ("adj", "adv", "noun", "verb"):
    share = _percent(counts[kind], wordLength)
    print(f"{kind} {counts[kind]} {share:.3f}% ", "*" * round(share), sep="")
# Blank line separating the part-of-speech rows from the syllable rows.
print()
# Syllable rows in ascending order of syllable count.
for key in sorted(syllCounts):
    share = _percent(syllCounts[key], wordLength)
    print(f"{key}-syll {syllCounts[key]} {share:.3f}% {'*' * round(share)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment