Skip to content

Instantly share code, notes, and snippets.

@perryBunn
Created July 26, 2021 00:24
Show Gist options
  • Save perryBunn/794d25ad8467a120179d6ebc786cfbfc to your computer and use it in GitHub Desktop.
Save perryBunn/794d25ad8467a120179d6ebc786cfbfc to your computer and use it in GitHub Desktop.
This is related to a discussion that my girlfriend and I were having about the infinite monkey theorem. Needless to say, it devolved into us wondering what the average word length across all of Shakespeare's works is. The text file is from Project Gutenberg and can be found on their website.
def main():
    """Tally every whitespace-separated token in the corpus and print stats.

    Reads 't8.shakespeare.txt' line by line, skipping regions delimited by
    '<<' ... '>>' markers, and builds a mapping of
    ``token -> [token length, occurrence count]`` which is then handed to
    ``printStats``.
    """
    # token: [length, count]
    tally = {}
    # True while we are inside a '<<' ... '>>' region that must be skipped.
    in_comment = False
    with open('t8.shakespeare.txt', 'r', encoding='utf-8') as handle:
        for text_line in handle:
            # A closing marker ends the skipped region; the marker line
            # itself is never counted.
            if '>>' in text_line:
                in_comment = False
                continue
            # An opening marker (or being mid-region) means skip this line.
            if in_comment or '<<' in text_line:
                in_comment = True
                continue
            for token in text_line.split():
                entry = tally.get(token)
                if entry is None:
                    tally[token] = [len(token), 1]
                else:
                    entry[1] += 1
    printStats(tally)
def printStats(dict):
    """Print the average word length and the most frequent words.

    Args:
        dict: Mapping of ``word -> [length, count]`` where ``length`` is the
            number of characters in the word and ``count`` is how many times
            it occurred. (The parameter name shadows the builtin; it is kept
            so existing keyword callers continue to work.)

    The average is weighted by occurrence count. An empty mapping reports an
    average of 0.0 rather than raising ``ZeroDivisionError``.
    """
    print("Stats:")
    totalChars = sum(entry[0] * entry[1] for entry in dict.values())
    totalWords = sum(entry[1] for entry in dict.values())
    # Guard against an empty corpus: there is nothing to average.
    avgWordLen = totalChars / totalWords if totalWords else 0.0
    print("Avg word length:", round(avgWordLen, 4))

    iteration = 10
    # sorted() is stable, so words with equal counts keep insertion order.
    # Slicing to exactly `iteration` entries keeps the list in step with the
    # "Top N" header (the previous `i > iteration` check let N + 1 through).
    sort = sorted(dict, key=lambda w: dict[w][1], reverse=True)[:iteration]
    print("Top", iteration, "occured words in Shakespeare's writing...")
    for word in sort:
        print("|_", word)
        print("| |_ Length:", dict[word][0])
        print("| |_ Occurence:", dict[word][1])
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment