Created
March 7, 2019 01:28
-
-
Save unixpickle/8777fe73d5cf44418a99293fbc888a9b to your computer and use it in GitHub Desktop.
Word stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print, for each letter, how often dictionary words end with it.

For every lowercase ASCII letter the script reports the fraction of counted
words that end with the letter and the ratio of words ending with it to
words starting with it, most common ending first.
"""
from collections import Counter

LETTERS = 'abcdefghijklmnopqrstuvwxyz'
WORDS_PATH = '/usr/share/dict/words'


def read_words(path=WORDS_PATH):
    """Return the whitespace-stripped lines of the word list at *path*."""
    # Explicit encoding so the result does not depend on the process locale.
    with open(path, 'rt', encoding='utf-8') as in_file:
        return [line.strip() for line in in_file]


def letter_stats(words, letters=LETTERS):
    """Compute ending-letter statistics for *words*.

    Matching is case-sensitive: a word only counts toward a letter when its
    first/last character is exactly that letter.

    Args:
        words: iterable of words; empty strings are ignored.
        letters: the letters to report on, in tie-break order.

    Returns:
        A list of ``(letter, fraction, ratio)`` tuples sorted by descending
        ending count (ties keep the order of *letters*).  ``fraction`` is the
        ending count divided by the sum of ending counts over *letters*
        (``0.0`` when that sum is zero).  ``ratio`` is the ending count divided
        by the starting count; when no word starts with the letter it is
        ``inf`` if some word ends with it and ``nan`` otherwise.
    """
    # One pass for each end of the word instead of two scans per letter.
    ends = Counter()
    starts = Counter()
    for word in words:
        if word:
            ends[word[-1]] += 1
            starts[word[0]] += 1

    counts = {letter: ends[letter] for letter in letters}
    total = sum(counts.values())

    # sorted() is stable, so equal counts stay in the order of `letters`.
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)

    stats = []
    for letter, count in ranked:
        fraction = count / total if total else 0.0
        started = starts[letter]
        if started:
            ratio = count / started
        else:
            # No word starts with this letter: avoid ZeroDivisionError.
            ratio = float('inf') if count else float('nan')
        stats.append((letter, fraction, ratio))
    return stats


def main():
    """Read the system word list and print one line of stats per letter."""
    for letter, fraction, ratio in letter_stats(read_words()):
        print('%s: %.5f (ratio: %.5f)' % (letter, fraction, ratio))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment