Skip to content

Instantly share code, notes, and snippets.

@jpountz
Created July 28, 2015 15:53
Show Gist options
  • Save jpountz/a34e465f0c17d258fdd7 to your computer and use it in GitHub Desktop.
"""Measure how often fixed-width chunks recur in a word list.

Each word is cut into consecutive, non-overlapping chunks of ``width``
characters, starting from the END of the word; a leftover prefix shorter
than ``width`` is ignored.  The script then prints, in this order:

1. the total number of characters over all words considered,
2. the summed occurrence count of the ``top_count`` most frequent chunks,
3. those chunks as a list of ``(chunk, count)`` pairs, most frequent first.

NOTE(review): the original paste had lost its indentation.  The nesting
below was reconstructed from syntax (the decrement of the position must be
inside the ``while`` loop for it to terminate).  The second print is
assumed to run once, after the summing loop -- confirm against the
original gist.
"""

width = 4        # number of characters in every chunk
top_count = 256  # how many of the most frequent chunks are reported


def count_chunks(words, chunk_width=width):
    """Count fixed-width chunks taken from the end of each word.

    Args:
        words: iterable of strings; one trailing newline run is stripped
            from each.  Words containing an apostrophe are skipped entirely
            and contribute neither to the total nor to the counts.
        chunk_width: chunk length in characters; must be at least 1.

    Returns:
        A ``(total, freqs)`` tuple: ``total`` is the summed length of all
        words that were kept, ``freqs`` maps each chunk to the number of
        times it was seen.

    Raises:
        ValueError: if ``chunk_width`` is smaller than 1 (the scan would
            never advance).
    """
    if chunk_width < 1:
        raise ValueError("chunk_width must be >= 1")

    total = 0
    freqs = {}
    for word in words:
        word = word.rstrip('\n')
        if "'" in word:
            continue
        total += len(word)
        # Walk backwards from the end of the word, one whole chunk at a
        # time, stopping when fewer than chunk_width characters remain.
        end = len(word)
        while end >= chunk_width:
            chunk = word[end - chunk_width:end]
            freqs[chunk] = freqs.get(chunk, 0) + 1
            end -= chunk_width
    return total, freqs


def top_chunks(freqs, count=top_count):
    """Return the ``count`` most frequent ``(chunk, occurrences)`` pairs.

    Pairs are ordered by decreasing occurrence count.  The sort is stable,
    so chunks with equal counts keep the iteration order of ``freqs``.
    """
    ranked = sorted(freqs.items(), key=lambda item: -item[1])
    return ranked[0:count]


def main(path="/usr/share/dict/words"):
    """Read the word list at ``path`` and print the three statistics."""
    # The context manager closes the file; iterating it streams the lines
    # instead of loading the whole file into a list first.
    with open(path) as word_file:
        total, freqs = count_chunks(word_file)
    top = top_chunks(freqs)
    covered = sum(occurrences for _, occurrences in top)
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(total)
    print(covered)
    print(top)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment