Skip to content

Instantly share code, notes, and snippets.

@darius
Last active February 5, 2017 05:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darius/07be4a00fe46459a8d7f18a468333edc to your computer and use it in GitHub Desktop.
Save darius/07be4a00fe46459a8d7f18a468333edc to your computer and use it in GitHub Desktop.
from __future__ import division
# Keyboard layouts. Each whitespace-separated chunk lists the keys handled by
# one finger, so a chunk's position in the list serves as that finger's number.
# NOTE(review): dvorak_layout has no '.' entry (qwerty_layout does), so
# finger['.'] would raise KeyError if it were the selected layout -- confirm
# whether that is intentional.
dvorak_layout = '`1!\'a; 2@,oq 3#ej $4puk%5yix ^6fdb&7ghm *8ctw (9rnv )0lsz[{]}/?+=|\\-_'.split()
qwerty_layout = '''`~!1qaz @2wsx #3edc $4rfv%5tgb ^6yhn&7ujm *8ik,< (9ol.> )0p;:?/-_[{'"=+]}|\\'''.split()

# The layout under evaluation; dvorak_layout is the alternative.
layout = qwerty_layout #dvorak_layout

# Map every key to the index of the chunk (finger) that contains it.
finger = dict((key, finger_index)
              for finger_index, group in enumerate(layout)
              for key in group)
def mash_naive(word):
    """Return the distinct characters of `word` as one canonical string.

    The characters are sorted so that any two words made of the same set of
    characters yield the same key; joining a bare set gives no such ordering
    guarantee.
    """
    return ''.join(sorted(set(word)))
max_fingers = 4 # 5


def mash_first_fingers(word, limit=None, finger_of=None):
    """Return the keys pressed when mashing `word` with a few fingers.

    Walks `word` left to right and keeps the first character met for each
    finger not seen yet, stopping as soon as `limit` distinct fingers are in
    use. The kept characters are returned sorted and joined into one string.

    word      -- the string to mash.
    limit     -- number of distinct fingers at which to stop; defaults to the
                 module-level `max_fingers`, read at call time.
    finger_of -- mapping from character to finger; defaults to the
                 module-level `finger` table, read at call time.

    Raises KeyError if a character of `word` is missing from the mapping.
    """
    if limit is None:
        limit = max_fingers
    if finger_of is None:
        finger_of = finger
    used = set()
    keys = []
    for ch in word:
        f = finger_of[ch]
        if f in used:
            # This finger is already holding down an earlier key.
            continue
        keys.append(ch)
        used.add(f)
        if len(used) == limit:
            break
    return ''.join(sorted(keys))


mash = mash_first_fingers # mash_naive
def gen_common_words(path='count_1w_100k.txt'):
    """Yield (word, count) pairs read from a tab-separated word-count file.

    Each non-blank line is expected to hold a word, a tab, and an integer
    count. Blank lines (such as a trailing one at the end of the file) are
    skipped instead of failing on the unpack.

    path -- file to read; defaults to 'count_1w_100k.txt'.

    Raises ValueError for a non-blank line that is not `word<TAB>integer`.
    """
    with open(path) as wordfile:
        for line in wordfile:
            if not line.strip():
                continue
            word, count = line.split('\t')
            yield word, int(count)
# Word -> corpus count for every word in the list.
freqs = dict(gen_common_words())

# Mash key -> the words that produce that key, most frequent first.
unmashed = {}
for word in freqs:
    key = mash(word)
    unmashed.setdefault(key, []).append(word)
for candidates in unmashed.values():
    candidates.sort(key=freqs.get, reverse=True)

# freqs_right[k] accumulates the counts of the words found among the first
# k+1 candidates listed for their own mash key.
freqs_right = [0] * 3
total_freq = 0
for word, count in freqs.items():
    total_freq += count
    # Only the leading candidates can count as a hit, so slice once.
    top = unmashed[mash(word)][:len(freqs_right)]
    if word in top:
        # A word at position p is found within p+1, p+2, ... guesses.
        for n_off in range(top.index(word), len(freqs_right)):
            freqs_right[n_off] += count

# The parenthesized single-argument form prints the same text under the
# Python 2 print statement and the Python 3 print function.
for n_off, freq_right in enumerate(freqs_right):
    print("Correct within %d guesses: %.2f%%" % (n_off+1, 100*freq_right/total_freq))
@darius
Copy link
Author

darius commented Feb 5, 2017

 $ python mashing.py
Correct within 1 guesses: 58.34%
Correct within 2 guesses: 71.48%
Correct within 3 guesses: 77.74%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment