Skip to content

Instantly share code, notes, and snippets.

@mattboehm
Created February 23, 2018 22:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mattboehm/b9554c3e042f67fbc16054217e64958f to your computer and use it in GitHub Desktop.
Save mattboehm/b9554c3e042f67fbc16054217e64958f to your computer and use it in GitHub Desktop.
#python3
from collections import defaultdict, Counter
import pprint as pp
# Rows of the Dvorak layout, left to right, top row first.
# NOTE(review): not referenced by the code visible here; presumably swapped in
# for KEYS by hand to produce Dvorak results -- confirm.
DV_KEYS = [
    "',.pyfgcrl",
    "aoeuidhtns",
    ";qjkxbmwvz",
]

# Rows of the QWERTY layout, left to right, top row first.
KEYS = [
    "qwertyuiop",
    "asdfghjkl;",
    "zxcvbnm,./",
]

# MIRRORS maps every key to the key at the mirrored position in the same row
# (first <-> last, second <-> second-to-last, ...).  The mapping is symmetric.
MIRRORS = {}
for row in KEYS:
    # Only walk the left half: each step records both directions of the pair.
    # With an odd-length row the middle key would get no entry.
    half = len(row) // 2
    for left, right in zip(row[:half], reversed(row)):
        MIRRORS[left] = right
        MIRRORS[right] = left
pp.pprint(MIRRORS)
def key(word, mirrors=None):
    """Return the mirror-insensitive canonical form of *word*.

    The word is lower-cased, then each character is replaced by whichever of
    the character and its mirror key compares smaller.  Two words that differ
    only by swapping keys for their mirrors therefore yield the same string.

    Args:
        word: The word to canonicalize.
        mirrors: Optional mapping from each key to its mirror key.  When
            omitted, the module-level ``MIRRORS`` table is used.

    Returns:
        The canonical string, one character per character of
        ``word.lower()``.

    Raises:
        KeyError: If *word* contains a character that is not in the table.
    """
    if mirrors is None:
        mirrors = MIRRORS
    return "".join(min(letter, mirrors[letter]) for letter in word.lower())
# Group every dictionary word under its mirror-insensitive key.  Words that
# land in the same group collide with each other.
words_by_key = defaultdict(set)
total_words = 0
bad_words = set()
with open("/usr/share/dict/words") as f:
    for word in f:
        word = word.strip().lower()
        if not word:
            # A blank line is not a word; skip it instead of keying "".
            continue
        try:
            words_by_key[key(word)].add(word)
        except KeyError:
            # key() raises KeyError for characters with no mirror entry.
            bad_words.add(word)
        total_words += 1

# Count the distinct words that were actually keyed.  total_words counts input
# lines, but the groups are sets of lower-cased words, so lines that are equal
# after lower-casing collapse into one entry.  Using the line count as the
# denominator made the percentages below sum to less than 100.
processed_words = sum(len(group) for group in words_by_key.values())

print(total_words, "words total")
print(len(bad_words), "words unable to process: ", list(bad_words)[:10])

# lens[size] is the number of groups that contain exactly `size` words.
lens = Counter(len(val) for val in words_by_key.values())
print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
print(lens.most_common())

print("Probability of a word having N collisions:")
for numcoll, count in sorted(lens.items()):
    # numcoll * count words live in groups of this size; each of them
    # collides with numcoll - 1 other words.
    probability = numcoll * count / processed_words * 100
    print(numcoll-1, probability)

# Show the first 11 groups that contain more than one word.
cc = 0
print("Some sample collisions:")
for wds in words_by_key.values():
    if len(wds) > 1:
        cc += 1
        print(wds)
        if cc > 10:
            break
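# Recorded output from two runs of the script, one per keyboard layout.
# Note: in these recorded runs the percentages were computed against the raw
# line count, which includes words that are duplicates after lower-casing, so
# each "Probability" column sums to about 99.36 rather than 100.
# QWERTY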
# 235886 words total
# 2 words unable to process: ['jean-pierre', 'jean-christophe']
# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
# [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
# Probability of a word having N collisions:
# 0 93.83171389326958
# 1 4.325007206932221
# 2 0.7656305641756117
# 3 0.27979854504756574
# 4 0.08054806599854165
# 5 0.05087246273592105
# 6 0.020772922283834427
# 7 0.00339149751572807
# Some sample collisions:
# {'dub', 'dun'}
# {'killable', 'kissable'}
# {'percival', 'perceval'}
# {'it', 'ey'}
# {'scruf', 'scurf'}
# {'silverness', 'silverbill'}
# {'singer', 'linger'}
# {'wade', 'wake', 'wadi'}
# {'jag', 'fag'}
# {'wryly', 'outly'}
# {'pegasian', 'pegasean'}
# DVORAK
# 235886 words total
# 2 words unable to process: ['jean-pierre', 'jean-christophe']
# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
# [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
# Probability of a word having N collisions:
# 0 96.3270081904665
# 1 2.5580370012378966
# 2 0.387902528361398
# 3 0.0780044428617456
# 4 0.004239371894660088
# 5 0.002543623136796052
# Some sample collisions:
# {'apathism', 'agathism'}
# {'balk', 'balm'}
# {'unary', 'hoary'}
# {'cypris', 'cypria'}
# {'indiscreetly', 'indiscretely'}
# {'pump', 'gump'}
# {'yond', 'food'}
# {'getae', 'geest'}
# {'trig', 'trip'}
# {'apselaphesia', 'apselaphesis'}
# {'tach', 'each'}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment