# mattboehm/mirrorboard_collisions.py

## mirrorboard_collisions.py
#python3
from collections import defaultdict, Counter
import pprint as pp
# Dvorak layout, one string per letter row (left to right). Nothing below
# references it; presumably it is swapped in for KEYS by hand to produce the
# DVORAK figures recorded at the end of this file.
DV_KEYS = [
    "',.pyfgcrl",
    "aoeuidhtns",
    ";qjkxbmwvz",
]
# QWERTY layout, one string per letter row; this is the layout analysed.
KEYS = [
    "qwertyuiop",
    "asdfghjkl;",
    "zxcvbnm,./",
]
# MIRRORS maps each key to the key at the mirrored position of the same row
# (first <-> last, second <-> second-to-last, ...).
MIRRORS = {}
for row in KEYS:
    # Pairing a row with its own reverse yields every (key, mirror) pair in
    # both directions; a centre key of an odd-length row maps to itself.
    for near, far in zip(row, reversed(row)):
        MIRRORS[near] = far
pp.pprint(MIRRORS)


def key(word):
    """Return the mirror-insensitive key for *word*.

    Each character of the lower-cased word is replaced by whichever of
    itself and its MIRRORS counterpart sorts first, so two words that differ
    only by mirrored keys produce the same string.

    Raises KeyError if the word contains a character absent from MIRRORS.
    """
    canonical = []
    for ch in word.lower():
        twin = MIRRORS[ch]
        # Keep the smaller of the pair as the shared representative.
        canonical.append(ch if ch <= twin else twin)
    return "".join(canonical)

# Group the dictionary's words by mirror key: all words in one group share
# the same key() value, i.e. they collide with each other.
words_by_key = defaultdict(set)
total_words = 0  # non-blank lines read, including unprocessable ones
bad_words = set()  # words for which key() raised KeyError
with open("/usr/share/dict/words") as f:
    for line in f:
        word = line.strip().lower()
        if not word:
            # A blank line is not a word; skip it rather than grouping "".
            continue
        total_words += 1
        try:
            mirror_key = key(word)
        except KeyError:
            # The word contains a character with no MIRRORS entry.
            bad_words.add(word)
        else:
            words_by_key[mirror_key].add(word)
# Count the distinct words actually stored in the groups. Lower-casing folds
# case variants of a word into a single set entry, so deriving this figure
# from the line count would overstate the denominator and the percentages
# computed from it would not add up to 100.
processed_words = sum(len(group) for group in words_by_key.values())

# Headline counts, plus at most ten of the words that could not be keyed.
print(total_words, "words total")
unprocessed_sample = list(bad_words)[:10]
print(len(bad_words), "words unable to process: ", unprocessed_sample)

# Histogram: group size -> number of groups of that size.
lens = Counter(map(len, words_by_key.values()))
print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
print(lens.most_common())

# A group of size s contains s words, each colliding with the other s - 1,
# so s * (number of such groups) words have exactly s - 1 collisions.
# The share is printed as a percentage of processed_words.
print("Probability of a word having N collisions:")
for group_size in sorted(lens):
    share = group_size * lens[group_size] / processed_words * 100
    print(group_size - 1, share)

# Show a handful of colliding groups, in dictionary iteration order.
print("Some sample collisions:")
remaining = 11  # at most eleven groups are shown
for group in words_by_key.values():
    if len(group) < 2:
        continue  # singleton group: nothing collides with this word
    print(group)
    remaining -= 1
    if not remaining:
        break

# QWERTY
# 235886 words total
# 2 words unable to process:  ['jean-pierre', 'jean-christophe']
# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
# [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
# Probability of a word having N collisions:
# 0 93.83171389326958
# 1 4.325007206932221
# 2 0.7656305641756117
# 3 0.27979854504756574
# 4 0.08054806599854165
# 5 0.05087246273592105
# 6 0.020772922283834427
# 7 0.00339149751572807
# Some sample collisions:
# {'dub', 'dun'}
# {'killable', 'kissable'}
# {'percival', 'perceval'}
# {'it', 'ey'}
# {'scruf', 'scurf'}
# {'silverness', 'silverbill'}
# {'singer', 'linger'}
# {'wade', 'wake', 'wadi'}
# {'jag', 'fag'}
# {'wryly', 'outly'}
# {'pegasian', 'pegasean'}

# DVORAK
# 235886 words total
# 2 words unable to process:  ['jean-pierre', 'jean-christophe']
# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
# [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
# Probability of a word having N collisions:
# 0 96.3270081904665
# 1 2.5580370012378966
# 2 0.387902528361398
# 3 0.0780044428617456
# 4 0.004239371894660088
# 5 0.002543623136796052
# Some sample collisions:
# {'apathism', 'agathism'}
# {'balk', 'balm'}
# {'unary', 'hoary'}
# {'cypris', 'cypria'}
# {'indiscreetly', 'indiscretely'}
# {'pump', 'gump'}
# {'yond', 'food'}
# {'getae', 'geest'}
# {'trig', 'trip'}
# {'apselaphesia', 'apselaphesis'}
# {'tach', 'each'}
	#python3
	from collections import defaultdict, Counter
	import pprint as pp
	# Dvorak layout, one string per letter row (left to right). Nothing below
	# references it; presumably it is swapped in for KEYS by hand to produce
	# the DVORAK figures recorded at the end of this file.
	DV_KEYS = [
		"',.pyfgcrl",
		"aoeuidhtns",
		";qjkxbmwvz",
	]
	# QWERTY layout, one string per letter row; this is the layout analysed.
	KEYS = [
		"qwertyuiop",
		"asdfghjkl;",
		"zxcvbnm,./",
	]
	# MIRRORS maps each key to the key at the mirrored position of the same
	# row (first <-> last, second <-> second-to-last, ...).
	MIRRORS = {}
	for row in KEYS:
		# Pairing a row with its own reverse yields every (key, mirror) pair
		# in both directions; a centre key of an odd-length row maps to itself.
		for near, far in zip(row, reversed(row)):
			MIRRORS[near] = far
	pp.pprint(MIRRORS)


	def key(word):
		"""Return the mirror-insensitive key for *word*.

		Each character of the lower-cased word is replaced by whichever of
		itself and its MIRRORS counterpart sorts first, so two words that
		differ only by mirrored keys produce the same string.

		Raises KeyError if the word contains a character absent from MIRRORS.
		"""
		return "".join(min(letter, MIRRORS[letter]) for letter in word.lower())

	# Group the dictionary's words by mirror key: all words in one group share
	# the same key() value, i.e. they collide with each other.
	words_by_key = defaultdict(set)
	total_words = 0  # non-blank lines read, including unprocessable ones
	bad_words = set()  # words for which key() raised KeyError
	with open("/usr/share/dict/words") as f:
		for line in f:
			word = line.strip().lower()
			if not word:
				# A blank line is not a word; skip it rather than grouping "".
				continue
			total_words += 1
			try:
				mirror_key = key(word)
			except KeyError:
				# The word contains a character with no MIRRORS entry.
				bad_words.add(word)
			else:
				words_by_key[mirror_key].add(word)
	# Count the distinct words actually stored in the groups. Lower-casing
	# folds case variants of a word into a single set entry, so deriving this
	# figure from the line count would overstate the denominator and the
	# percentages computed from it would not add up to 100.
	processed_words = sum(len(group) for group in words_by_key.values())

	# Headline counts, plus at most ten of the words that could not be keyed.
	print(total_words, "words total")
	print(len(bad_words), "words unable to process: ", list(bad_words)[:10])

	# Histogram: group size -> number of groups of that size.
	lens = Counter(len(val) for val in words_by_key.values())
	print("Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.")
	print(lens.most_common())

	# A group of size s contains s words, each colliding with the other s - 1,
	# so s * (number of such groups) words have exactly s - 1 collisions.
	# The share is printed as a percentage of processed_words.
	print("Probability of a word having N collisions:")
	for numcoll, count in sorted(lens.items()):
		probability = numcoll * count / processed_words * 100
		print(numcoll - 1, probability)

	# Show a handful of colliding groups, in dictionary iteration order.
	cc = 0  # number of colliding groups printed so far
	print("Some sample collisions:")
	for wds in words_by_key.values():
		if len(wds) > 1:
			cc += 1
			print(wds)
			# The check runs after printing, so eleven groups are shown.
			if cc > 10:
				break

	# QWERTY
	# 235886 words total
	# 2 words unable to process: ['jean-pierre', 'jean-christophe']
	# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
	# [(1, 221334), (2, 5101), (3, 602), (4, 165), (5, 38), (6, 20), (7, 7), (8, 1)]
	# Probability of a word having N collisions:
	# 0 93.83171389326958
	# 1 4.325007206932221
	# 2 0.7656305641756117
	# 3 0.27979854504756574
	# 4 0.08054806599854165
	# 5 0.05087246273592105
	# 6 0.020772922283834427
	# 7 0.00339149751572807
	# Some sample collisions:
	# {'dub', 'dun'}
	# {'killable', 'kissable'}
	# {'percival', 'perceval'}
	# {'it', 'ey'}
	# {'scruf', 'scurf'}
	# {'silverness', 'silverbill'}
	# {'singer', 'linger'}
	# {'wade', 'wake', 'wadi'}
	# {'jag', 'fag'}
	# {'wryly', 'outly'}
	# {'pegasian', 'pegasean'}

	# DVORAK
	# 235886 words total
	# 2 words unable to process: ['jean-pierre', 'jean-christophe']
	# Size of groups: (size of 1 means no collisions, 2 means 1 collision, etc.
	# [(1, 227220), (2, 3017), (3, 305), (4, 46), (5, 2), (6, 1)]
	# Probability of a word having N collisions:
	# 0 96.3270081904665
	# 1 2.5580370012378966
	# 2 0.387902528361398
	# 3 0.0780044428617456
	# 4 0.004239371894660088
	# 5 0.002543623136796052
	# Some sample collisions:
	# {'apathism', 'agathism'}
	# {'balk', 'balm'}
	# {'unary', 'hoary'}
	# {'cypris', 'cypria'}
	# {'indiscreetly', 'indiscretely'}
	# {'pump', 'gump'}
	# {'yond', 'food'}
	# {'getae', 'geest'}
	# {'trig', 'trip'}
	# {'apselaphesia', 'apselaphesis'}
	# {'tach', 'each'}