Skip to content

Instantly share code, notes, and snippets.

@DavidBuchanan314
Last active February 6, 2022 22:11
Embed
What would you like to do?
import itertools
wordlist = sorted(open("english_words_original_wordle.txt").read().strip().split("\n"))
ALPHABET = "abcdefghijklmnopqrstuvwxyz"
"""
we have a tree.
Each node of the tree is either a list of suffixes, or a list of trees (whichever is smaller)
first bit specifies which. 0=trees, 1=suffixes. (unless we're on the last level, which must be a list of suffixes)
suffix lists are encoded via 5-bit chars. the list is terminated with the sentinel value 31
zero-length lists are encoded as a single 0 bit
non-zero-length lists are prefixed by a 1 bit
The next bit specifies whether the list is sentinel-terminated, or fixed length (specified by a 2-bit length field)
"""
def pack_bits(x, n):
buf = []
for i in range(n):
buf.append((x>>(n-i-1))&1)
return buf
def unpack_bits(buf, offset, n):
x = 0
for i in range(n):
x <<= 1
x += buf[offset+i]
return x, offset + n
def pack_list(suffixes):
if not suffixes:
return [0]
buf = [1]
using_sentinel = len(suffixes) > 4
if using_sentinel:
buf += [1]
else:
buf += [0] + pack_bits(len(suffixes)-1, 2)
for s in suffixes:
for c in s:
buf += pack_bits(ALPHABET.index(c), 5)
if using_sentinel:
buf += pack_bits(31, 5)
return buf
def unpack_list(buf, offset, depth):
first, offset = unpack_bits(buf, offset, 1)
if not first:
return [], offset
using_sentinel, offset = unpack_bits(buf, offset, 1)
if using_sentinel:
length = 99999999
else:
length, offset = unpack_bits(buf, offset, 2)
res = []
for _ in range(length+1):
first, offset = unpack_bits(buf, offset, 5)
if using_sentinel and first == 31:
return res, offset
suffix = ALPHABET[first]
for _ in range(depth-1):
next, offset = unpack_bits(buf, offset, 5)
suffix += ALPHABET[next]
res.append(suffix)
return res, offset
def pack_tree(words):
if not words or len(words[0]) == 1:
return pack_list(words)
res = []
for x in ALPHABET:
l = [w[1:] for w in words if w[0] == x]
packed_list = pack_list(l)
packed_tree = pack_tree(l)
if len(packed_list) <= len(packed_tree):
res += [1] + packed_list
else:
res += [0] + packed_tree
#maxlen = max(maxlen, len(clusters[x]))
return res
def unpack_tree(buf, offset, depth):
if depth == 1:
return unpack_list(buf, offset, depth)
res = {}
for c in ALPHABET:
first, offset = unpack_bits(buf, offset, 1)
if first:
result, offset = unpack_list(buf, offset, depth-1)
else:
result, offset = unpack_tree(buf, offset, depth-1)
#print(c, result)
if result:
res[c] = result
return res, offset
def query_tree(tree, word):
if type(tree) is list:
return word in tree
prefix, suffix = word[0], word[1:]
if prefix not in tree:
return False
return query_tree(tree[prefix], suffix)
"""
bestperm = None
bestsize = 999999999999
for permut in itertools.permutations(range(5)):
t = pack_tree(["".join([w[i] for i in permut]) for w in wordlist])
print(permut)
print("Tree size (bytes)", len(t)/8)
if len(t) < bestsize:
bestsize = len(t)
bestperm = permut
print(bestperm)
exit()
"""
def permute(word):
return "".join([word[i] for i in (4, 3, 1, 2, 0)])
t = pack_tree([permute(w) for w in wordlist])
print("Tree size (bytes)", len(t)/8) # 17362.125
ut, _ = unpack_tree(t, 0, 5)
print(query_tree(ut, permute("sugar")))
print(query_tree(ut, permute("blahh")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment