This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import itertools | |
| wordlist = sorted(open("english_words_original_wordle.txt").read().strip().split("\n")) | |
| ALPHABET = "abcdefghijklmnopqrstuvwxyz" | |
| """ | |
| we have a tree. | |
| Each node of the tree is either a list of suffixes, or a list of trees (whichever is smaller) | |
| first bit specifies which. 0=trees, 1=suffixes. (unless we're on the last level, which must be a list of suffixes) | |
| suffix lists are encoded via 5-bit chars. the list is terminated with the sentinel value 31 | |
| zero-length lists are encoded as a single 0 bit | |
| non-zero-length lists are prefixed by a 1 bit | |
| The next bit specifies whether the list is sentinel-terminated, or fixed length (specified by a 2-bit length field) | |
| """ | |
| def pack_bits(x, n): | |
| buf = [] | |
| for i in range(n): | |
| buf.append((x>>(n-i-1))&1) | |
| return buf | |
| def unpack_bits(buf, offset, n): | |
| x = 0 | |
| for i in range(n): | |
| x <<= 1 | |
| x += buf[offset+i] | |
| return x, offset + n | |
| def pack_list(suffixes): | |
| if not suffixes: | |
| return [0] | |
| buf = [1] | |
| using_sentinel = len(suffixes) > 4 | |
| if using_sentinel: | |
| buf += [1] | |
| else: | |
| buf += [0] + pack_bits(len(suffixes)-1, 2) | |
| for s in suffixes: | |
| for c in s: | |
| buf += pack_bits(ALPHABET.index(c), 5) | |
| if using_sentinel: | |
| buf += pack_bits(31, 5) | |
| return buf | |
| def unpack_list(buf, offset, depth): | |
| first, offset = unpack_bits(buf, offset, 1) | |
| if not first: | |
| return [], offset | |
| using_sentinel, offset = unpack_bits(buf, offset, 1) | |
| if using_sentinel: | |
| length = 99999999 | |
| else: | |
| length, offset = unpack_bits(buf, offset, 2) | |
| res = [] | |
| for _ in range(length+1): | |
| first, offset = unpack_bits(buf, offset, 5) | |
| if using_sentinel and first == 31: | |
| return res, offset | |
| suffix = ALPHABET[first] | |
| for _ in range(depth-1): | |
| next, offset = unpack_bits(buf, offset, 5) | |
| suffix += ALPHABET[next] | |
| res.append(suffix) | |
| return res, offset | |
| def pack_tree(words): | |
| if not words or len(words[0]) == 1: | |
| return pack_list(words) | |
| res = [] | |
| for x in ALPHABET: | |
| l = [w[1:] for w in words if w[0] == x] | |
| packed_list = pack_list(l) | |
| packed_tree = pack_tree(l) | |
| if len(packed_list) <= len(packed_tree): | |
| res += [1] + packed_list | |
| else: | |
| res += [0] + packed_tree | |
| #maxlen = max(maxlen, len(clusters[x])) | |
| return res | |
| def unpack_tree(buf, offset, depth): | |
| if depth == 1: | |
| return unpack_list(buf, offset, depth) | |
| res = {} | |
| for c in ALPHABET: | |
| first, offset = unpack_bits(buf, offset, 1) | |
| if first: | |
| result, offset = unpack_list(buf, offset, depth-1) | |
| else: | |
| result, offset = unpack_tree(buf, offset, depth-1) | |
| #print(c, result) | |
| if result: | |
| res[c] = result | |
| return res, offset | |
| def query_tree(tree, word): | |
| if type(tree) is list: | |
| return word in tree | |
| prefix, suffix = word[0], word[1:] | |
| if prefix not in tree: | |
| return False | |
| return query_tree(tree[prefix], suffix) | |
| """ | |
| bestperm = None | |
| bestsize = 999999999999 | |
| for permut in itertools.permutations(range(5)): | |
| t = pack_tree(["".join([w[i] for i in permut]) for w in wordlist]) | |
| print(permut) | |
| print("Tree size (bytes)", len(t)/8) | |
| if len(t) < bestsize: | |
| bestsize = len(t) | |
| bestperm = permut | |
| print(bestperm) | |
| exit() | |
| """ | |
| def permute(word): | |
| return "".join([word[i] for i in (4, 3, 1, 2, 0)]) | |
| t = pack_tree([permute(w) for w in wordlist]) | |
| print("Tree size (bytes)", len(t)/8) # 17362.125 | |
| ut, _ = unpack_tree(t, 0, 5) | |
| print(query_tree(ut, permute("sugar"))) | |
| print(query_tree(ut, permute("blahh"))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment