Skip to content

Instantly share code, notes, and snippets.

@phoeagon
Last active November 17, 2015 03:09
Show Gist options
  • Save phoeagon/34ab38498a4af1bd9937 to your computer and use it in GitHub Desktop.
Save phoeagon/34ab38498a4af1bd9937 to your computer and use it in GitHub Desktop.
wordacademy
(5,6,5,8,5,7)
oenioa
nahpnc
ecgcen
nradrr
rtrosh
ohdbur
(4,5)
rah
psa
clw
lac
kre
cof
thg
mif
ous
eyr
icr
rcu
gni
ber
xor
cai
htr
aes
bnp
tor
koi
nai
thg
uge
(3,6)
pro
aef
sst
(5,4)
hry
arr
ifu
mth
eyn
mit
upc
rsa
seh
car
osf
tob
(6,3)
gna
eri
ogf
(5,4)
ael
fad
sla
(3,6)
IID
ADK
PER
(4,5)
pca
ato
rka
(3,6)
oub
eit
ntt
(5,4)
wii
nfp
eoh
(5,4)
ene
csa
ply
(3,6)
rbu
cpt
uet
(5,4)
erb
ige
tar
#########
(3, 6)
ini
uki
nsb
###########
#[[' ', ' ', 'b'], [' ', 'k', 'i'], ['i', 'n', 'i']]
nup
gou
ebl
vif
eri
rsh
ner
imv
cae
(3,6)
efc
eeo
atf
(4,5)
wee
svt
loe
eoh
ynl
god
elp
ifa
drk
cim
enm
lab
ppe
paa
ler
sny
unb
nur
(4,5)
erp
tel
tna
(5,4)
eah
bct
kie
(4,5)
llo
arh
tst
(4,5)
let
raa
kew
import itertools
import pprint
import nltk
import dill
import copy
from pathos.multiprocessing import ProcessingPool as Pool
DICT_FILE=['/Users/zheq/Downloads/words', '/run/shm/words','/etc/dictionaries-common/words']
DIR=list(itertools.product([1,-1,0], repeat=2))
DIR.remove((0,0))
def get_dict_file():
for path in DICT_FILE:
try:
return open(path, 'r')
except:
pass
return None
def get_freq_dict():
text = nltk.corpus.gutenberg.words()
fdisk = nltk.FreqDist(w.lower() for w in text)
print "Loaded frequency dict"
return fdisk
def get_word_lens():
lens_tuple = input('Please input the lengths of each word, as tuple ')
return sorted(lens_tuple)
def get_dict(f):
return {word.strip() for word in f.readlines()}
def get_prefix_dict(dictionary):
prefix_dict = set()
for word in dictionary:
for i in range(1, len(word)):
prefix_dict.add(word[:i])
prefix_dict.add('')
return prefix_dict
def get_board(total_len):
cur_len = 0
board = []
while cur_len < total_len:
line = raw_input().lower()
cur_len += len(line)
board.append(list(line))
board.reverse()
return board
def work_multi(board, word_lens, dictionary):
n = len(board)
m = len(board[0])
global_ans = []
prefix_dict = get_prefix_dict(dictionary)
def work_sg(pos, word_lens):
global_ans = []
#for word_lens in set(itertools.permutations(word_lens)):
dfs(board, n, m, pos[0], pos[1], '', ' ', [], word_lens,
global_ans, dictionary, prefix_dict)
return global_ans
return reduce(list.__add__,
Pool().map(work_sg, list(itertools.product(range(n), range(m))),
[word_lens] * (n*m)))
def work(board, word_lens, dictionary):
n = len(board)
m = len(board[0])
global_ans = []
prefix_dict = get_prefix_dict(dictionary)
try:
for i in range(n):
for j in range(m):
#for word_lens in set(itertools.permutations(word_lens)):
dfs(board, n, m, i, j, '', ' ', [], word_lens,
global_ans, dictionary, prefix_dict)
except KeyboardInterrupt:
print global_ans
return global_ans
def clear_selected(board, del_c, height):
board = [list(line) for line in zip(*board)]
for i in range(len(board)):
board[i] = filter(lambda x: x != del_c, board[i])
for i in range(len(board)):
board[i] += [del_c] * (height - len(board[i]))
return [list(line) for line in zip(*board)]
def count_connect(board, n, m, x, y, del_c):
if x >= 0 and x < n and y >= 0 and y < m:
if (board[x][y] != del_c):
tmp , board[x][y] = board[x][y] , del_c
ans = 1 + sum([count_connect(board, n, m,
x + diff[0], y + diff[1], del_c) for diff in DIR])
return ans
return 0
def subsets(mySet):
return reduce(lambda z, x: z + [y + [x] for y in z], mySet, [[]])
def test_is_possible(connect_count, word_lens):
if connect_count < word_lens[0]:
return False
possible_sum = {sum(x) for x in subsets(word_lens[1:])}
possible_sum.add(0)
# print possible_sum, word_lens, connect_count, connect_count - word_lens[0]
if (connect_count - word_lens[0]) not in possible_sum :
return False
return True
def dfs(board, n, m, x, y, cur, del_c, solutions, word_lens, global_ans,
dictionary, prefix_dict):
if cur not in prefix_dict:
return
if x >= 0 and x < n and y >= 0 and y < m:
if board[x][y] != del_c:
tmp_c = board[x][y]
cur += tmp_c
board[x][y] = del_c
# If len fullfilled
if len(cur) in word_lens:
if cur in dictionary:
if len(word_lens) == 1:
# All keyboard searched
global_ans.append(solutions + [cur])
else:
# If not, still look for the next word
new_board = clear_selected(board, del_c, n)
for u in range(n):
for v in range(m):
if board[u][v] != del_c:
tmp_board = copy.deepcopy(new_board)
cnt = count_connect(tmp_board, n, m, u, v, del_c)
word_lens.remove(len(cur))
if test_is_possible(cnt, word_lens):
dfs(new_board, n, m, u, v, '', del_c,
solutions + [cur], word_lens,
global_ans, dictionary, prefix_dict)
word_lens.append(len(cur))
# Either way, search for the next letter
for diff in DIR:
i = x + diff[0]
j = y + diff[1]
dfs(board, n, m, i, j, cur, del_c, solutions,
word_lens, global_ans, dictionary, prefix_dict)
board[x][y] = tmp_c
def main():
dictionary = get_dict(get_dict_file())
word_lens = get_word_lens()
board = get_board(sum(word_lens))
try:
ans = work_multi(board, word_lens, dictionary)
except:
print "Fall back to single thread"
ans = work(board, word_lens, dictionary)
pprint.pprint(ans)
print "\n" * 3
fdisk = get_freq_dict()
ans = sorted(ans, key=lambda x: -min(fdisk[xx] for xx in x))
pprint.pprint(ans[:20])
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment