Crossword puzzle solving helpers.

combine2len.py
# Usage:
# ---
# python combine2len.py [total_len [num_spaces ['words.txt' ['banned.txt']]]]
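# Example: `python combine2len.py 12 2 words.txt banned.txt` looks for 3-word
# combinations whose letters total 10 (12 characters minus the 2 spaces), with
# candidate words read from words.txt and exclusions from banned.txt (both
# filenames are placeholders from the usage line above).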
from itertools import combinations
from pprint import pprint
from sys import argv
def get_combinations(strings, num_strings, total_length):
    """Return every num_strings-sized set of words whose lengths sum to total_length."""
    result = []
    for combination in combinations(strings, num_strings):
        if sum(len(s) for s in combination) == total_length:
            result.append(frozenset(combination))
    return set(result)
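# A quick sanity check (hypothetical data): of the pairs drawn from
# {'AB', 'CDE', 'F'}, only 'CDE' + 'F' has a combined length of 4, so
# get_combinations({'AB', 'CDE', 'F'}, 2, 4) == {frozenset({'CDE', 'F'})}.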
if __name__ == '__main__':
    total_len = int(argv[1]) if len(argv) > 1 and int(argv[1]) > 1 else 9
    num_spaces = int(argv[2]) if len(argv) > 2 and int(argv[2]) in range(4) else 1  # 0-3
    if len(argv) > 3:
        with open(argv[3], 'rt') as f:
            words = {l.strip() for l in f}
    else:
        words = set(['EGYEDÜL', 'A', 'KÁR', 'TE', 'DOHÁNYBOLT', 'VAGY', 'FÉNYEK', 'KICSINY', 'BÁL', 'VAGY', 'ELSŐ', 'AZ', 'MÉLABÚ', 'VARÁZSHEGYEN', 'KÉSŐ', 'NEKEM', 'A', 'JEL', 'HÁZ', 'MÁR', 'DARABOT', 'LE', 'VÁLUNK', 'HELYEM', 'KÉNE', 'CSÚSSZ', 'A', 'TUDOM', 'AZ', 'ÉVAD', 'MOST', 'NEM', 'SZÍVEMBŐL', 'KEZDENÜNK', 'SZÉP', 'MAJD', 'A', 'INDIÁN', 'SOSEM', 'AKI', 'ÚR', 'VIGYÁZZ', 'ÉGNEK', 'VAGY', 'ÉLLEK', 'KIAZAKI', 'MOSD', 'VAGY', 'LÉGY', 'FEHÉRRE', 'TÚL', 'NEM', 'A', 'TANÁR', 'MIÉRT', 'RÁM', 'SPANOM', 'A', 'OPERÁBAN', 'FALUM', 'EGY', 'NYÁR', 'LEONARD', 'KÁR', 'DÜBÖRÖG', 'IMMUNISSÁ', 'NYÁR', 'FONYÓDI', 'VAN', 'KERESEM', 'TÚL'])
    if len(argv) > 4:
        with open(argv[4], 'rt') as f:
            banned = {l.strip() for l in f}
    else:
        banned = set(['FONYÓDI', 'MÉLABÚ', 'DÜBÖRÖG', 'HÁZ', 'ÉLLEK', 'TÚL', 'MIÉRT', 'SZÉP', 'NEKEM', 'TUDOM', 'ÉGNEK', 'FÉNYEK', 'KERESEM', 'HELYEM', 'DOHÁNYBOLT', 'SPANOM', 'VAN', 'KICSINY', 'FALUM', 'LÉGY', 'AKI', 'VIGYÁZZ', 'RÁM'])
    words -= banned
    res = get_combinations(words, num_spaces + 1, total_len - num_spaces)
    sorted_sub = [sorted(sorted(e), key=len, reverse=True) for e in res]
    if num_spaces == 0:
        sorted_by_len = sorted(sorted(list(e)[0] for e in res), key=len, reverse=True)
    else:
        # Sort alphabetically first, then (stably) by descending word lengths.
        sorted_by_alpha = sorted(sorted_sub, key=tuple)
        sorted_by_len = sorted(sorted_by_alpha, key=lambda e: tuple(-len(s) for s in e))
    pprint(sorted_by_len)
    print(len(res))

findword.py
# Prerequisites:
# ---
# conda install pyenchant
# Copy 'hu_HU.aff' and 'hu_HU.dic' from:
# https://github.com/LibreOffice/dictionaries/tree/master/hu_HU
# to the enchant data directory, e.g.:
# H:\Anaconda3\Lib\site-packages\enchant\data\mingw64\share\enchant\hunspell
# "magyar-szavak.txt" from:
# https://gist.github.com/Konstantinusz/f9517357e46fa827c3736031ac8d01c7
# Usage:
# ---
# python findword.py ['string' [sought_length ['hu_HU']]]
from sys import argv, exit
from collections import Counter, defaultdict
from itertools import combinations
import enchant
def find_all_anagrams(seek_word):
    """Collect every listed word that can be spelled from (a subset of) seek_word's letters."""
    result = []
    seek_word_counter = Counter(seek_word.lower())
    for word in open('magyar-szavak.txt', mode='r', encoding='utf-8'):
        word = word.strip()
        if len(word) > 1 and word not in result:
            word_counter = Counter(word)
            # Keep the word if it uses no letter more often than seek_word provides it.
            if all(n <= seek_word_counter[letter] for letter, n in word_counter.items()):
                result.append(word)
    return result
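# Note that despite the name this yields partial anagrams too: assuming the word
# list contains 'alma' and 'ma', find_all_anagrams('MALMA') would return both,
# since each uses no letter more often than 'malma' supplies it.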
def len_histogram(strings):
    hist = defaultdict(int)
    for s in strings:
        hist[len(s)] += 1
    return hist

def group_by_length(strings):
    groups = defaultdict(list)
    for s in strings:
        groups[len(s)].append(s)
    return groups
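# For example, len_histogram(['ab', 'cd', 'xyz']) gives {2: 2, 3: 1}, while
# group_by_length(['ab', 'cd', 'xyz']) gives {2: ['ab', 'cd'], 3: ['xyz']}
# (both as defaultdicts).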
def pair_by_length(groups, dct, length):
    result = []
    for l in range(1, length):
        if l in groups and length - l in groups:
            for s1 in groups[l]:
                for s2 in groups[length - l]:
                    compound_str = s1 + s2
                    # Unfortunately, this is useless, as enchant accepts words it should not.
                    if compound_str not in result and dct.check(compound_str):
                        result.append(compound_str)
                    compound_str = s2 + s1
                    if compound_str not in result and dct.check(compound_str):
                        result.append(compound_str)
    return result
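# Illustration (hypothetical groups): with groups = {2: ['ab'], 3: ['cde']} and
# length = 5, the candidate compounds 'abcde' and 'cdeab' are both checked
# against the dictionary, and only accepted spellings are kept.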
if __name__ == '__main__':
    chars = argv[1] if len(argv) > 1 else 'PLASMFHPASYŰARÁKTLBPCRIS'
    length = int(argv[2]) if len(argv) > 2 else 14
    dct = argv[3] if len(argv) > 3 else 'hu_HU'
    if not enchant.dict_exists(dct):
        print(dct, 'not found, installed dictionaries:', enchant.list_languages())
        exit()
    d = enchant.Dict(dct)
    substrings = sorted(sorted(find_all_anagrams(chars)), key=len, reverse=True)
    string_groups = group_by_length(substrings)
    print(string_groups)
    with open('found_words.txt', mode='wt', encoding='utf-8') as fp:
        fp.writelines(s + '\n' for s in substrings)
    print(len_histogram(substrings))
    #with open('compound_words.txt', mode='wt', encoding='utf-8') as fp:
    #    fp.writelines(s + '\n' for s in pair_by_length(string_groups, d, length))

xwordsolver.py
# Prerequisites:
# ---
# conda install pyenchant more-itertools
# Copy 'hu_HU.aff' and 'hu_HU.dic' from:
# https://github.com/LibreOffice/dictionaries/tree/master/hu_HU
# to the enchant data directory, e.g.:
# H:\Anaconda3\Lib\site-packages\enchant\data\mingw64\share\enchant\hunspell
# Usage:
# ---
# python xwordsolver.py ['input.csv' 'output.csv' 'found_words.csv' [min_word_length ['hu_HU' [nth_unused]]]]
# Also useful:
# https://www.szogenerator.hu/
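# The input (a hypothetical example) is a semicolon-separated grid of letters,
# one row per line, e.g.:
#   K;U;T;Y;A
#   A;L;M;A;S
#   ...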
import string
from sys import argv, exit

import enchant
import numpy as np
from more_itertools import substrings_indexes
input_file = argv[1] if len(argv) > 3 else 'tabla.csv'
output_file = argv[2] if len(argv) > 3 else 'tabla_kesz.csv'
found_words_file = argv[3] if len(argv) > 3 else 'talalt_szavak.csv'
min_length = int(argv[4]) if len(argv) > 4 else 4
nth = int(argv[6]) if len(argv) > 6 else 2
#print(enchant.list_languages())
dct = argv[5] if len(argv) > 5 else 'hu_HU'
if not enchant.dict_exists(dct):
print(dct, 'not found, installed dictionaries:', enchant.list_languages())
exit()
d = enchant.Dict(dct)
# Make sure a few known-good compound words pass the dictionary check.
extra_words = ['OREGÁNÓ', 'LILAKÁPOSZTA', 'LILAHAGYMA', 'FEKETEBORS']
for w in extra_words:
    if not d.check(w):
        d.add(w)
banned_words = ['IRAM', 'GUTIS', 'MIND', 'PIÉT', 'KUKK', 'CSÓRD', 'ÉTOLAJÉ', 'LASZA', 'BUZI', 'APÓI', 'SORS', 'ISZOD', 'NANA', 'ÁSÓK', 'ALAK', 'ASMÉHI', 'BORA', 'GUTH', 'SCPM', 'ZSIL', 'KAKUKKFŰZ', 'TROPA', 'ÁTSI', 'PÁKA', 'AVAR', 'TEKE', 'HALI', 'DÉLT', 'KAIN', 'ÓNÁG', 'KARÉ', 'COLTI', 'RENG', 'INAL', 'KAPORT', 'SKATÓ', 'OBRA', 'ZSELÉK', 'PAFF', 'BOYI', 'FOST', 'FÚST', 'GUTI', 'CSÓR', 'ASZAB', 'GISZ', 'ASMÉH', 'FIID', 'ZAYK', 'COLT', 'KATÓ', 'LASZ', 'MÉHI', 'ASZA', 'KUKA', 'SZAB', 'TÁLAS', 'TÁLA', 'DARA', 'ZSELÉ', 'FOKHAGYMAPORA', 'PITA', 'BUCI']
special_words = ['SÓ', 'VÍZ', 'HÚS']
data = np.genfromtxt(input_file, delimiter=';', encoding='utf-8', dtype=str)
data_coords = np.zeros(data.shape, dtype=object)
for i in range(data.shape[0]):
    for j in range(data.shape[1]):
        data[i, j] = data[i, j].upper()
        data_coords[i, j] = (i, j)
data_out = np.copy(data)
found_longest_words = []
data_vertical = data.T
data_coords_vertical = data_coords.T
data_reverse = np.fliplr(data)
data_coords_reverse = np.fliplr(data_coords)
data_vertical_reverse = np.fliplr(data_vertical)
data_coords_vertical_reverse = np.fliplr(data_coords_vertical)
data_diagonal_right = [data[::-1, :].diagonal(i) for i in range(-data.shape[0] + 1, data.shape[1])]
data_coords_diagonal_right = [data_coords[::-1, :].diagonal(i) for i in range(-data_coords.shape[0] + 1, data_coords.shape[1])]
data_diagonal_left = [data.diagonal(i) for i in range(data.shape[1] - 1, -data.shape[0], -1)]
data_coords_diagonal_left = [data_coords.diagonal(i) for i in range(data_coords.shape[1] - 1, -data_coords.shape[0], -1)]
data_diagonal_right_reverse = [row[::-1] for row in data_diagonal_right]
data_coords_diagonal_right_reverse = [row[::-1] for row in data_coords_diagonal_right]
data_diagonal_left_reverse = [row[::-1] for row in data_diagonal_left]
data_coords_diagonal_left_reverse = [row[::-1] for row in data_coords_diagonal_left]
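# For an R-by-C grid each diagonal comprehension yields R + C - 1 lines; e.g.
# for a 3x3 grid, data[::-1, :].diagonal(0) is the main anti-diagonal read from
# the bottom-left corner to the top-right one.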
# The eight scan orientations (rows, columns and both diagonals, forwards and
# backwards), with matching coordinate views to map hits back onto the grid.
views = (data, data_vertical, data_reverse, data_vertical_reverse, data_diagonal_right, data_diagonal_left, data_diagonal_right_reverse, data_diagonal_left_reverse)
coord_views = (data_coords, data_coords_vertical, data_coords_reverse, data_coords_vertical_reverse, data_coords_diagonal_right, data_coords_diagonal_left, data_coords_diagonal_right_reverse, data_coords_diagonal_left_reverse)
view_direction_arrows = ['→', '↓', '←', '↑', '↗', '↘', '↙', '↖']
view_direction_strings = ['R', 'D', 'L', 'U', 'RU', 'RD', 'LD', 'LU']
print('Found words, their starting locations (also in Excel notation) and directions:')
def n2a(n, b=string.ascii_uppercase):
    d, m = divmod(n, len(b))
    return n2a(d - 1, b) + b[m] if d else b[m]

def coord_to_excel(xy):
    return n2a(xy[1]) + str(xy[0] + 1)
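# n2a converts a 0-based column index to spreadsheet letters, and coord_to_excel
# turns a (row, column) pair into a cell name; for example n2a(0) == 'A',
# n2a(26) == 'AA', and coord_to_excel((9, 2)) == 'C10'.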
for view_index in range(len(views)):
    view = views[view_index]
    for i in range(len(view)):
        if len(view[i]) < min_length:  # In diagonal views, skip the short corners.
            continue
        #print(view_index, i)
        found_strs = []
        for substr in substrings_indexes(view[i], reverse=True):  # Longest substrings first.
            if len(substr[0]) < min_length and ''.join(substr[0]) not in special_words:
                continue
            current_str = ''.join(substr[0])
            if d.check(current_str) and current_str not in banned_words:
                if len(found_strs) > 0 and any(current_str in found_str for found_str in found_strs):
                    #print(current_str, 'in', found_strs, substr[1:])
                    continue
                found_strs.append(current_str)
                coords = coord_views[view_index][i][substr[1]]
                print(
                    current_str,
                    coords,
                    coord_to_excel(coords),
                    view_direction_strings[view_index],
                    view_direction_arrows[view_index]
                )
                # Only the longest word found in each line is blanked out of the output table.
                if len(found_strs) == 1:
                    found_longest_words.append(current_str)
                    for j in range(substr[1], substr[2]):
                        data_out[coord_views[view_index][i][j]] = ' '
np.savetxt(output_file, data_out, fmt='%s', delimiter=';', encoding='utf-8')
np.savetxt(found_words_file, found_longest_words, fmt='%s', delimiter=';', encoding='utf-8')
def first_nth_unused(arr2d, n=2):
    result = []
    for row in arr2d:
        at = 1
        for i in range(len(row)):
            if at == n and row[i] != ' ':
                result.append(row[i])
                break
            if row[i] != ' ':
                at += 1
    return ''.join(result)

def last_nth_unused(arr2d, n=2):
    result = []
    for row in arr2d:
        at = 1
        for i in reversed(range(len(row))):
            if at == n and row[i] != ' ':
                result.append(row[i])
                break
            if row[i] != ' ':
                at += 1
    return ''.join(result)
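# Both helpers skip blanked-out cells; for instance with n=2,
# first_nth_unused([[' ', 'A', 'B'], ['C', ' ', 'D']]) == 'BD' and
# last_nth_unused([[' ', 'A', 'B'], ['C', ' ', 'D']]) == 'AC'.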
print('\nFrom each row, we take character number ' + str(nth) + ' of those not in any found word, from the left:')
print(first_nth_unused(data_out, nth))
print('\nFrom each row, we take character number ' + str(nth) + ' of those not in any found word, from the right:')
print(last_nth_unused(data_out, nth))