@mikeboers
Last active February 9, 2016 07:36
Solving a wordsearch by mining Wikipedia. The gist contains the puzzle grid, a Python 2 solver that loads a word list into a letter trie and scans every cell of the grid in all eight directions, and a second script that mines Wikipedia article text to build a themed word list for the solver.
LAHLERIRAHLEDDIKWKT
CNANOCIESRRAUNAFAOG
WKRGGVREGAASNSESLRB
TETEUAEGNLWOGNTKDET
IUHLUHTCITDSERIAAAA
RECKSUPERHEROEWSRRC
EATCTVNCEANRNETGTKK
NBAOAENNHRFAPMAKHEY
IKGPREDATORTARRLVRR
LSYSKCRVFHNSYYIAATU
BKGYERFFOJTEACASDRB
OTANDROIDENLYTKEEAD
GHXENOMORPHTDYBRRTA
TYOGSOTHOTHTWRIBUSR
YTOBORRRLSTARWARSLB
VAJLBRRULILBORIGKLL
TDRAZIWSAKTDIREWOLF
AROFWOTLESRAASEMINA
FGILEANRLFNATACOREI
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--words', action='append')
parser.add_argument('puzzle')
args = parser.parse_args()
puzzle = [list(line.strip()) for line in open(args.puzzle).read().strip().split()]
rows = len(puzzle)
cols = len(puzzle[0])
print 'Puzzle is', rows, 'by', cols
dictionary = {}
FIN = '*'
def add_word_to_dictionary(word, node=dictionary):
    node = node.setdefault(word[0], {})
    remaining = word[1:]
    if remaining:
        add_word_to_dictionary(remaining, node)
    else:
        node[FIN] = node.get(FIN, 0) + 1
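
# Illustrative example (the words are made up, not from the gist): adding
# 'CAT' and then 'CAR' produces the nested dict
#     {'C': {'A': {'T': {'*': 1}, 'R': {'*': 1}}}}
# where FIN ('*') counts how many words end at that node, so a grid scan can
# stop as soon as the current prefix has no node.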
word_count = 0
for path in args.words or ['/usr/share/dict/words']:
    print 'Loading', path
    for line in open(path):
        line = line.strip()
        if line:
            add_word_to_dictionary(line.upper())
            word_count += 1
print word_count, 'words'
directions = [
    ('N' , 0, -1),
    ('NE', 1, -1),
    ('E' , 1, 0),
    ('SE', 1, 1),
    ('S' , 0, 1),
    ('SW', -1, 1),
    ('W' , -1, 0),
    ('NW', -1, -1),
]
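
# puzzle[y][x] indexes rows from the top of the grid, so dy = -1 steps toward
# the top row; that is why ('N', 0, -1) reads as "north". The loops below try
# every start cell in every one of the eight directions.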
for sx in xrange(cols):
    for sy in xrange(rows):
        for direction, dx, dy in directions:
            solutions = []
            node = dictionary
            x = sx
            y = sy
            word = ''
            # Walk from (sx, sy) in this direction while the growing prefix
            # still exists in the trie and we are still on the grid.
            while (
                node and
                x >= 0 and x < cols and
                y >= 0 and y < rows
            ):
                if False and len(word) > 2:
                    # Disabled debug trace of every prefix tried.
                    print ' %2d,%2d %2s: %s' % (sx, sy, direction, word)
                char = puzzle[y][x]
                word += char
                node = node.get(char)
                if node and FIN in node:
                    solutions.append(word)
                x += dx
                y += dy
            # Report the hits for this start cell and direction, longest first.
            for word in sorted(solutions, key=len, reverse=True):
                if len(word) > 3:
                    print '* %2d,%2d %2s: %s' % (sx, sy, direction, word)
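
The default /usr/share/dict/words list only yields ordinary English words; themed answers hidden in the grid such as STARWARS, XENOMORPH and DIREWOLF need a custom list. The second script below builds one by fetching article wikitext from the Wikipedia API, optionally following [[wikilinks]] to a given depth, and writing every distinct lowercase word to a file that can then be passed to the solver with -w.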
import argparse
import json
import os
import re
import requests
base_url = 'http://en.wikipedia.org/w/api.php'
base_params = dict(
    format='json',
    action='query',
    titles='Main Page',
    prop='revisions',
    rvprop='content',
)
def get_content(title):
    # Cache each page's raw API response under wp_data/ so repeated runs
    # don't hit Wikipedia again.
    path = os.path.join('wp_data', title + '.json')
    if not os.path.exists(path):
        params = base_params.copy()
        params['titles'] = title
        res = requests.get(base_url, params=params)
        with open(path, 'w') as fh:
            fh.write(res.text)
    data = json.load(open(path))
    try:
        return data['query']['pages'].values()[0]['revisions'][0]['*']
    except (KeyError, IndexError) as e:
        # This only happens on "file" pages, which we don't care about.
        return ''
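
# For reference, the (abridged) API response this unpacks looks roughly like
#     {"query": {"pages": {"<pageid>": {"revisions": [{"*": "<wikitext>"}]}}}}
# where '*' holds the raw wikitext of the requested revision.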
parser = argparse.ArgumentParser()
parser.add_argument('title', nargs='+')
parser.add_argument('-d', '--depth', type=int, default=0)
parser.add_argument('-o', '--output', nargs='?')
args = parser.parse_args()
def walk(title, depth=0):
    content = get_content(title)
    yield title, content
    if depth <= 0:
        return
    # Recurse into [[wikilink]] targets, dropping any piped display text.
    for m in re.finditer(r'\[\[(.+?)(?:\||\]\])', content):
        subtitle = m.group(1)
        for x in walk(subtitle, depth - 1):
            yield x
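
# Illustrative matches (examples, not from the gist): the pattern captures
# 'Xenomorph' from '[[Xenomorph]]' and 'Star Wars' from '[[Star Wars|the
# franchise]]', i.e. the link target without its display text.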
output = open(args.output, 'w') if args.output else None
seen = set()
for starting_title in args.title:
    for title, content in walk(starting_title, args.depth):
        print title, len(content)
        if output:
            # Emit each lowercase word once; this becomes the -w list for the solver.
            for word in re.findall(r'\b[a-z]+\b', content):
                if word not in seen:
                    output.write(word + '\n')
                    seen.add(word)
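
A hypothetical end-to-end run (the script and file names are assumed here, they are not part of the gist):

    python fetch_words.py -d 1 -o words.txt 'Star Wars' 'Alien (film)'
    python solve.py -w words.txt puzzle.txt

The first command caches each page's API response under wp_data/ (the directory has to exist already) and writes the word list; the second prints every trie hit longer than three letters. For example, a hit such as PREDATOR running east from column 3, row 8 would print as '*  3, 8  E: PREDATOR'.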