Created
October 31, 2012 04:47
-
-
Save joelverhagen/3984837 to your computer and use it in GitHub Desktop.
Solve Dan's hangman puzzle. This is kludgy code. Please disregard.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Digraphs (two-letter sequences), sorted by how common they are in
# English text; earlier entries are more likely letter pairs.
digraphs = ['th', 'he', 'in', 'er', 'an', 're', 'on', 'at', 'en', 'nd', 'st', 'or', 'te', 'es', 'is', 'ha', 'ou', 'it', 'to', 'ed', 'ti', 'ng', 'ar', 'se', 'al', 'nt', 'as', 'le', 've', 'of', 'me', 'hi', 'ea', 'ne', 'de', 'co', 'ro', 'll', 'ri', 'li', 'ra', 'io', 'be', 'el', 'ch', 'ic', 'ce', 'ta', 'ma', 'ur', 'om', 'ho', 'et', 'no', 'ut', 'si', 'ca', 'la', 'il', 'fo', 'us', 'pe', 'ot', 'ec', 'lo', 'di', 'ns', 'ge', 'ly', 'ac', 'wi', 'wh', 'tr', 'ee', 'so', 'un', 'rs', 'wa', 'ow', 'id', 'ad', 'ai', 'ss', 'pr', 'ct', 'we', 'mo', 'ol', 'em', 'nc', 'rt', 'sh', 'po', 'ie', 'ul', 'im', 'ts', 'am', 'ir', 'yo', 'fi', 'os', 'pa', 'ni', 'ld', 'sa', 'ay', 'ke', 'mi', 'na', 'oo', 'su', 'do', 'ig', 'ev', 'gh', 'bl', 'if', 'tu', 'av', 'pl', 'wo', 'ry', 'bu', 'iv', 'ab', 'ia', 'vi', 'ex', 'op', 'bo', 'fe', 'ag', 'ci', 'da', 'mp', 'tt', 'sp', 'ck', 'ty', 'fr', 'ei', 'ap', 'rd', 'gr', 'od', 'ef', 'go', 'ba', 'ey', 'cl', 'cr', 'ov', 'ht', 'rn', 'fa', 'ls', 'gi', 'sc', 'up', 'cu', 'ue', 'ep', 'ga', 'ak', 'va', 'ff', 'uc', 'ki', 'by', 'qu', 'ew', 'ug', 'au', 'rr', 'rm', 'ds', 'oc', 'um', 'og', 'pp', 'ru', 'pi', 'rc', 'lu', 'oi', 'tl', 'my', 'ye', 'ua', 'eg', 'mu', 'dr', 'lt', 'ny', 'bi', 'pu', 'br', 'mb', 'ob', 'pt', 'ft', 'ui', 'ys', 'ub', 'ud', 'hr', 'rg', 'du', 'fu', 'rl', 'ok', 'nk', 'ms', 'wn', 'mm', 'eo', 'nu', 'ib', 'rk', 'hu', 'af', 'nl', 'nn', 'vo', 'cc', 'ik', 'tw', 'gu', 'aw', 'xt', 'ph', 'sm', 'ip', 'lf', 'dd', 'kn', 'gs', 'fl', 'iz', 'oa', 'ju', 'ks', 'gl', 'nf', 'ps', 'ze', 'xp', 'sl', 'rv', 'gn', 'sk', 'eq', 'dy', 'tc', 'nv', 'hy', 'sy', 'dl', 'bs', 'je', 'jo', 'ws', 'oe', 'mr', 'gg', 'eb', 'yi', 'sw', 'rp', 'wr', 'cy', 'rf', 'xi', 'ja', 'xa', 'oy', 'tm', 'lv', 'yp', 'dg', 'cs', 'lp', 'lm', 'eu', 'ox', 'eh', 'xc', 'ka', 'yt', 'nm', 'ek', 'ax', 'lk', 'ym', 'sn', 'ae', 'rb', 'uf', 'tp', 'ya', 'ix', 'za', 'dn', 'bj', 'dv', 'gy', 'tf', 'ah', 'hs', 'xe', 'ko', 'py', 'gt', 'az', 'dm', 'rh', 'sd', 'oh', 'bt', 'wl', 'lw', 'hm', 'lc', 'rw', 'hn', 'kl', 'yl', 'lr', 'bb', 'tn',
'zi', 'yb', 'np', 'pm', 'aq', 'hl', 'gm', 'nh', 'xy', 'ln', 'cp', 'fs', 'yc', 'sf', 'fy', 'yn', 'iu', 'dt', 'bc', 'td', 'mn', 'ku', 'sr', 'uo', 'ml', 'tb', 'nj', 'cm', 'ky', 'aj', 'zo', 'db', 'uy', 'ww', 'dw', 'pc', 'ii', 'nw', 'nr', 'oj', 'ao', 'sq', 'sb', 'iq', 'yr', 'mg', 'sg', 'pd', 'dc', 'nb', 'mt', 'cd', 'lg', 'vp', 'df', 'hb', 'yw', 'oz', 'pv', 'ez', 'mc', 'lb', 'hd', 'nq', 'tg', 'wt', 'kh', 'dp', 'tz', 'mv', 'wd', 'zz', 'fg', 'fc', 'zu', 'yd', 'xu', 'cq', 'ej', 'bv', 'vy', 'kg', 'cg', 'md', 'hw', 'mf', 'tv', 'ji', 'uz', 'gc', 'vn', 'wy', 'qi', 'tx', 'dh', 'ih', 'uk', 'kr', 'bm', 'aa', 'wp', 'fn', 'yg', 'kb', 'pg', 'cn', 'xh', 'zy', 'qw', 'wx', 'xx', 'gb', 'fd', 'sz', 'yu', 'xo', 'ux', 'gd', 'hk', 'gf', 'nx', 'bd', 'nz', 'kf', 'wm', 'ij', 'wf', 'jp', 'kw', 'hf', 'xs', 'hp', 'vs', 'sv', 'hc', 'pf', 'wc', 'dj', 'kt', 'dk', 'fh', 'uv', 'uh', 'bh', 'xf', 'yz', 'pk', 'kp', 'zl', 'bn', 'vu', 'bg', 'fp', 'wb', 'wk', 'cf', 'fx', 'fb', 'dx', 'xm', 'xn', 'lh', 'qa', 'vt', 'zh', 'wu', 'cb', 'yh', 'gp', 'jm', 'pb', 'fm', 'pw', 'fw', 'bw', 'vd', 'km', 'kk', 'iy', 'yf', 'xv', 'xb', 'kd', 'mw', 'jb', 'bp', 'rx', 'gw', 'ql', 'rq', 'xd', 'rz', 'xl', 'jl', 'vl', 'js', 'uu', 'tj', 'qq', 'vv', 'jt', 'lq', 'yv', 'hg', 'pn', 'hq', 'tk', 'rj', 'hv', 'cx', 'oq', 'hh', 'mh', 'lx', 'jf', 'gv', 'vr', 'qr', 'cz', 'gk', 'vh', 'sx', 'jc', 'kc', 'cv', 'bk', 'bf', 'qn', 'iw', 'dq', 'zn', 'bx', 'xr', 'vc', 'gz', 'qs', 'zs', 'jr', 'zw', 'zb', 'fk', 'dz', 'gx', 'jd', 'yk', 'vm', 'vb', 'qe', 'vk', 'cw', 'zt', 'fv', 'mx', 'vg', 'lz', 'yy', 'zc', 'zg', 'zm', 'lj', 'px', 'wg', 'sj', 'xq', 'mk', 'uj', 'yj', 'xg', 'zj', 'yx', 'uq', 'pz', 'xw', 'jk', 'cj', 'bz', 'qc', 'zk', 'kv', 'mj', 'tq', 'jh', 'jn', 'fz', 'zd', 'kj', 'wv', 'vx', 'zv', 'fq', 'kq', 'uw', 'pq', 'zx', 'zf', 'vw', 'xk', 'zp', 'xj', 'vj', 'jj', 'qt', 'qz', 'xz', 'pj', 'hj', 'bq', 'mq', 'qd', 'qv', 'jz', 'jq', 'jy', 'jx', 'kx', 'qm', 'vf', 'qo', 'hz', 'zq', 'fj', 'zr', 'jv', 'wz', 'yq', 'wq', 'jg', 'gj', 'wj', 'qb', 'gq', 'jw', 'mz',
'qy', 'kz', 'hx']

# The 26 lowercase ASCII letters ('a' through 'z').
alphabet = set(map(chr, range(97, 97 + 26)))

# The puzzle word as guessed so far; None marks an unsolved blank.
word = [None, 'l', 'e', None, 't', 'o', 'r']

# Letters that were guessed but turned out not to be in the word.
failed_letters = {'c', 's', 'a', 'f'}

# Set logic: work out which letters remain available for the blanks.
success_letters = set(word) - {None}
used_letters = failed_letters | success_letters
unused_letters = alphabet - used_letters
# For each blank position, collect candidate letters suggested by common
# digraphs formed with the neighboring known letters.  Candidates are
# appended in digraph-frequency order, so likelier letters come first.
matches = {}
# Cap on how many candidate letters are kept per blank.
letter_limit = 1000
for i in range(len(word)):
    if word[i] is not None:
        continue  # position already solved
    # Digraph (prev, blank): any digraph starting with the known previous
    # letter suggests its second letter for this blank.
    if i > 0:
        prev = word[i - 1]
        for digraph in digraphs:
            if digraph[0] == prev:
                candidate = digraph[1]
                if candidate in unused_letters:
                    if i not in matches:
                        matches[i] = [candidate]
                    elif len(matches[i]) < letter_limit:
                        matches[i].append(candidate)
    # Digraph (blank, next): any digraph ending with the known following
    # letter suggests its first letter for this blank.  (Renamed from
    # `next` to avoid shadowing the builtin.)
    if i < len(word) - 1:
        following = word[i + 1]
        for digraph in digraphs:
            if digraph[1] == following:
                candidate = digraph[0]
                if candidate in unused_letters:
                    if i not in matches:
                        matches[i] = [candidate]
                    elif len(matches[i]) < letter_limit:
                        matches[i].append(candidate)
import itertools

# Fill every blank with each of its candidate letters (Cartesian product
# over the blank positions) to build the set of candidate words.
# NOTE(review): a blank with no digraph candidates never enters `matches`,
# so its None would survive into ''.join and raise — same as the original.
keys = sorted(matches.keys())
candidates = set()
for blanks in itertools.product(*[matches[key] for key in keys]):
    test_word = list(word)
    for position, letter in zip(keys, blanks):
        test_word[position] = letter
    candidates.add(''.join(test_word))
# Function to see how many hits a given search returns on Amazon.
import requests
import lxml.html
import re


def get_amazon_hit_count(search):
    """Return the number of search results Amazon reports for `search`.

    Scrapes the result-count header from the Amazon search page.
    Returns 0 when the page has no result-count element.
    Raises ValueError when the count text has an unrecognized format.
    """
    response = requests.get('http://www.amazon.com/s/', params={
        'field-keywords': search
    }, headers={
        # Pretend to be a desktop browser so Amazon serves the normal page.
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4'
    }, timeout=30)  # requests has no default timeout; don't hang forever
    html = lxml.html.fromstring(response.text)
    span = html.xpath('.//h2[@id = "resultCount"]/span')
    if not span:
        return 0
    count_text = span[0].text.strip()
    # Multi-page format: "Showing 1 - 16 of 195 Results"
    m = re.search(r'^Showing (\d+) - (\d+) of (?P<result_count>\d+) Results$', count_text)
    if m is not None:
        return int(m.group('result_count'))
    # Single-page format: "Showing 4 Results" / "Showing 1 Result"
    m = re.search(r'^Showing (?P<result_count>\d+) Results?', count_text)
    if m is not None:
        return int(m.group('result_count'))
    # More specific than the original bare Exception; still caught by
    # any `except Exception` a caller might have.
    raise ValueError('Unexpected result count text: ' + count_text)
# Try every candidate against Amazon; keep those with at least one hit.
hits = []
for candidate in candidates:
    count = get_amazon_hit_count(candidate)
    if count > 0:
        hits.append((candidate, count))

print('Likely hits:')
# Bug fix: sorted()'s keyword is `reverse`, not `reversed` — the original
# raised TypeError here.  Sort most-hits-first.
hits.sort(key=lambda hit: hit[1], reverse=True)
for hit in hits:
    print('- %s, with %d hits.' % hit)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Likely hits: | |
- plextor, with 195 hits. | |
- kleitor, with 10 hits. | |
- blentor, with 1 hits. | |
- ilektor, with 1 hits. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment