Skip to content

Instantly share code, notes, and snippets.

@temoto
Last active December 18, 2016 20:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save temoto/4ce8dddfd74a9fc7983e6922b307e38c to your computer and use it in GitHub Desktop.
Save temoto/4ce8dddfd74a9fc7983e6922b307e38c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import bisect
import itertools
import sys
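# Usage example. Digit strings are translated to matching dictionary words,
# words to their keypad digits; each output line is "<input>\t<translation>".
# % cat wordlist
# a b c
# pig bird
# rig sig
# % cat input
# 2473
# bird vine
# windows
# % ./phoneword.py -words wordlist <input
# 2473 bird
# bird 2473
# vine 8463
# windows 9463697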
# Command-line interface. Options use a single leading dash (-debug, -test,
# -words); the two boolean switches are off unless given.
cmdline = argparse.ArgumentParser()
cmdline.add_argument('-debug', action='store_true')
cmdline.add_argument('-test', action='store_true', help='run internal tests')
cmdline.add_argument('-words', required=True, metavar='FILE', help='path to words dictionary')
# Phone keypad layout: digit -> letters printed on that key, Latin letters
# first, then Cyrillic. Keys '1' and '0' carry no letters.
keymap = {
    '1': '',
    '2': 'abc' + 'абвг',
    '3': 'def' + 'деёжз',
    '4': 'ghi' + 'ийкл',
    '5': 'jkl' + 'мноп',
    '6': 'mno' + 'рсту',
    '7': 'pqrs' + 'фхцч',
    '8': 'tuv' + 'шщъы',
    '9': 'wxyz' + 'ьэюя',
    '0': '',
}
# Reverse lookup: letter -> the digit of the key it is printed on.
alpha_num_map = dict(
    (letter, digit)
    for digit, letters in keymap.items()
    for letter in letters
)
# One alphabet per supported language. A candidate letter sequence is only
# accepted when all of its letters come from the same alphabet.
# The Cyrillic alphabet must include 'й': it is printed on key '4', and a
# letter missing here is ignored by the single-language check.
config_languages = (
    'abcdefghijklmnopqrstuvwxyz',
    'абвгдеёжзийклмнопрстуфхцчшщъыьэюя',
)
# letter -> index of its alphabet within config_languages.
language_map = {c: i for i, cs in enumerate(config_languages) for c in cs}
# Module-level state, filled in by main().
# Debug logging switch; turned on by the -debug flag.
debug = False
# Every distinct word read from the dictionary file.
words = set()
# The same words in sorted order, used for bisect-based prefix lookups.
words_sorted = []
def log_debug(msg):
    """Write *msg* to stderr with a 'DEBUG ' prefix when debugging is on.

    Does nothing while the module-level ``debug`` flag is false.
    """
    if not debug:
        return
    sys.stderr.write(''.join(('DEBUG ', msg, '\n')))
def log(msg):
    """Write *msg* followed by a newline to standard error.

    *msg* may be any object, not only a string: it is rendered with
    ``str.format`` first, so an exception instance can be passed directly
    (string concatenation would raise TypeError for it).
    """
    sys.stderr.write('{0}\n'.format(msg))
def single_language(w):
    """Return True when the letters of *w* belong to exactly one language.

    Characters that are absent from ``language_map`` are ignored. If *w*
    contains no known letter at all, the result is False.
    """
    seen = set()
    for ch in w:
        lang = language_map.get(ch)
        if lang is not None:
            seen.add(lang)
    return len(seen) == 1
def words_like(win):
    """Yield every dictionary word that starts with the prefix *win*.

    ``words_sorted`` is sorted, so all words sharing a prefix form one
    contiguous run beginning at ``bisect_left(words_sorted, win)``. The run
    is scanned forward until the prefix stops matching. This avoids relying
    on a sentinel character for the upper bound: a sentinel such as 'Ω'
    sorts before Cyrillic letters and would cut Cyrillic matches short.
    """
    i1 = bisect.bisect_left(words_sorted, win)
    i2 = i1
    while i2 < len(words_sorted) and words_sorted[i2].startswith(win):
        i2 += 1
    ws = words_sorted[i1:i2]
    log_debug('like({win}) i1={i1} i2={i2} result={ws}'.format(**locals()))
    yield from ws
def translate(w):
    """Translate one input token between keypad digits and letters.

    If *w* consists solely of keypad digits (the keys of ``keymap``), every
    single-language letter combination for those digits is generated and
    each one is looked up as a prefix in the dictionary; the matching words
    are returned as a tuple. A digit whose key has no letters ('0', '1')
    makes the combination set empty, so the result is ``()``.

    Otherwise *w* is treated as a word and a 1-tuple is returned holding
    the digits for its letters; characters without a keypad mapping are
    dropped.
    """
    log_debug('translate({0})'.format(w))
    # Membership in keymap rather than str.isdigit(): isdigit() is also true
    # for non-ASCII digits (e.g. superscripts), which have no keymap entry
    # and would raise KeyError below.
    if w and all(d in keymap for d in w):
        g = (
            ''.join(row)
            for row in itertools.product(*[keymap[d] for d in w])
            if single_language(row)
        )
        if debug:
            # Materialize so the candidates can be both logged and reused.
            g = tuple(g)
            log_debug('translate({0}) g={1}'.format(w, g))
        return tuple(x for prefix in g for x in words_like(prefix))
    # word
    return (''.join(alpha_num_map.get(l, '') for l in w),)
def run_tests():
    """Run the built-in self-checks against the currently loaded dictionary.

    Raises AssertionError on the first failing check; returns None when all
    checks pass. The checks depend on the words that were loaded.
    """
    # Word -> digits.
    assert '2473' in translate('bird')
    # Digit -> words.
    # NOTE(review): dictionary lookups are prefix matches, so a word list
    # that also holds longer words starting with a/b/c (e.g. 'bird') looks
    # like it would make this equality fail -- confirm the intended list.
    assert set(translate('2')) == set('abc')
    assert 'pig' in translate('744')
def main():
    """Program entry point: load the dictionary, then translate stdin.

    Parses the command line, reads whitespace-separated words from the
    ``-words`` file into ``words`` / ``words_sorted``, and then either runs
    the internal tests (``-test``) or prints one ``<token>\\t<translation>``
    line per translation of every whitespace-separated token on stdin.

    Returns 1 when the dictionary file cannot be read, the result of
    ``run_tests()`` in test mode, and None otherwise.
    """
    global debug
    flags = cmdline.parse_args()
    if flags.debug:
        debug = True
    try:
        with open(flags.words, 'rt') as f:
            for line in f:
                words.update(line.split())
    except IOError as e:
        log('error reading words from {}'.format(flags.words))
        # Pass text, not the exception object: log() may concatenate its
        # argument with a string, which fails for non-str values.
        log(str(e))
        return 1
    words_sorted.extend(sorted(words))
    if flags.test:
        return run_tests()
    for line in sys.stdin:
        for word in line.split():
            for t in translate(word):
                print('{0}\t{1}'.format(word, t))
if __name__ == '__main__':
    # Exit with main()'s return value; Ctrl-C ends the run with status 1
    # instead of a traceback.
    try:
        exit_code = main()
    except KeyboardInterrupt:
        exit_code = 1
    sys.exit(exit_code)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment