Skip to content

Instantly share code, notes, and snippets.

@honnibal
Created May 9, 2015 22:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save honnibal/31612136db3e47135198 to your computer and use it in GitHub Desktop.
Save honnibal/31612136db3e47135198 to your computer and use it in GitHub Desktop.
import sys
def shift(words, stack, c):
return words, stack + c
def reduce(words, stack, c):
return words + (stack,), c
def get_valid_moves(vocab, words, stack):
if not stack or stack not in vocab:
return (shift,)
else:
return (shift, reduce)
def read_vocab(loc):
words = set(['i', 'a'])
with open(loc) as file_:
for line in file_:
word = line.strip().lower()
if len(word) >= 2:
words.add(word)
return words
def parse_telegram(vocab, string, beam_width):
beam = [(tuple(), '')]
for c in string:
new_beam = []
for words, stack in beam:
for move in get_valid_moves(vocab, words, stack):
new_beam.append(move(words, stack, c))
new_beam.sort(key=lambda state: len(state[0]))
beam = new_beam[:beam_width]
for words, stack in beam:
if stack in vocab:
return ' '.join(words + (stack,))
else:
return None
def main(vocab_loc, telegram):
vocab = read_vocab(vocab_loc)
print parse_telegram(vocab, telegram.lower(), len(telegram) * 100)
if __name__ == '__main__':
main(*sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment