Last active
September 9, 2015 03:09
-
-
Save se4u/742ac170c4ebbb61e3e4 to your computer and use it in GitHub Desktop.
A shift reduce probabilistic PDA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
| Filename : shift-reduce-ppda.py | |
| Description : A shift Reduce PPDA generator | |
| Author : Pushpendre Rastogi | |
| Created : Tue Sep 8 20:42:22 2015 (-0400) | |
| Last-Updated: Tue Sep 8 23:09:28 2015 (-0400) | |
| By: Pushpendre Rastogi | |
| Update #: 33 | |
The paper "Relating Probabilistic Grammar and Automata" compare PCFGs and PPDAs. | |
They also explain how a PPDA produces a sentence. This class implements a PPDA | |
on a simple grammar. | |
The paper's first main contribution was to show that using the Chelba-Jelinek | |
method it was not possible to encode a specific PCFG as a shift-reduce PPDA. | |
Assuming that the context was bounded. | |
Then they showed a way to get around this problem by creating a Top-Down PPDA | |
that could be arbitrarily inter-converted to PCFG (and any arbitrary PCFG could | |
be converted to Top Down PPDA). | |
Finally they showed that by enriching the context in a shift reduce PPDA with | |
a "richer stack and state alphabets" they could encode any arbitrary PCFG as a | |
left to right Shift Reduce PPDA but unfortunately that involved exponential | |
blowup in the context required. | |
''' | |
import collections | |
import unittest | |
class StackElem(object): | |
__slots__ = ['syn_cat', 'terminal'] | |
def __init__(self, syn_cat, terminal): | |
""" | |
Params | |
------ | |
syn_cat : The syntactic category | |
terminal : The terminal word. | |
""" | |
self.syn_cat = syn_cat | |
self.terminal = terminal | |
def __str__(self): | |
return '%s/%s' % (self.terminal, self.syn_cat) | |
def __repr__(self): | |
return self.__str__() | |
class CantCombine(Exception): | |
pass | |
class ShiftReducePPDA_Rules(object): | |
''' | |
This class is a hack right now: always combines things in the item_stage | |
from left to right. | |
TODO: Make this an abstract class and derive the hack from it. | |
''' | |
def __init__(self): | |
exec '' | |
for e in 'DT N NP JJ V VP S'.split(): | |
locals()[e] = e | |
self.combinations = {(DT, N): NP, | |
(JJ, N): NP, | |
(DT, NP): NP, | |
(V, NP): VP, | |
(NP, VP): S} | |
sentence = 'The_DT small_JJ woman_N lifted_V the_DT fat_JJ man_N'.split() | |
self.terminals = [e.split('_')[0] for e in sentence] | |
self.nonterminals = [e.split('_')[1] for e in sentence] | |
def combine(self, item_stage): | |
assert len(item_stage) > 1 | |
if len(item_stage) == 2: | |
syn_cat_0 = item_stage[0].syn_cat | |
terminal_0 = item_stage[0].terminal | |
syn_cat_1 = item_stage[1].syn_cat | |
terminal_1 = item_stage[1].terminal | |
try: | |
syn_cat = self.combinations[(syn_cat_0, syn_cat_1)] | |
except KeyError: | |
raise CantCombine | |
return StackElem(syn_cat, ' '.join([terminal_0, terminal_1])) | |
else: | |
return self.combine([item_stage[0], self.combine(item_stage[1:])]) | |
def conditional_generate_item(self, top_item): | |
idx = (0 | |
if top_item is None | |
else self.terminals.index( | |
top_item.terminal.split()[-1]) + 1) | |
return StackElem(syn_cat=self.nonterminals[idx], | |
terminal=self.terminals[idx]) | |
def can_combine(self, item_stage): | |
try: | |
self.combine(item_stage) | |
except CantCombine: | |
return False | |
return True | |
class ShiftReducePPDA(object): | |
def __init__(self, ppda_rules=None): | |
self.rules = (ShiftReducePPDA_Rules() | |
if ppda_rules is None | |
else ppda_rules) | |
self.stack = collections.deque() | |
def generate_sentence(self): | |
self.stack.clear() | |
while True: | |
self.shift() | |
self.reduce() | |
if self.stop_shift(): | |
break | |
return list(self.stack) | |
def shift(self): | |
self.stack.append(self.generate_stack_item()) | |
def generate_stack_item(self): | |
# Stochastically generate a new stack item conditioned on top 1 | |
# element of the stack | |
try: | |
top_item = self.stack[-1] | |
except IndexError: | |
top_item = None | |
return self.rules.conditional_generate_item(top_item) | |
def reduce(self): | |
while True: | |
if self.can_reduce(): | |
item_stage = [] | |
item_stage.insert(0, self.stack.pop()) | |
item_stage.insert(0, self.stack.pop()) | |
self.stack.append(self.rules.combine(item_stage)) | |
else: | |
break | |
def stop_shift(self): | |
return self.stack[-1].syn_cat == 'S' | |
def can_reduce(self): | |
if len(self.stack) < 2: | |
return False | |
return self.rules.can_combine( | |
[self.stack[-2], self.stack[-1]]) | |
def generate(self, num_sentence=1): | |
for _ in range(num_sentence): | |
print self.generate_sentence() | |
class TopDownPPDA(object): | |
pass | |
class TestShiftReducePPDA(unittest.TestCase): | |
def test_can_reduce(self): | |
obj = ShiftReducePPDA() | |
obj.shift() | |
obj.shift() | |
obj.shift() | |
self.assertTrue(obj.can_reduce()) | |
def test_generate_sentence(self): | |
obj = ShiftReducePPDA() | |
print obj.generate_sentence() | |
if __name__ == '__main__': | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment