Skip to content

Instantly share code, notes, and snippets.

@se4u
Last active September 9, 2015 03:09
Show Gist options
  • Save se4u/742ac170c4ebbb61e3e4 to your computer and use it in GitHub Desktop.
Save se4u/742ac170c4ebbb61e3e4 to your computer and use it in GitHub Desktop.
A shift reduce probabilistic PDA
'''
| Filename : shift-reduce-ppda.py
| Description : A shift Reduce PPDA generator
| Author : Pushpendre Rastogi
| Created : Tue Sep 8 20:42:22 2015 (-0400)
| Last-Updated: Tue Sep 8 23:09:28 2015 (-0400)
| By: Pushpendre Rastogi
| Update #: 33
The paper "Relating Probabilistic Grammar and Automata" compare PCFGs and PPDAs.
They also explain how a PPDA produces a sentence. This class implements a PPDA
on a simple grammar.
The paper's first main contribution was to show that using the Chelba-Jelinek
method it was not possible to encode a specific PCFG as a shift-reduce PPDA.
Assuming that the context was bounded.
Then they showed a way to get around this problem by creating a Top-Down PPDA
that could be arbitrarily inter-converted to PCFG (and any arbitrary PCFG could
be converted to Top Down PPDA).
Finally they showed that by enriching the context in a shift reduce PPDA with
a "richer stack and state alphabets" they could encode any arbitrary PCFG as a
left to right Shift Reduce PPDA but unfortunately that involved exponential
blowup in the context required.
'''
import collections
import unittest
class StackElem(object):
__slots__ = ['syn_cat', 'terminal']
def __init__(self, syn_cat, terminal):
"""
Params
------
syn_cat : The syntactic category
terminal : The terminal word.
"""
self.syn_cat = syn_cat
self.terminal = terminal
def __str__(self):
return '%s/%s' % (self.terminal, self.syn_cat)
def __repr__(self):
return self.__str__()
class CantCombine(Exception):
pass
class ShiftReducePPDA_Rules(object):
'''
This class is a hack right now: always combines things in the item_stage
from left to right.
TODO: Make this an abstract class and derive the hack from it.
'''
def __init__(self):
exec ''
for e in 'DT N NP JJ V VP S'.split():
locals()[e] = e
self.combinations = {(DT, N): NP,
(JJ, N): NP,
(DT, NP): NP,
(V, NP): VP,
(NP, VP): S}
sentence = 'The_DT small_JJ woman_N lifted_V the_DT fat_JJ man_N'.split()
self.terminals = [e.split('_')[0] for e in sentence]
self.nonterminals = [e.split('_')[1] for e in sentence]
def combine(self, item_stage):
assert len(item_stage) > 1
if len(item_stage) == 2:
syn_cat_0 = item_stage[0].syn_cat
terminal_0 = item_stage[0].terminal
syn_cat_1 = item_stage[1].syn_cat
terminal_1 = item_stage[1].terminal
try:
syn_cat = self.combinations[(syn_cat_0, syn_cat_1)]
except KeyError:
raise CantCombine
return StackElem(syn_cat, ' '.join([terminal_0, terminal_1]))
else:
return self.combine([item_stage[0], self.combine(item_stage[1:])])
def conditional_generate_item(self, top_item):
idx = (0
if top_item is None
else self.terminals.index(
top_item.terminal.split()[-1]) + 1)
return StackElem(syn_cat=self.nonterminals[idx],
terminal=self.terminals[idx])
def can_combine(self, item_stage):
try:
self.combine(item_stage)
except CantCombine:
return False
return True
class ShiftReducePPDA(object):
def __init__(self, ppda_rules=None):
self.rules = (ShiftReducePPDA_Rules()
if ppda_rules is None
else ppda_rules)
self.stack = collections.deque()
def generate_sentence(self):
self.stack.clear()
while True:
self.shift()
self.reduce()
if self.stop_shift():
break
return list(self.stack)
def shift(self):
self.stack.append(self.generate_stack_item())
def generate_stack_item(self):
# Stochastically generate a new stack item conditioned on top 1
# element of the stack
try:
top_item = self.stack[-1]
except IndexError:
top_item = None
return self.rules.conditional_generate_item(top_item)
def reduce(self):
while True:
if self.can_reduce():
item_stage = []
item_stage.insert(0, self.stack.pop())
item_stage.insert(0, self.stack.pop())
self.stack.append(self.rules.combine(item_stage))
else:
break
def stop_shift(self):
return self.stack[-1].syn_cat == 'S'
def can_reduce(self):
if len(self.stack) < 2:
return False
return self.rules.can_combine(
[self.stack[-2], self.stack[-1]])
def generate(self, num_sentence=1):
for _ in range(num_sentence):
print self.generate_sentence()
class TopDownPPDA(object):
pass
class TestShiftReducePPDA(unittest.TestCase):
def test_can_reduce(self):
obj = ShiftReducePPDA()
obj.shift()
obj.shift()
obj.shift()
self.assertTrue(obj.can_reduce())
def test_generate_sentence(self):
obj = ShiftReducePPDA()
print obj.generate_sentence()
if __name__ == '__main__':
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment