se4u/shift-reduce-ppda.py

## shift-reduce-ppda.py
'''
| Filename    : shift-reduce-ppda.py
| Description : A shift Reduce PPDA generator
| Author      : Pushpendre Rastogi
| Created     : Tue Sep  8 20:42:22 2015 (-0400)
| Last-Updated: Tue Sep  8 23:09:28 2015 (-0400)
|           By: Pushpendre Rastogi
|     Update #: 33
The paper "Relating Probabilistic Grammar and Automata" compare PCFGs and PPDAs.
They also explain how a PPDA produces a sentence. This class implements a PPDA
on a simple grammar.

The paper's first main contribution was to show that using the Chelba-Jelinek
method it was not possible to encode a specific PCFG as a shift-reduce PPDA.
Assuming that the context was bounded.

Then they showed a way to get around this problem by creating a Top-Down PPDA
that could be arbitrarily inter-converted to PCFG (and any arbitrary PCFG could
be converted to Top Down PPDA).

Finally they showed that by enriching the context in a shift reduce PPDA with
a "richer stack and state alphabets" they could encode any arbitrary PCFG as a
left to right Shift Reduce PPDA but unfortunately that involved exponential
blowup in the context required.
'''
import collections
import unittest


class StackElem(object):
    __slots__ = ['syn_cat', 'terminal']

    def __init__(self, syn_cat, terminal):
        """
        Params
        ------
        syn_cat  : The syntactic category
        terminal : The terminal word.
        """
        self.syn_cat = syn_cat
        self.terminal = terminal

    def __str__(self):
        return '%s/%s' % (self.terminal, self.syn_cat)

    def __repr__(self):
        return self.__str__()


class CantCombine(Exception):
    pass


class ShiftReducePPDA_Rules(object):

    '''
    This class is a hack right now: always combines things in the item_stage
    from left to right.
    TODO: Make this an abstract class and derive the hack from it.
    '''

    def __init__(self):
        exec ''
        for e in 'DT N NP JJ V VP S'.split():
            locals()[e] = e
        self.combinations = {(DT, N): NP,
                             (JJ, N): NP,
                             (DT, NP): NP,
                             (V, NP): VP,
                             (NP, VP): S}
        sentence = 'The_DT small_JJ woman_N lifted_V the_DT fat_JJ man_N'.split()
        self.terminals = [e.split('_')[0] for e in sentence]
        self.nonterminals = [e.split('_')[1] for e in sentence]

    def combine(self, item_stage):
        assert len(item_stage) > 1
        if len(item_stage) == 2:
            syn_cat_0 = item_stage[0].syn_cat
            terminal_0 = item_stage[0].terminal
            syn_cat_1 = item_stage[1].syn_cat
            terminal_1 = item_stage[1].terminal
            try:
                syn_cat = self.combinations[(syn_cat_0, syn_cat_1)]
            except KeyError:
                raise CantCombine
            return StackElem(syn_cat, ' '.join([terminal_0, terminal_1]))
        else:
            return self.combine([item_stage[0], self.combine(item_stage[1:])])

    def conditional_generate_item(self, top_item):
        idx = (0
               if top_item is None
               else self.terminals.index(
                   top_item.terminal.split()[-1]) + 1)
        return StackElem(syn_cat=self.nonterminals[idx],
                         terminal=self.terminals[idx])

    def can_combine(self, item_stage):
        try:
            self.combine(item_stage)
        except CantCombine:
            return False
        return True


class ShiftReducePPDA(object):

    def __init__(self, ppda_rules=None):
        self.rules = (ShiftReducePPDA_Rules()
                      if ppda_rules is None
                      else ppda_rules)
        self.stack = collections.deque()

    def generate_sentence(self):
        self.stack.clear()
        while True:
            self.shift()
            self.reduce()
            if self.stop_shift():
                break
        return list(self.stack)

    def shift(self):
        self.stack.append(self.generate_stack_item())

    def generate_stack_item(self):
        # Stochastically generate a new stack item conditioned on top 1
        # element of the stack
        try:
            top_item = self.stack[-1]
        except IndexError:
            top_item = None
        return self.rules.conditional_generate_item(top_item)

    def reduce(self):
        while True:
            if self.can_reduce():
                item_stage = []
                item_stage.insert(0, self.stack.pop())
                item_stage.insert(0, self.stack.pop())
                self.stack.append(self.rules.combine(item_stage))
            else:
                break

    def stop_shift(self):
        return self.stack[-1].syn_cat == 'S'

    def can_reduce(self):
        if len(self.stack) < 2:
            return False
        return self.rules.can_combine(
            [self.stack[-2], self.stack[-1]])

    def generate(self, num_sentence=1):
        for _ in range(num_sentence):
            print self.generate_sentence()


class TopDownPPDA(object):
    pass


class TestShiftReducePPDA(unittest.TestCase):

    def test_can_reduce(self):
        obj = ShiftReducePPDA()
        obj.shift()
        obj.shift()
        obj.shift()
        self.assertTrue(obj.can_reduce())

    def test_generate_sentence(self):
        obj = ShiftReducePPDA()
        print obj.generate_sentence()

if __name__ == '__main__':
    unittest.main()
	'''
	\| Filename : shift-reduce-ppda.py
	\| Description : A shift Reduce PPDA generator
	\| Author : Pushpendre Rastogi
	\| Created : Tue Sep 8 20:42:22 2015 (-0400)
	\| Last-Updated: Tue Sep 8 23:09:28 2015 (-0400)
	\| By: Pushpendre Rastogi
	\| Update #: 33
	The paper "Relating Probabilistic Grammar and Automata" compare PCFGs and PPDAs.
	They also explain how a PPDA produces a sentence. This class implements a PPDA
	on a simple grammar.

	The paper's first main contribution was to show that using the Chelba-Jelinek
	method it was not possible to encode a specific PCFG as a shift-reduce PPDA.
	Assuming that the context was bounded.

	Then they showed a way to get around this problem by creating a Top-Down PPDA
	that could be arbitrarily inter-converted to PCFG (and any arbitrary PCFG could
	be converted to Top Down PPDA).

	Finally they showed that by enriching the context in a shift reduce PPDA with
	a "richer stack and state alphabets" they could encode any arbitrary PCFG as a
	left to right Shift Reduce PPDA but unfortunately that involved exponential
	blowup in the context required.
	'''
	import collections
	import unittest


	class StackElem(object):
	__slots__ = ['syn_cat', 'terminal']

	def __init__(self, syn_cat, terminal):
	"""
	Params
	------
	syn_cat : The syntactic category
	terminal : The terminal word.
	"""
	self.syn_cat = syn_cat
	self.terminal = terminal

	def __str__(self):
	return '%s/%s' % (self.terminal, self.syn_cat)

	def __repr__(self):
	return self.__str__()


	class CantCombine(Exception):
	pass


	class ShiftReducePPDA_Rules(object):

	'''
	This class is a hack right now: always combines things in the item_stage
	from left to right.
	TODO: Make this an abstract class and derive the hack from it.
	'''

	def __init__(self):
	exec ''
	for e in 'DT N NP JJ V VP S'.split():
	locals()[e] = e
	self.combinations = {(DT, N): NP,
	(JJ, N): NP,
	(DT, NP): NP,
	(V, NP): VP,
	(NP, VP): S}
	sentence = 'The_DT small_JJ woman_N lifted_V the_DT fat_JJ man_N'.split()
	self.terminals = [e.split('_')[0] for e in sentence]
	self.nonterminals = [e.split('_')[1] for e in sentence]

	def combine(self, item_stage):
	assert len(item_stage) > 1
	if len(item_stage) == 2:
	syn_cat_0 = item_stage[0].syn_cat
	terminal_0 = item_stage[0].terminal
	syn_cat_1 = item_stage[1].syn_cat
	terminal_1 = item_stage[1].terminal
	try:
	syn_cat = self.combinations[(syn_cat_0, syn_cat_1)]
	except KeyError:
	raise CantCombine
	return StackElem(syn_cat, ' '.join([terminal_0, terminal_1]))
	else:
	return self.combine([item_stage[0], self.combine(item_stage[1:])])

	def conditional_generate_item(self, top_item):
	idx = (0
	if top_item is None
	else self.terminals.index(
	top_item.terminal.split()[-1]) + 1)
	return StackElem(syn_cat=self.nonterminals[idx],
	terminal=self.terminals[idx])

	def can_combine(self, item_stage):
	try:
	self.combine(item_stage)
	except CantCombine:
	return False
	return True


	class ShiftReducePPDA(object):

	def __init__(self, ppda_rules=None):
	self.rules = (ShiftReducePPDA_Rules()
	if ppda_rules is None
	else ppda_rules)
	self.stack = collections.deque()

	def generate_sentence(self):
	self.stack.clear()
	while True:
	self.shift()
	self.reduce()
	if self.stop_shift():
	break
	return list(self.stack)

	def shift(self):
	self.stack.append(self.generate_stack_item())

	def generate_stack_item(self):
	# Stochastically generate a new stack item conditioned on top 1
	# element of the stack
	try:
	top_item = self.stack[-1]
	except IndexError:
	top_item = None
	return self.rules.conditional_generate_item(top_item)

	def reduce(self):
	while True:
	if self.can_reduce():
	item_stage = []
	item_stage.insert(0, self.stack.pop())
	item_stage.insert(0, self.stack.pop())
	self.stack.append(self.rules.combine(item_stage))
	else:
	break

	def stop_shift(self):
	return self.stack[-1].syn_cat == 'S'

	def can_reduce(self):
	if len(self.stack) < 2:
	return False
	return self.rules.can_combine(
	[self.stack[-2], self.stack[-1]])

	def generate(self, num_sentence=1):
	for _ in range(num_sentence):
	print self.generate_sentence()


	class TopDownPPDA(object):
	pass


	class TestShiftReducePPDA(unittest.TestCase):

	def test_can_reduce(self):
	obj = ShiftReducePPDA()
	obj.shift()
	obj.shift()
	obj.shift()
	self.assertTrue(obj.can_reduce())

	def test_generate_sentence(self):
	obj = ShiftReducePPDA()
	print obj.generate_sentence()

	if __name__ == '__main__':
	unittest.main()