hghwng/fsa.py

## fsa.py
#!/bin/env python3
g_dbg = False
g_rule = {}


def print_state(state, text):
    print('\t[', end='')
    for i in state['pre']:
        print('{} -> '.format(i), end='')
    print('{}] '.format(state['tag']), end='')

    for i in range(state['index'], len(text)):
        print(text[i] + ' ', end='')
    print('')


def search(text, rules):
    global g_dbg

    text = text.split(' ')
    states = []

    if g_dbg:
        states.append({'index': 0, 'tag': 'start', 'pre': []})
    else:
        states.append({'index': 0, 'tag': 'start'})

    while len(states) > 0:
        state = states.pop()

        # test whether the destination state is found
        if state['tag'] == 'end':
            if state['index'] == len(text):
                # only succeeds on full match
                return True
            else:
                continue

        if state['index'] < 0 and state['index'] >= len(text):
            continue

        # output debug message on demand
        if g_dbg:
            print_state(state, text)

        # get transitions of current state
        if state['tag'] not in rules:
            continue
        else:
            rule = rules[state['tag']]

        to_match = text[state['index']]
        for transition in rule:
            new_state = {'index': -1, 'tag': transition[1]}
            if g_dbg:
                new_state['pre'] = list(state['pre'])
                new_state['pre'].append(state['tag'])

            for candedate in transition[0]:
                if candedate == '':
                    # epsilon transition
                    new_state['index'] = state['index']
                elif candedate == to_match:
                    new_state['index'] = state['index'] + 1

                if new_state['index'] != -1 and new_state not in states:
                    states.insert(0, new_state)

    return False


def build_rule():
    one_plus = ['one', 'two', 'three', 'four', 'five',
                'six', 'seven', 'eight', 'nine']
    two_plus = ['two', 'three', 'four', 'five',
                'six', 'seven', 'eight', 'nine']
    ten_plus = ['eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
                'sixteen', 'seventeen', 'eighteen', 'nineteen']
    xx_ty = ['ten', 'twenty', 'thirty', 'forty', 'fifty',
             'sixty', 'seventy', 'eighty', 'ninety']

    no_suffix = []
    no_suffix.extend(one_plus)
    no_suffix.extend(ten_plus)
    has_suffix = xx_ty

    '''
        abbr:
            c       : cent
            d       : dollar

            sg      : singular
            sg?     : singular or plural
            pl      : plural

            hs      : has suffix
            ns      : no suffix

            stg     : stage

        number less than 100:
            $no_suffix | ($has_suffix? $one_plus)
    '''

    rules = {}

    rules['start'] = [
        [[''],         'd stg0'],      # one thousand ...
        [[''],         'd stg1'],      # five hundred ...
        [[''],         'd stg2 sg?'],  # fifty one / one
        [[''],         'c'],           # fifty one / one
    ]

    #
    # stage 0: number for thousand
    #

    rules['d stg0'] = [
        [no_suffix,    'd stg0 ns'],   # eleven thousand ...
        [has_suffix,   'd stg0 hs'],   # fifty thousand ...
    ]

    rules['d stg0 hs'] = [
        [[''],         'd stg0 ns'],   # fifty thousand ...
        [one_plus,     'd stg0 ns'],   # fifty one thousand ...
    ]

    rules['d stg0 ns'] = [
        [['thousand'], 'd stg2 pl'],   # one thousand              eighty ...
        [['thousand'], 'd stg1'],      # one thousand five hundred ...
    ]

    #
    # stage 1: number for hundred
    #

    rules['d stg1'] = [
        [one_plus,     'd stg1 ns'],   # one hundred ...
    ]

    rules['d stg1 ns'] = [
        [['hundred'],  'd stg2 pl'],   # one hundred ...
    ]

    #
    # stage 2 (plural): number less than 100 (for confirmed plural)
    #

    rules['d stg2 pl'] = [
        [[''],         'd pl'],        # one hundred
        [no_suffix,    'd pl'],        # one hundred eleven
        [has_suffix,   'd stg2 hs'],   # one hundred fifty ...
    ]

    rules['d stg2 hs'] = [
        [[''],         'd pl'],        # one hundred fifty
        [one_plus,     'd pl'],        # one hundred fifty one
    ]

    #
    # stage 2 (plural or singural): number less than 100 (detect form)
    #

    rules['d stg2 sg?'] = [
        [['one'],      'd sg'],        # one
        [two_plus,     'd pl'],        # five
        [ten_plus,     'd pl'],        # eleven
        [has_suffix,   'd stg2 hs'],   # fifty ...
    ]

    #
    # accept 'doller'
    #

    rules['d pl'] = [
        [['dollars'],  'end'],
        [['dollars'],  'c'],
    ]

    rules['d sg'] = [
        [['dollar'],   'end'],
        [['dollar'],   'c']
    ]

    #
    # recognize conts
    #

    rules['c'] = [
        [[''],         'end'],         # probably no cent included
        [['one'],      'c sg'],
        [two_plus,     'c pl'],
        [ten_plus,     'c pl'],
        [has_suffix,   'c hs'],
    ]

    rules['c hs'] = [
        [[''],         'c pl'],
        [one_plus,     'c pl'],
    ]

    rules['c pl'] = [
        [['cents'],    'end'],
    ]

    rules['c sg'] = [
        [['cent'],     'end'],
    ]

    return rules


def test_one(answer, text):
    global g_rule

    print('Testing "{}": '.format(text), end='')
    if search(text, g_rule) == answer:
        print('pass')
    else:
        print('fail. {} expected'.format(answer))


def test_all():
    # 11, 12, ...
    test_one(True,  'eleven dollars')
    test_one(True,  'nineteen dollars')

    # 20, 30, ...
    test_one(True,  'fifty dollars')
    test_one(True,  'seventy dollars')
    test_one(False, 'fifty nineteen dollars')
    test_one(False, 'nineteen fifty dollars')
    test_one(True,  'fifty six dollars')

    # 100 - 999
    test_one(True,  'one hundred dollars')
    test_one(True,  'two hundred six dollars')
    test_one(True,  'nine hundred ninety six dollars')

    # 1,000 - 100,000
    test_one(True,  'twenty thousand sixty six dollars')
    test_one(True,  'sixty one thousand five hundred forty six dollars')
    test_one(False, 'thousand five hundred forty six dollars one cent')

    # With 'cent'
    test_one(True,  'one thousand five hundred forty six dollars '
                    'twenty one cents')
    test_one(True,  'one thousand five hundred forty six dollars one cent')
    test_one(False, 'one dollars one cent')
    test_one(False, 'one dollar one cents')
    test_one(False, 'one dollar one thousand cents')

    # Plural vs singular
    test_one(True,  'one dollar')
    test_one(False, 'one dollars')
    test_one(True,  'six cents')
    test_one(False, 'six cent')


def main():
    global g_rule
    g_rule = build_rule()

    global g_dbg

    if False:
        g_dbg = True
        test_one(False, 'one dollar one cents')
    else:
        test_all()

if __name__ == '__main__':
    main()
	#!/bin/env python3
	g_dbg = False
	g_rule = {}


	def print_state(state, text):
	print('\t[', end='')
	for i in state['pre']:
	print('{} -> '.format(i), end='')
	print('{}] '.format(state['tag']), end='')

	for i in range(state['index'], len(text)):
	print(text[i] + ' ', end='')
	print('')


	def search(text, rules):
	global g_dbg

	text = text.split(' ')
	states = []

	if g_dbg:
	states.append({'index': 0, 'tag': 'start', 'pre': []})
	else:
	states.append({'index': 0, 'tag': 'start'})

	while len(states) > 0:
	state = states.pop()

	# test whether the destination state is found
	if state['tag'] == 'end':
	if state['index'] == len(text):
	# only succeeds on full match
	return True
	else:
	continue

	if state['index'] < 0 and state['index'] >= len(text):
	continue

	# output debug message on demand
	if g_dbg:
	print_state(state, text)

	# get transitions of current state
	if state['tag'] not in rules:
	continue
	else:
	rule = rules[state['tag']]

	to_match = text[state['index']]
	for transition in rule:
	new_state = {'index': -1, 'tag': transition[1]}
	if g_dbg:
	new_state['pre'] = list(state['pre'])
	new_state['pre'].append(state['tag'])

	for candedate in transition[0]:
	if candedate == '':
	# epsilon transition
	new_state['index'] = state['index']
	elif candedate == to_match:
	new_state['index'] = state['index'] + 1

	if new_state['index'] != -1 and new_state not in states:
	states.insert(0, new_state)

	return False


	def build_rule():
	one_plus = ['one', 'two', 'three', 'four', 'five',
	'six', 'seven', 'eight', 'nine']
	two_plus = ['two', 'three', 'four', 'five',
	'six', 'seven', 'eight', 'nine']
	ten_plus = ['eleven', 'twelve', 'thirteen', 'fourteen', 'fifteen',
	'sixteen', 'seventeen', 'eighteen', 'nineteen']
	xx_ty = ['ten', 'twenty', 'thirty', 'forty', 'fifty',
	'sixty', 'seventy', 'eighty', 'ninety']

	no_suffix = []
	no_suffix.extend(one_plus)
	no_suffix.extend(ten_plus)
	has_suffix = xx_ty

	'''
	abbr:
	c : cent
	d : dollar

	sg : singular
	sg? : singular or plural
	pl : plural

	hs : has suffix
	ns : no suffix

	stg : stage

	number less than 100:
	$no_suffix \| ($has_suffix? $one_plus)
	'''

	rules = {}

	rules['start'] = [
	[[''], 'd stg0'], # one thousand ...
	[[''], 'd stg1'], # five hundred ...
	[[''], 'd stg2 sg?'], # fifty one / one
	[[''], 'c'], # fifty one / one
	]

	#
	# stage 0: number for thousand
	#

	rules['d stg0'] = [
	[no_suffix, 'd stg0 ns'], # eleven thousand ...
	[has_suffix, 'd stg0 hs'], # fifty thousand ...
	]

	rules['d stg0 hs'] = [
	[[''], 'd stg0 ns'], # fifty thousand ...
	[one_plus, 'd stg0 ns'], # fifty one thousand ...
	]

	rules['d stg0 ns'] = [
	[['thousand'], 'd stg2 pl'], # one thousand eighty ...
	[['thousand'], 'd stg1'], # one thousand five hundred ...
	]

	#
	# stage 1: number for hundred
	#

	rules['d stg1'] = [
	[one_plus, 'd stg1 ns'], # one hundred ...
	]

	rules['d stg1 ns'] = [
	[['hundred'], 'd stg2 pl'], # one hundred ...
	]

	#
	# stage 2 (plural): number less than 100 (for confirmed plural)
	#

	rules['d stg2 pl'] = [
	[[''], 'd pl'], # one hundred
	[no_suffix, 'd pl'], # one hundred eleven
	[has_suffix, 'd stg2 hs'], # one hundred fifty ...
	]

	rules['d stg2 hs'] = [
	[[''], 'd pl'], # one hundred fifty
	[one_plus, 'd pl'], # one hundred fifty one
	]

	#
	# stage 2 (plural or singural): number less than 100 (detect form)
	#

	rules['d stg2 sg?'] = [
	[['one'], 'd sg'], # one
	[two_plus, 'd pl'], # five
	[ten_plus, 'd pl'], # eleven
	[has_suffix, 'd stg2 hs'], # fifty ...
	]

	#
	# accept 'doller'
	#

	rules['d pl'] = [
	[['dollars'], 'end'],
	[['dollars'], 'c'],
	]

	rules['d sg'] = [
	[['dollar'], 'end'],
	[['dollar'], 'c']
	]

	#
	# recognize conts
	#

	rules['c'] = [
	[[''], 'end'], # probably no cent included
	[['one'], 'c sg'],
	[two_plus, 'c pl'],
	[ten_plus, 'c pl'],
	[has_suffix, 'c hs'],
	]

	rules['c hs'] = [
	[[''], 'c pl'],
	[one_plus, 'c pl'],
	]

	rules['c pl'] = [
	[['cents'], 'end'],
	]

	rules['c sg'] = [
	[['cent'], 'end'],
	]

	return rules


	def test_one(answer, text):
	global g_rule

	print('Testing "{}": '.format(text), end='')
	if search(text, g_rule) == answer:
	print('pass')
	else:
	print('fail. {} expected'.format(answer))


	def test_all():
	# 11, 12, ...
	test_one(True, 'eleven dollars')
	test_one(True, 'nineteen dollars')

	# 20, 30, ...
	test_one(True, 'fifty dollars')
	test_one(True, 'seventy dollars')
	test_one(False, 'fifty nineteen dollars')
	test_one(False, 'nineteen fifty dollars')
	test_one(True, 'fifty six dollars')

	# 100 - 999
	test_one(True, 'one hundred dollars')
	test_one(True, 'two hundred six dollars')
	test_one(True, 'nine hundred ninety six dollars')

	# 1,000 - 100,000
	test_one(True, 'twenty thousand sixty six dollars')
	test_one(True, 'sixty one thousand five hundred forty six dollars')
	test_one(False, 'thousand five hundred forty six dollars one cent')

	# With 'cent'
	test_one(True, 'one thousand five hundred forty six dollars '
	'twenty one cents')
	test_one(True, 'one thousand five hundred forty six dollars one cent')
	test_one(False, 'one dollars one cent')
	test_one(False, 'one dollar one cents')
	test_one(False, 'one dollar one thousand cents')

	# Plural vs singular
	test_one(True, 'one dollar')
	test_one(False, 'one dollars')
	test_one(True, 'six cents')
	test_one(False, 'six cent')


	def main():
	global g_rule
	g_rule = build_rule()

	global g_dbg

	if False:
	g_dbg = True
	test_one(False, 'one dollar one cents')
	else:
	test_all()

	if __name__ == '__main__':
	main()