Skip to content

Instantly share code, notes, and snippets.

@ehaliewicz
Created May 24, 2012 03:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ehaliewicz/2779206 to your computer and use it in GitHub Desktop.
Save ehaliewicz/2779206 to your computer and use it in GitHub Desktop.
Python FSM parser
# process() kind of parses Tadoku entries
# Character Classes
# 0. Space
# 1. Hash
# 2. Semicolon
# 3. Number
# 4. Other
# states
# 0 Start (eats spaces until (number -> 1))
# 1 Number (takes spaces until (#media -> 2))
# 2 Media (takes spaces until (#times -> 3) (; -> 0))
# 3 Times (takes spaces until (; -> 0))
# 4 After semicolon (takes everything but hashes and numbers) until (number -> 1)
# [medium-dec/times-read/number-of/comment, start-pos, end-pos]
# Shared parser state. process() rebinds string, position, state and
# collected at the start of every run.
string, position, state = "", 0, 0

# Entry layout: [type, start-pos, end-pos]
cur_collection = ["", 0, 0]
collected = [
    ["medium", 0, 0],
]
def current_char():
    """Return the character of the global ``string`` at the global ``position``."""
    return string[position]
def next_char():
    """Return the character following the current one, or False at the end.

    Reads the module-level ``string`` and ``position``. False is returned
    when there is no character after ``position`` (including when the
    current character is the last one).
    """
    following = position + 1
    # Guard on the index actually being read; checking ``position`` alone
    # lets the last character through and raises IndexError.
    if following < len(string):
        return string[following]
    return False
def hash_p():
    """Return True when the global ``string`` has a '#' at the global ``position``."""
    at_hash = string.startswith("#", position)
    return at_hash
def input_class(c):
    """Map character *c* to its character-class number.

    Classes: 0 for space, tab, newline or carriage return; 1 for '#';
    2 for ';'; 3 for an ASCII digit; 4 for anything else.
    """
    if c == ';':
        return 2
    if c == '#':
        return 1
    if c in (' ', '\t', '\n', '\r'):
        return 0
    return 3 if '0' <= c <= '9' else 4
def skip():
    """Advance the global ``position`` by one character."""
    global position
    position += 1
def error():
    """Report the character at the current position as misplaced, then skip it."""
    template = 'Error, skipping misplaced character: {} in state: {}'
    offending = string[position]
    print(template.format(offending, state))
    skip()
def declaration_p(type_container, prelude_func=True, prelude_lng=0):
    """Check whether a known declaration starts at the current position.

    The candidate token begins ``prelude_lng`` characters after the global
    ``position`` (stepping over a prelude such as '#') and runs up to the
    next whitespace character, ';', '#', or the end of the global
    ``string``.

    Args:
        type_container: collection of accepted declaration names.
        prelude_func: result of the prelude test (for example ``hash_p()``);
            when falsy, nothing is matched.
        prelude_lng: number of prelude characters to step over.

    Returns:
        The index just past the token when the token is in
        ``type_container``; otherwise False.
    """
    if not prelude_func:
        return False
    start = position + prelude_lng
    end = start
    # Stop at every character the state machine treats as structural, not
    # only at a literal space: input_class() counts tab/newline/CR as
    # whitespace, and the transition table accepts ';' or '#' directly
    # after a declaration.
    while end < len(string) and string[end] not in " \t\n\r;#":
        end += 1
    if string[start:end] in type_container:
        return end
    return False
def times_p():
    """Return declaration_p()'s result for a '#'-prefixed name from ``times``."""
    has_hash = hash_p()
    return declaration_p(times, prelude_func=has_hash, prelude_lng=1)
def media_p():
    """Return declaration_p()'s result for a '#'-prefixed name from ``media``."""
    has_hash = hash_p()
    return declaration_p(media, prelude_func=has_hash, prelude_lng=1)
def read_num():
    """Consume the token at the current position as a number.

    The token runs from the global ``position`` to the next whitespace
    character (space, tab, newline or carriage return, the same set that
    input_class() reports as class 0) or to the end of ``string``.

    If the token consists only of digits, ``["number", start, end]`` is
    appended to ``collected`` and ``cur_collection`` is reset to
    ``["", 0, 0]``; otherwise a message is printed. In both cases
    ``position`` is moved to the end of the token.
    """
    global position, cur_collection
    start = position
    endpos = start
    # Searching only for ' ' would swallow a tab- or newline-separated
    # declaration into the number token and reject it as malformed.
    while endpos < len(string) and string[endpos] not in " \t\n\r":
        endpos += 1
    substring = string[start:endpos]
    if substring.isdigit():
        collected.append(["number", start, endpos])
        cur_collection = ["", 0, 0]
    else:
        print("Skipping malformed number {}".format(substring))
    position = endpos
def read_media():
    """Consume a '#media' declaration at the current position.

    On success ``["media", start, end]`` is stored in ``cur_collection``,
    appended to ``collected``, and ``position`` moves to the end of the
    declaration. On failure a message is printed and ``position`` is moved
    forward to the next ';', else to the next space, else to the end of
    ``string``.
    """
    global position, cur_collection
    endpos = media_p()
    # media_p() returns False on failure; comparing with ``> -1`` would
    # accept it, because False compares equal to 0.
    if endpos:
        cur_collection = ["media", position, endpos]
        collected.append(cur_collection)
        position = endpos
        return
    print("Error, malformed media declaration in {}".format(string[position:]))
    # str.find() signals "not found" with -1, which is truthy, so each
    # result is compared with -1 explicitly and never assigned to position.
    next_sem = string.find(";", position)
    if next_sem != -1:
        position = next_sem
        return
    next_space = string.find(" ", position)
    position = next_space if next_space != -1 else len(string)
def read_times():
    """Consume a '#times' declaration at the current position.

    On success ``["times", start, end]`` is stored in ``cur_collection``,
    appended to ``collected``, and ``position`` moves to the end of the
    declaration. On failure a message is printed and ``position`` is moved
    forward to the next space, or to the end of ``string`` when there is
    none.
    """
    global position, cur_collection
    endpos = times_p()
    if endpos:
        cur_collection = ["times", position, endpos]
        collected.append(cur_collection)
        position = endpos
        return
    print("Error, malformed times declaration")
    next_space = string.find(" ", position)
    # str.find() returns -1 when no space follows; assigning that to
    # position would move the parser backwards instead of finishing.
    position = next_space if next_space != -1 else len(string)
# Names accepted after '#' by times_p().
times = [
    "first",
    "second",
    "third",
    "fourth",
    "fifth",
]

# Names accepted after '#' by media_p().
media = [
    'book',
    'dr',
    'manga',
    'fullgame',
    'game',
    'lyric',
    'subs',
    'news',
    'nico',
    'sentences',
]

# Human-readable state names.
states = [
    "Start",
    "Number",
    "Media",
    "Times",
    "Semicolon",
    "End",
]
# Transition table. For a given state and character class:
#   sm[state][2 * char_class]      is the action to call
#   sm[state][2 * char_class + 1]  is the next state
# Column pairs, left to right: Space, Hash, Semicolon, Number, Other.
sm = [
    # Start
    [skip, 0,  error, 0,       error, 0,  read_num, 1,  error, 0],
    # Number
    [skip, 1,  read_media, 2,  error, 1,  error, 1,     error, 1],
    # Media
    [skip, 2,  read_times, 3,  skip, 0,   error, 2,     error, 2],
    # Times
    [skip, 3,  error, 3,       skip, 0,   error, 3,     error, 3],
    # After semicolon
    [skip, 4,  skip, 4,        skip, 4,   read_num, 1,  skip, 4],
]
def process(incoming):
    """Run the state machine over *incoming* and return the collected entries.

    Resets the global ``string``, ``position``, ``state`` and ``collected``,
    then, for each character reached, looks up the action and next state in
    ``sm`` by the current state and the character's class, prints a trace
    line, and runs the action. The loop ends when ``position`` reaches the
    end of *incoming* or the state becomes greater than 3.

    Returns:
        The ``collected`` list of ``[type, start-pos, end-pos]`` entries.
    """
    global position, string, collected, state
    string, position, state, collected = incoming, 0, 0, []
    while position < len(incoming):
        ch = current_char()
        char_class = input_class(ch)
        column = 2 * char_class
        statefunc = sm[state][column]
        print("Character: {}, Class: {}, State: {}, State Function: {}".format(ch, char_class, state, statefunc))
        # The action runs while ``state`` still holds the old state, which
        # error() includes in its message.
        statefunc()
        state = sm[state][column + 1]
        if state > 3:
            break
    return collected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment