-
-
Save xudshen/8331554 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python | |
from sys import argv | |
###### define corresponding function ###### | |
NFA = {} | |
closure = {} | |
EmptyMark = "!@#" | |
AllLetter = '$%^' | |
###### ini state ###### | |
startState = 1 | |
endState = 0 | |
def iniState(curState): | |
if(not (curState in NFA)): | |
NFA[curState] = {} | |
closure[curState] = [curState] | |
###### add edge on NFA | |
def addEdge(curState, char): | |
iniState(curState) | |
NFA[curState][char] = [curState + 1] | |
return curState + 1 | |
def addEdge_star(curState): | |
for i in range(0, 5): | |
iniState(curState + i) | |
NFA[curState][EmptyMark] = [curState + 1,curState + 5] | |
NFA[curState + 1][EmptyMark] = [curState + 2] | |
NFA[curState + 2][AllLetter] = [curState + 3] | |
NFA[curState + 3][EmptyMark] = [curState + 4] | |
NFA[curState + 4][EmptyMark] = [curState + 1,curState + 5] | |
return curState + 5 | |
def addEdge_quesMark(curState): | |
iniState(curState) | |
NFA[curState][EmptyMark] = [curState + 1] | |
NFA[curState][AllLetter] = [curState + 1] | |
return curState + 1 | |
####### constructor ####### | |
constructor = { | |
'*': addEdge_star, | |
'?': addEdge_quesMark | |
} | |
def constructNFA_char(curState, char): | |
if(not (char in constructor)): | |
return addEdge(curState, char) | |
else: | |
return constructor[char](curState) | |
def constructNFA(pattern): | |
if(len(pattern) == 0): | |
return | |
print(pattern) | |
curState = startState | |
pre = pattern[0] | |
curState = constructNFA_char(curState, pattern[0]) | |
print(NFA) | |
## more ... | |
for i in range(1, len(pattern)): | |
if(pre == pattern[i] and (pre in constructor)): | |
pre = pattern[i] | |
continue | |
else: | |
curState = constructNFA_char(curState, pattern[i]) | |
pre = pattern[i] | |
print(NFA) | |
## set end state | |
global endState | |
endState = curState | |
iniState(endState) | |
####### closure ####### | |
def getCharSet(pattern): | |
charSet = list(set(pattern)) | |
for i in range(len(charSet)): | |
if(charSet[i] == '*' or charSet[i] == '?'): | |
charSet[i] = AllLetter | |
charSet.append(EmptyMark) | |
return list(set(charSet)) | |
def generateClosure(charSet): | |
global closure | |
print(charSet) | |
for (k, v) in closure.items(): | |
if(EmptyMark in NFA[k]): | |
closure[k] = list(set(closure[k]) | set(NFA[k][EmptyMark])) | |
print(closure) | |
isChange = True | |
while(isChange): | |
isChange = False | |
for (k, v) in closure.items(): | |
newSet = set(closure[k]) | |
for n in v: | |
newSet = newSet | set(closure[n]) | |
if(set(closure[k]) != newSet): | |
isChange = True | |
closure[k] = list(newSet) | |
print(closure) | |
###### match the string | |
def match(string): | |
if(len(string) == 0): | |
return False | |
global closure | |
global endState | |
curSet = closure[startState] | |
for i in range(0, len(string)): | |
newSet = set([]) | |
for state in curSet: | |
if(string[i] in NFA[state]): | |
for n in NFA[state][string[i]]: | |
newSet = newSet | set(closure[n]) | |
elif(AllLetter in NFA[state]): | |
for n in NFA[state][AllLetter]: | |
newSet = newSet | set(closure[n]) | |
curSet = list(newSet) | |
print(curSet) | |
return endState in curSet | |
####### optimize ####### | |
def simplifyPattern(pattern): | |
if(len(pattern) == 0): | |
return None | |
pre = pattern[0] | |
ret = [pre] | |
for i in range(1, len(pattern)): | |
if((pre == pattern[i] and (pre in constructor)) or (pre == '*' and pattern[i] == '?')): | |
continue | |
elif(pre == '?' and pattern[i] == '*'): | |
ret[len(ret) - 1] = pattern[i] | |
pre = pattern[i] | |
else: | |
ret.append(pattern[i]) | |
pre = pattern[i] | |
return ret | |
if __name__ == '__main__': | |
pattern = simplifyPattern('a**********b') | |
constructNFA(pattern) | |
print(endState) | |
generateClosure(getCharSet(pattern)) | |
if match('acbb'): | |
print('yes') | |
else: | |
print('no') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment