Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save bioshazard/30939024e639857d73d1fa5da5a69b2b to your computer and use it in GitHub Desktop.
Save bioshazard/30939024e639857d73d1fa5da5a69b2b to your computer and use it in GitHub Desktop.
occuranceCount = { }
# TY http://algo.pw/algo/64/python <3
class AhoNode:
def __init__(self):
self.goto = {}
self.out = []
self.fail = None
def aho_create_forest(patterns):
root = AhoNode()
for path in patterns:
node = root
for symbol in path:
node = node.goto.setdefault(symbol, AhoNode())
node.out.append(path)
return root
def aho_create_statemachine(patterns):
root = aho_create_forest(patterns)
queue = []
for node in root.goto.itervalues():
queue.append(node)
node.fail = root
while len(queue) > 0:
rnode = queue.pop(0)
for key, unode in rnode.goto.iteritems():
queue.append(unode)
fnode = rnode.fail
while fnode != None and not fnode.goto.has_key(key):
fnode = fnode.fail
unode.fail = fnode.goto[key] if fnode else root
unode.out += unode.fail.out
return root
def aho_find_all(s, root, callback):
node = root
for i in xrange(len(s)):
while node != None and not node.goto.has_key(s[i]):
node = node.fail
if node == None:
node = root
continue
node = node.goto[s[i]]
for pattern in node.out:
callback(i - len(pattern) + 1, pattern)
############################
# Demonstration of work
def on_occurence(pos, pattern):
global occuranceCount
global haystack
if pattern not in occuranceCount:
occuranceCount[pattern] = 0
occuranceCount[pattern] += 1
debugOutput = "%d, At pos %s found pattern: %s\n" % (occuranceCount[pattern], pos, pattern)
sys.stderr.write(haystack)
sys.stderr.write(debugOutput)
import sys
# Prep a list of strings to match
if len(sys.argv) != 2:
sys.stderr.write("Requires 1 arg: file with patterns")
sys.exit(1)
patternSource = sys.argv[1]
needles = []
for line in open(patternSource).readlines():
needles.append(line[:-1]) # Drop the \n off the end
root = aho_create_statemachine(needles)
linecnt = 0
haystack = ''
for haystack in sys.stdin:
linecnt += 1
aho_find_all(haystack, root, on_occurence)
sys.stderr.write("Line %d\n" % linecnt)
for key, val in occuranceCount.items():
print( "%s %d" % (key, val) )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment