-
-
Save edwinyzh/e0baf5300ec184a9d095b09966b73815 to your computer and use it in GitHub Desktop.
AhoCorasick Implementation in Delphi / Pascal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
unit AhoCorasick; | |
interface | |
uses Classes, Generics.Collections; | |
type | |
ptrNode = ^TNode; | |
TNode = record | |
id: Cardinal; // Node Id for debugging | |
alpha: AnsiChar; // Char at Node | |
isFinal: Boolean; // 0: No; 1: Yes, it is a final node | |
nextNodes: TList; // Transition Nodes | |
failureNode: ptrNode; // Failure Node | |
depth: Cardinal; // Depth of Node | |
result: AnsiString; // Keyword at Node | |
end; | |
type TAhoCorasick = class | |
private | |
root: TNode; | |
public | |
constructor Create; | |
destructor Destroy; override; | |
procedure AddKeyword(keyword: AnsiString); | |
procedure SetFailureNodes; | |
procedure SearchKeywords(content: AnsiString); | |
procedure PrintTrie; | |
end; | |
implementation | |
uses Windows, System.SysUtils; | |
procedure TAhoCorasick.AddKeyword(keyword: AnsiString); | |
var | |
ch: AnsiChar; | |
currentNode, newNode, nextNode: ptrNode; | |
begin | |
currentNode := @root; | |
// Iterate over each character in the keyword | |
for ch in keyword do | |
begin | |
newNode := nil; | |
// Check if the char series already exists in the node chain | |
for nextNode in currentNode^.nextNodes do | |
begin | |
if ch = nextNode.alpha then | |
begin | |
newNode := nextNode; | |
break; | |
end; | |
end; | |
// If char doesn't exist, add it as a new node | |
if newNode = nil then | |
begin | |
New(newNode); | |
newNode^.alpha := ch; | |
newNode^.depth := currentNode^.depth + 1; | |
newNode^.nextNodes := TList.Create; | |
newNode^.failureNode := nil; | |
newNode^.isFinal := False; | |
currentNode^.nextNodes.Add(newNode); | |
end; | |
// Set current node to next | |
currentNode := newNode; | |
end; | |
// Done adding keyword. Set the last node of keyword as final. | |
currentNode^.isFinal := True; | |
// Add the keyword to node | |
currentNode^.result := keyword; | |
end; | |
procedure TAhoCorasick.SetFailureNodes; | |
var | |
Queue: TQueue<ptrNode>; | |
parentNode, currentNode, stateNode, tmpNode: ptrNode; | |
matchFound: Boolean; | |
begin | |
Queue := TQueue<ptrNode>.Create(); | |
// Set failureNode of all node with depth = 1 to root | |
// Also add them to queue | |
for tmpNode in root.nextNodes do | |
begin | |
tmpNode^.failureNode := @root; | |
Queue.Enqueue(tmpNode); | |
end; | |
while Queue.Count > 0 do | |
begin | |
parentNode := Queue.Dequeue(); | |
for currentNode in parentNode^.nextNodes do | |
begin | |
// Enqueue node to be proecessed | |
Queue.Enqueue(currentNode); | |
// Calculate failure function for node | |
// from failure function of parent node | |
stateNode := parentNode^.failureNode; | |
matchFound := False; | |
// Iterate to top until a match for goto function is found | |
while (matchFound = False) do | |
begin | |
// Check if char exist in stateNode transitions | |
for tmpNode in stateNode^.nextNodes do | |
begin | |
if tmpNode^.alpha = currentNode^.alpha then | |
begin | |
matchFound := True; | |
break; | |
end; | |
end; | |
if stateNode = @root then | |
begin | |
break; | |
end; | |
// Move to depth - 1 | |
if not matchFound then | |
begin | |
stateNode := stateNode^.failureNode; | |
end; | |
end; // end while | |
// Matching state found in trie | |
if matchFound then | |
begin | |
currentNode^.failureNode := tmpNode; | |
end | |
else | |
begin | |
currentNode^.failureNode := @root; | |
end; | |
end; | |
end; | |
Queue.Free; | |
end; | |
function FindNextNode(currentNode: ptrNode; alpha: AnsiChar): ptrNode; | |
var | |
tmpNode: ptrNode; | |
begin | |
Result := nil; | |
// Search for node matching the char | |
for tmpNode in currentNode^.nextNodes do | |
begin | |
// If found, return; | |
if alpha = tmpNode^.alpha then | |
begin | |
Result := tmpNode; | |
break; | |
end; | |
end; | |
end; | |
procedure TAhoCorasick.SearchKeywords(content: AnsiString); | |
var | |
c: AnsiChar; | |
currentNode, nextNode: ptrNode; | |
position: Cardinal; | |
begin | |
currentNode := @root; | |
position := 1; | |
// Iterate over the complete text | |
while position <= Length(content) do | |
begin | |
c := content[position]; | |
nextNode := FindNextNode(currentNode, c); | |
// If not found, set current node to failure node | |
if nextNode = nil then | |
begin | |
// Increment only if node is root | |
if currentNode = @root then | |
begin | |
Inc(position); | |
end | |
else | |
begin | |
currentNode := currentNode^.failureNode; | |
end; | |
end | |
else | |
begin | |
currentNode := nextNode; | |
Inc(position); | |
end; | |
if currentNode^.isFinal then | |
begin | |
OutputDebugStringA(PAnsiChar(currentNode^.result)); | |
// Also add the output of its failure node, if it is final | |
if currentNode^.failureNode^.isFinal then | |
begin | |
currentNode := currentNode^.failureNode; | |
OutputDebugStringA(PAnsiChar(currentNode^.result)); | |
end; | |
end; | |
end; | |
end; | |
procedure TAhoCorasick.PrintTrie; | |
var | |
Queue: TQueue<ptrNode>; | |
parentNode, currentNode: ptrNode; | |
dbgInfo: AnsiString; | |
begin | |
Queue := TQueue<ptrNode>.Create(); | |
Queue.Enqueue(@root); | |
while Queue.Count > 0 do | |
begin | |
parentNode := Queue.Dequeue(); | |
for currentNode in parentNode^.nextNodes do | |
begin | |
// Enqueue next nodes | |
Queue.Enqueue(currentNode); | |
dbgInfo := currentNode^.alpha + ', ' + IntToStr(currentNode^.depth) + ' -> ' + | |
currentNode^.failureNode^.alpha + ', ' + IntToStr(currentNode^.failureNode^.depth); | |
OutputDebugStringA(PAnsiChar(dbgInfo)); | |
end; | |
end; | |
end; | |
constructor TAhoCorasick.Create; | |
begin | |
// Set root node | |
root.id := 0; | |
root.alpha := #0; | |
root.failureNode := @root; | |
root.nextNodes := TList.Create; | |
root.depth := 0; | |
root.isFinal := False; | |
end; | |
destructor TAhoCorasick.Destroy; | |
var | |
Stack: TStack<ptrNode>; | |
parentNode, currentNode: ptrNode; | |
begin | |
Stack := TStack<ptrNode>.Create(); | |
Stack.Push(@root); | |
while Stack.Count > 0 do | |
begin | |
parentNode := Stack.Pop; | |
for currentNode in parentNode^.nextNodes do | |
begin | |
Stack.Push(currentNode); | |
end; | |
if Assigned(parentNode) and (parentNode <> @root) then | |
begin | |
parentNode^.nextNodes.Free; | |
Dispose(parentNode); | |
end; | |
end; | |
root.nextNodes.Free; | |
Stack.Free; | |
inherited Destroy; | |
end; | |
end. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment