Skip to content

Instantly share code, notes, and snippets.

@monyone
Created June 12, 2023 13:14
Show Gist options
  • Save monyone/f9d240701fc85c6cac904b04f5d29210 to your computer and use it in GitHub Desktop.
Save monyone/f9d240701fc85c6cac904b04f5d29210 to your computer and use it in GitHub Desktop.
Aho Corasick Implementation in TypeScript
class Trie {
private goto: Map<string, Trie> = new Map<string, Trie>();
public keywords: string[] = []
public failure: Trie | null = null;
public has(s: string) {
return this.goto.has(s);
}
public get(s: string) {
return this.goto.get(s);
}
public set(s: string, next: Trie) {
return this.goto.set(s, next);
}
public entries() {
return this.goto.entries();
}
public empty() {
return this.keywords.length === 0;
}
public add(k: string) {
this.keywords.push(k);
}
public join(t: Trie) {
this.keywords.push(... t.keywords);
}
}
export default class AhoTrie {
private root = new Trie();
constructor(keywords: string[]) {
// build goto
for (const keyword of keywords) {
let current: Trie = this.root;
for (const ch of keyword) {
let next: Trie = current.get(ch) ?? (new Trie())
current.set(ch, next);
current = next;
}
current.add(keyword);
}
// build failure
const queue: [Trie, Trie, string][] = [];
for (const [ch, next] of this.root.entries()) {
queue.push([next, this.root, ch]);
}
while (queue.length > 0) {
const [current, parent, ch] = queue.shift()!;
let failure = parent.failure;
while (failure != null && !failure.has(ch)) {
failure = failure.failure;
}
current.failure = failure?.get(ch) ?? this.root;
current.join(current.failure);
for (const [ch, next] of current.entries()) {
queue.push([next, current, ch]);
}
}
this.root.failure = this.root;
}
parseText(text: string): string[] {
const textLength = text.length;
const collected: string[] = [];
let state: Trie = this.root;
for (const ch of text) {
if (!state.empty()) {
collected.push(... state.keywords);
}
/*
while (!state.has(ch) && state !== this.root) {
state = state.failure ?? this.root;
}
state = state.get(ch) ?? this.root;
//*/
//*
let next = state.get(ch);
while (next == null) {
state = state.failure!;
next = state.get(ch) ?? (state !== this.root ? undefined : this.root);
}
state = next;
//*/
}
if (!state.empty()) {
collected.push(... state.keywords);
}
return collected;
}
}
class Trie {
private goto: Map<string, Trie> = new Map<string, Trie>();
public terminate: boolean = false;
public failure: Trie | null = null;
public has(s: string) {
return this.goto.has(s);
}
public get(s: string) {
return this.goto.get(s);
}
public set(s: string, next: Trie) {
return this.goto.set(s, next);
}
public entries() {
return this.goto.entries();
}
}
export default class NGTrie {
private root = new Trie();
constructor(ngwords: string[]) {
// build goto
for (const ngword of ngwords) {
let current: Trie = this.root;
for (const ch of ngword) {
let next: Trie = current.get(ch) ?? (new Trie())
current.set(ch, next);
current = next;
}
current.terminate = true;
}
// build failure
const queue: [Trie, Trie, string][] = [];
for (const [ch, next] of this.root.entries()) {
queue.push([next, this.root, ch]);
}
while (queue.length > 0) {
const [current, parent, ch] = queue.shift()!;
let failure = parent.failure;
while (failure != null && !failure.has(ch)) {
failure = failure.failure;
}
current.failure = failure?.get(ch) ?? this.root;
current.terminate ||= current.failure.terminate;
for (const [ch, next] of current.entries()) {
queue.push([next, current, ch]);
}
}
this.root.failure = this.root;
}
// validate text
validate(text: string): boolean {
const textLength = text.length;
const collected: string[] = [];
let state: Trie = this.root;
for (const ch of text) {
if (state.terminate) { return true; }
while (!state.has(ch) && state !== this.root) {
state = state.failure ?? this.root;
}
state = state.get(ch) ?? this.root;
}
return state.terminate;
}
}
@monyone
Copy link
Author

monyone commented May 31, 2024

RegExp を Trie っぽくしたが、普通の RegExp の 100 倍以上遅い

export const anyof = (keywords: string[], flags?: string) => {
  const root = new Trie();

  // build trie
  for (const keyword of keywords) {
    let current: Trie = root;
    for (const ch of keyword) {
      let next: Trie = current.get(ch) ?? (new Trie())
      current.set(ch, next);
      current = next;
    }
    current.add(keyword);
  }

  const make = (trie: Trie): string => {
    let result: string[] = [];

    let non_capture = true;
    for (const [ch, child] of trie.entries()) {
      const is_target = child.keywords.length > 0;
      const recursion = make(child);
      const is_optional = is_target && recursion !== ''
      non_capture &&= !is_optional;
      result.push(`${ch}${recursion}${is_optional ? '?' : ''}`);
    }
    const regexp = result.join('|')
    non_capture &&= result.length <= 1;
    return non_capture ? regexp : `(:?${regexp})`;
  }

  return new RegExp(make(root), flags);
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment