Last active
February 8, 2022 04:23
-
-
Save RexSkz/56dce332787503d23e99945918cd49f7 to your computer and use it in GitHub Desktop.
A simple code to show how NFA RegExp engine works.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
) | |
func match(s string, rules []string, sIndex, ruleIndex int) bool { | |
if ruleIndex == len(rules) { | |
fmt.Println("match: success") | |
return true | |
} | |
if sIndex == len(s) { | |
return false | |
} | |
fmt.Printf("match: %s (using rule #%d: %s)\n", s, ruleIndex, rules[ruleIndex]) | |
for i := 0; i < sIndex+7; i++ { | |
fmt.Print(" ") | |
} | |
fmt.Println("^") | |
switch rules[ruleIndex] { | |
case ".*": | |
// character `.` matches anything | |
if true { | |
// make the greedy choice first, use this rule to match the next position | |
if match(s, rules, sIndex+1, ruleIndex) { | |
return true | |
} | |
fmt.Println("match: backtrack") | |
// ignore this rule, use the next rule to match this position | |
if match(s, rules, sIndex, ruleIndex+1) { | |
return true | |
} | |
} | |
case ".*?": | |
// character `.` matches anything | |
if true { | |
// try not to match character first because this is not greedy | |
if match(s, rules, sIndex, ruleIndex+1) { | |
return true | |
} | |
fmt.Println("match: backtrack") | |
// have to make the greedy choice | |
if match(s, rules, sIndex+1, ruleIndex) { | |
return true | |
} | |
} | |
return false | |
case "=": | |
if s[sIndex] == '=' { | |
if match(s, rules, sIndex+1, ruleIndex+1) { | |
return true | |
} | |
} | |
case ";": | |
if s[sIndex] == ';' { | |
if match(s, rules, sIndex+1, ruleIndex+1) { | |
return true | |
} | |
} | |
} | |
fmt.Println("match: backtrack") | |
return false | |
} | |
func main() { | |
rules := []string{".*", ".*", "=", ".*?", ";"} | |
var matched bool | |
a := "aa=bb;" | |
lenA := len(a) | |
matched = false | |
for index := 0; index < lenA; index++ { | |
fmt.Printf("main: start matching the string \"%s\" at position %d\n", a, index) | |
if match(a, rules, index, 0) { | |
fmt.Println("main: matched") | |
matched = true | |
break | |
} | |
} | |
if !matched { | |
fmt.Println("main: not matched") | |
} | |
b := "aa=bbb" | |
lenB := len(b) | |
matched = false | |
for index := 0; index < lenB; index++ { | |
fmt.Printf("main: start matching the string \"%s\" at position %d\n", b, index) | |
if match(b, rules, index, 0) { | |
fmt.Println("main: matched") | |
matched = true | |
break | |
} | |
} | |
if !matched { | |
fmt.Println("main: not matched") | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ go run match.go | |
main: start matching the string "aa=bb;" at position 0 | |
match: aa=bb; (using rule #0: .*) | |
^ | |
match: aa=bb; (using rule #0: .*) | |
^ | |
match: aa=bb; (using rule #0: .*) | |
^ | |
match: aa=bb; (using rule #0: .*) | |
^ | |
match: aa=bb; (using rule #0: .*) | |
^ | |
match: aa=bb; (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: aa=bb; (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #2: =) | |
^ | |
match: aa=bb; (using rule #3: .*?) | |
^ | |
match: aa=bb; (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #3: .*?) | |
^ | |
match: aa=bb; (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bb; (using rule #3: .*?) | |
^ | |
match: aa=bb; (using rule #4: ;) | |
^ | |
match: success | |
main: matched | |
main: start matching the string "aa=bbb" at position 0 | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
main: start matching the string "aa=bbb" at position 1 | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
main: start matching the string "aa=bbb" at position 2 | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #3: .*?) | |
^ | |
match: aa=bbb (using rule #4: ;) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
main: start matching the string "aa=bbb" at position 3 | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
main: start matching the string "aa=bbb" at position 4 | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
main: start matching the string "aa=bbb" at position 5 | |
match: aa=bbb (using rule #0: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #1: .*) | |
^ | |
match: backtrack | |
match: aa=bbb (using rule #2: =) | |
^ | |
match: backtrack | |
match: backtrack | |
match: backtrack | |
main: not matched |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment