Created
April 26, 2019 19:12
-
-
Save packrat386/506b1c0cfa1c431e2ffa4ddb5ef76885 to your computer and use it in GitHub Desktop.
Regex parser because I was bored on a plane
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package re | |
import "fmt" | |
type builderEdge struct { | |
from int64 | |
to int64 | |
on rune | |
} | |
type builderState struct { | |
edges []builderEdge | |
stack *builderStack | |
operable *builderPair | |
current int64 | |
count int64 | |
begin int64 | |
end int64 | |
} | |
func (s *builderState) addEdge(from, to int64, on rune) { | |
// fmt.Printf("ADDING %d -> %d : %s\n", from, to, string(on)) | |
s.edges = append(s.edges, builderEdge{from, to, on}) | |
} | |
func (s *builderState) nextState() int64 { | |
s.count++ | |
return s.count | |
} | |
func (s *builderState) incomingEdges(st int64) []int64 { | |
val := make([]int64, 0) | |
for _, v := range s.edges { | |
if v.to == st { | |
val = append(val, v.from) | |
} | |
} | |
return val | |
} | |
func (s *builderState) outgoingEdges(st int64) []int64 { | |
val := make([]int64, 0) | |
for _, v := range s.edges { | |
if v.from == st { | |
val = append(val, v.from) | |
} | |
} | |
return val | |
} | |
func (s *builderState) start() { | |
s.stack.push(&builderPair{s.nextState(), s.nextState()}) | |
s.current = s.stack.top().begin | |
s.begin = s.current | |
s.end = s.stack.top().end | |
} | |
func (s *builderState) finish() { | |
s.addEdge(s.current, s.stack.top().end, '?') | |
s.stack.pop() | |
} | |
func (s *builderState) character(r rune) { | |
next := s.nextState() | |
s.operable = &builderPair{s.current, next} | |
s.addEdge(s.current, next, r) | |
s.current = next | |
} | |
func (s *builderState) openParen() { | |
s.operable = nil | |
s.stack.push(&builderPair{s.current, s.nextState()}) | |
} | |
func (s *builderState) pipe() { | |
s.operable = nil | |
s.addEdge(s.current, s.stack.top().end, '?') | |
s.current = s.stack.top().begin | |
} | |
func (s *builderState) closeParen() { | |
s.addEdge(s.current, s.stack.top().end, '?') | |
s.operable = &builderPair{s.stack.top().begin, s.stack.top().end} | |
s.current = s.stack.top().end | |
s.stack.pop() | |
} | |
func (s *builderState) star() { | |
next := s.nextState() | |
s.addEdge(s.current, s.operable.begin, '?') | |
s.addEdge(s.operable.begin, next, '?') | |
s.operable = nil | |
s.current = next | |
} | |
func (b *builderState) debug() { | |
return | |
fmt.Println("begin : ", b.begin) | |
fmt.Println("end : ", b.end) | |
for _, e := range b.edges { | |
fmt.Printf("%d -> %d : %s\n", e.from, e.to, string(e.on)) | |
} | |
} | |
func (b *builderState) mergeable() (int64, int64, bool) { | |
for _, e := range b.edges { | |
if e.on == '?' { | |
// fmt.Printf("EMPTY TRANSITION %d -> %d\n", e.from, e.to) | |
return e.from, e.to, true | |
} | |
} | |
return 0, 0, false | |
} | |
func contain(es []builderEdge, e builderEdge) bool { | |
for _, oe := range es { | |
if oe == e { | |
return true | |
} | |
} | |
return false | |
} | |
func (b *builderState) merge(l, r int64) { | |
newEdges := make([]builderEdge, 0) | |
for _, e := range b.edges { | |
if e.to == r { | |
e.to = l | |
} | |
if e.from == r { | |
e.from = l | |
} | |
if e.from == e.to && e.on == '?' { | |
// fmt.Printf("RM SELF EMPTY %d -> %d\n", e.from, e.to) | |
continue | |
} | |
newEdges = append(newEdges, e) | |
} | |
b.edges = newEdges | |
if b.begin == r { | |
b.begin = l | |
} | |
if b.end == r { | |
b.end = l | |
} | |
} | |
func (b *builderState) condense() { | |
// fmt.Println("CONDENSE") | |
for { | |
if l, r, ok := b.mergeable(); ok { | |
// fmt.Printf("MERGING %d with %d\n", l, r) | |
b.merge(l, r) | |
b.debug() | |
} else { | |
break | |
} | |
} | |
// fmt.Println("DONE") | |
} | |
func builderFromString(input string) *builderState { | |
bs := &builderState{ | |
edges: make([]builderEdge, 0), | |
stack: new(builderStack), | |
} | |
bs.start() | |
for _, c := range input { | |
switch c { | |
case '(': | |
bs.openParen() | |
case ')': | |
bs.closeParen() | |
case '|': | |
bs.pipe() | |
case '*': | |
bs.star() | |
default: | |
bs.character(c) | |
} | |
// fmt.Println("CURR: ", bs.current) | |
} | |
bs.finish() | |
return bs | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package re | |
import ( | |
"testing" | |
) | |
func TestSomething(t *testing.T) { | |
e := Compile("a(b|c)*d") | |
if !e.Match("abbcd") { | |
t.Error("Should match abbcd") | |
} | |
if e.Match("ab") { | |
t.Error("Should not match ab") | |
} | |
e = Compile("a(b(b))(bb)*a") | |
if !e.Match("abba") { | |
t.Error("Should match abba") | |
} | |
if !e.Match("abbbba") { | |
t.Error("Should match abbbba") | |
} | |
e = Compile("a(b|)c") | |
if !e.Match("abc") { | |
t.Error("Should match abc") | |
} | |
if !e.Match("ac") { | |
t.Error("Should match ac") | |
} | |
e = Compile("(ab)*(a|b)b") | |
if !e.Match("ababa") { | |
t.Error("should match aaaba") | |
} | |
if e.Match("aa") { | |
t.Error("should not match aa") | |
} | |
e = Compile("a(b|)c") | |
if !e.Match("abc") { | |
t.Error("should match abc") | |
} | |
if !e.Match("abc") { | |
t.Error("should match ac") | |
} | |
} | |
func TestShit(t *testing.T) { | |
e := Compile("(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") | |
if !e.Match("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") { | |
t.Error("sadface") | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package re | |
type Expr struct { | |
edges []builderEdge | |
current []int64 | |
begin int64 | |
end int64 | |
} | |
func Compile(re string) *Expr { | |
b := builderFromString(re) | |
b.condense() | |
b.debug() | |
e := &Expr{ | |
edges: make([]builderEdge, len(b.edges)), | |
begin: b.begin, | |
end: b.end, | |
} | |
copy(e.edges, b.edges) | |
return e | |
} | |
func (e *Expr) Match(input string) bool { | |
e.current = []int64{e.begin} | |
for _, r := range input { | |
// fmt.Println("STATE: ", e.current) | |
e.advance(r) | |
if e.done() { | |
return true | |
} | |
} | |
return false | |
} | |
func (e *Expr) done() bool { | |
return containsint64(e.current, e.end) | |
} | |
func (e *Expr) advance(r rune) { | |
next := make([]int64, 0) | |
for _, ed := range e.edges { | |
// fmt.Printf("CHECKING %d -> %d : %s\n", ed.from, ed.to, string(ed.on)) | |
if containsint64(e.current, ed.from) && (ed.on == r || ed.on == '?') { | |
next = append(next, ed.to) | |
} | |
} | |
e.current = next | |
} | |
func containsint64(s []int64, i int64) bool { | |
for _, v := range s { | |
if v == i { | |
return true | |
} | |
} | |
return false | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package re | |
type builderStack struct { | |
curr *builderStackNode | |
} | |
type builderPair struct { | |
begin int64 | |
end int64 | |
} | |
type builderStackNode struct { | |
val *builderPair | |
prev *builderStackNode | |
} | |
func (s *builderStack) push(p *builderPair) { | |
s.curr = &builderStackNode{ | |
val: p, | |
prev: s.curr, | |
} | |
} | |
func (s *builderStack) top() *builderPair { | |
return s.curr.val | |
} | |
func (s *builderStack) pop() { | |
s.curr = s.curr.prev | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment