Skip to content

Instantly share code, notes, and snippets.

@packrat386
Created April 26, 2019 19:12
Show Gist options
  • Save packrat386/506b1c0cfa1c431e2ffa4ddb5ef76885 to your computer and use it in GitHub Desktop.
Save packrat386/506b1c0cfa1c431e2ffa4ddb5ef76885 to your computer and use it in GitHub Desktop.
Regex parser because I was bored on a plane
package re
import "fmt"
type builderEdge struct {
from int64
to int64
on rune
}
type builderState struct {
edges []builderEdge
stack *builderStack
operable *builderPair
current int64
count int64
begin int64
end int64
}
func (s *builderState) addEdge(from, to int64, on rune) {
// fmt.Printf("ADDING %d -> %d : %s\n", from, to, string(on))
s.edges = append(s.edges, builderEdge{from, to, on})
}
func (s *builderState) nextState() int64 {
s.count++
return s.count
}
func (s *builderState) incomingEdges(st int64) []int64 {
val := make([]int64, 0)
for _, v := range s.edges {
if v.to == st {
val = append(val, v.from)
}
}
return val
}
func (s *builderState) outgoingEdges(st int64) []int64 {
val := make([]int64, 0)
for _, v := range s.edges {
if v.from == st {
val = append(val, v.from)
}
}
return val
}
func (s *builderState) start() {
s.stack.push(&builderPair{s.nextState(), s.nextState()})
s.current = s.stack.top().begin
s.begin = s.current
s.end = s.stack.top().end
}
func (s *builderState) finish() {
s.addEdge(s.current, s.stack.top().end, '?')
s.stack.pop()
}
func (s *builderState) character(r rune) {
next := s.nextState()
s.operable = &builderPair{s.current, next}
s.addEdge(s.current, next, r)
s.current = next
}
func (s *builderState) openParen() {
s.operable = nil
s.stack.push(&builderPair{s.current, s.nextState()})
}
func (s *builderState) pipe() {
s.operable = nil
s.addEdge(s.current, s.stack.top().end, '?')
s.current = s.stack.top().begin
}
func (s *builderState) closeParen() {
s.addEdge(s.current, s.stack.top().end, '?')
s.operable = &builderPair{s.stack.top().begin, s.stack.top().end}
s.current = s.stack.top().end
s.stack.pop()
}
func (s *builderState) star() {
next := s.nextState()
s.addEdge(s.current, s.operable.begin, '?')
s.addEdge(s.operable.begin, next, '?')
s.operable = nil
s.current = next
}
func (b *builderState) debug() {
return
fmt.Println("begin : ", b.begin)
fmt.Println("end : ", b.end)
for _, e := range b.edges {
fmt.Printf("%d -> %d : %s\n", e.from, e.to, string(e.on))
}
}
func (b *builderState) mergeable() (int64, int64, bool) {
for _, e := range b.edges {
if e.on == '?' {
// fmt.Printf("EMPTY TRANSITION %d -> %d\n", e.from, e.to)
return e.from, e.to, true
}
}
return 0, 0, false
}
func contain(es []builderEdge, e builderEdge) bool {
for _, oe := range es {
if oe == e {
return true
}
}
return false
}
func (b *builderState) merge(l, r int64) {
newEdges := make([]builderEdge, 0)
for _, e := range b.edges {
if e.to == r {
e.to = l
}
if e.from == r {
e.from = l
}
if e.from == e.to && e.on == '?' {
// fmt.Printf("RM SELF EMPTY %d -> %d\n", e.from, e.to)
continue
}
newEdges = append(newEdges, e)
}
b.edges = newEdges
if b.begin == r {
b.begin = l
}
if b.end == r {
b.end = l
}
}
func (b *builderState) condense() {
// fmt.Println("CONDENSE")
for {
if l, r, ok := b.mergeable(); ok {
// fmt.Printf("MERGING %d with %d\n", l, r)
b.merge(l, r)
b.debug()
} else {
break
}
}
// fmt.Println("DONE")
}
func builderFromString(input string) *builderState {
bs := &builderState{
edges: make([]builderEdge, 0),
stack: new(builderStack),
}
bs.start()
for _, c := range input {
switch c {
case '(':
bs.openParen()
case ')':
bs.closeParen()
case '|':
bs.pipe()
case '*':
bs.star()
default:
bs.character(c)
}
// fmt.Println("CURR: ", bs.current)
}
bs.finish()
return bs
}
package re
import (
"testing"
)
func TestSomething(t *testing.T) {
e := Compile("a(b|c)*d")
if !e.Match("abbcd") {
t.Error("Should match abbcd")
}
if e.Match("ab") {
t.Error("Should not match ab")
}
e = Compile("a(b(b))(bb)*a")
if !e.Match("abba") {
t.Error("Should match abba")
}
if !e.Match("abbbba") {
t.Error("Should match abbbba")
}
e = Compile("a(b|)c")
if !e.Match("abc") {
t.Error("Should match abc")
}
if !e.Match("ac") {
t.Error("Should match ac")
}
e = Compile("(ab)*(a|b)b")
if !e.Match("ababa") {
t.Error("should match aaaba")
}
if e.Match("aa") {
t.Error("should not match aa")
}
e = Compile("a(b|)c")
if !e.Match("abc") {
t.Error("should match abc")
}
if !e.Match("abc") {
t.Error("should match ac")
}
}
func TestShit(t *testing.T) {
e := Compile("(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)(a|)aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
if !e.Match("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") {
t.Error("sadface")
}
}
package re
type Expr struct {
edges []builderEdge
current []int64
begin int64
end int64
}
func Compile(re string) *Expr {
b := builderFromString(re)
b.condense()
b.debug()
e := &Expr{
edges: make([]builderEdge, len(b.edges)),
begin: b.begin,
end: b.end,
}
copy(e.edges, b.edges)
return e
}
func (e *Expr) Match(input string) bool {
e.current = []int64{e.begin}
for _, r := range input {
// fmt.Println("STATE: ", e.current)
e.advance(r)
if e.done() {
return true
}
}
return false
}
func (e *Expr) done() bool {
return containsint64(e.current, e.end)
}
func (e *Expr) advance(r rune) {
next := make([]int64, 0)
for _, ed := range e.edges {
// fmt.Printf("CHECKING %d -> %d : %s\n", ed.from, ed.to, string(ed.on))
if containsint64(e.current, ed.from) && (ed.on == r || ed.on == '?') {
next = append(next, ed.to)
}
}
e.current = next
}
func containsint64(s []int64, i int64) bool {
for _, v := range s {
if v == i {
return true
}
}
return false
}
package re
type builderStack struct {
curr *builderStackNode
}
type builderPair struct {
begin int64
end int64
}
type builderStackNode struct {
val *builderPair
prev *builderStackNode
}
func (s *builderStack) push(p *builderPair) {
s.curr = &builderStackNode{
val: p,
prev: s.curr,
}
}
func (s *builderStack) top() *builderPair {
return s.curr.val
}
func (s *builderStack) pop() {
s.curr = s.curr.prev
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment