huydx/simple_regex.go

## simple_regex.go
package main

import (
	"fmt"
)

func match(regex string, text string) bool {
	if string(regex[0]) == "^" {
		return matchhere(regex[1:], text)
	}
	for {
		if matchhere(regex, text) {
			return true
		} else {
			if end(text) {
				return false
			}
			text = text[1:]
		}
	}
	return false
}

func matchhere(regex string, text string) bool {
	fmt.Printf("function %s called params %s %s\n", "matchhere", regex, text)
	if end(regex) {
		return false
	}
	if string(regex[0]) == "$" && len(regex) == 1 {
		return len(text) == 0
	}

	if len(regex) > 2 && string(regex[1]) == "*" {
		return matchstar(string(regex[0]), regex[2:], text)
	}

	if len(text) > 0 && (string(regex[0]) == "." || regex[0] == text[0]) {
		return matchhere(regex[1:], text[1:])
	}
	return false
}

func end(text string) bool {
	return len(text) == 0
}

func matchstar(c string, regex string, text string) bool {
	fmt.Printf("function %s called params %s %s %s\n", "matchstar", c, regex, text)
	for {
		fmt.Printf("  loop inside matchstar %s\n", text)
		if len(text) > 0 && string(text[0]) == c {
			text = text[1:]
			continue
		}
		return matchhere(regex, text)
	}
}

func main() {
	fmt.Println(match("^fo*$", "foo"))
	fmt.Println(match("^fo*bar$", "foo"))
	fmt.Println(match("^fo*bar$", "foooooooooooobar"))
}

////////////////////////
Some note about parsing technique
Reference link:
- http://math.hws.edu/javanotes/c9/s5.html
- http://perl.plover.com/Regex/article.html
- http://matt.might.net/articles/parsing-regex-with-recursive-descent/
- https://swtch.com/~rsc/regexp/regexp1.html
- http://matt.might.net/articles/nonblocking-lexing-toolkit-based-on-regex-derivatives/
- http://genius.cat-v.org/brian-kernighan/articles/beautiful

lexer : recognize token
parser : from token to build structure tree

3 types of regex:
- basic regular expression (BRE)
- extended regular expression (ERE)
- Perl compatible regular expression (PCRE)

regex symbol has math equivalent meaning

http://web.cse.ohio-state.edu/software/2231/web-sw2/extras/slides/27.Recursive-Descent-Parsing.pdf
a recursive descent parser for regex. NOT all grammar suit for recursive descent
- idea is to construct procedure for each kind of grammar
- peek(), next(), eat(item)
- LL(k) type : determistic by examining next k tokens

Context free grammar
- production rules describe "all" possible strings
- production rules == simple replacement

EBNF, ABNF: http://matt.might.net/articles/grammars-bnf-ebnf/

- NFA:
     - special kind of finite machine
     - include of:
          - set of symbols
          - set of states
          - next-state function
          - subset of states which accept state
          - initial state s0
     - NFA represent: table or graph
     - epsilon transition: transition state on an empty string??

- DFA:
     - NO epsilon transition

- Problem: convert regex string to a datastructure to easy manipulate and pattern matching
     - convert regex to NFA
          - using thompson algorithm
          - it's like arithmetic evaluation

YACC
%{
     header
}
%union
%token
%type

%%
rules
%%

user setting

http://loveruby.net/ja/rhg/book/yacc.html
http://qiita.com/k0kubun/items/1b641dfd186fe46feb65
	package main

	import (
	"fmt"
	)

	func match(regex string, text string) bool {
	if string(regex[0]) == "^" {
	return matchhere(regex[1:], text)
	}
	for {
	if matchhere(regex, text) {
	return true
	} else {
	if end(text) {
	return false
	}
	text = text[1:]
	}
	}
	return false
	}

	func matchhere(regex string, text string) bool {
	fmt.Printf("function %s called params %s %s\n", "matchhere", regex, text)
	if end(regex) {
	return false
	}
	if string(regex[0]) == "$" && len(regex) == 1 {
	return len(text) == 0
	}

	if len(regex) > 2 && string(regex[1]) == "*" {
	return matchstar(string(regex[0]), regex[2:], text)
	}

	if len(text) > 0 && (string(regex[0]) == "." \|\| regex[0] == text[0]) {
	return matchhere(regex[1:], text[1:])
	}
	return false
	}

	func end(text string) bool {
	return len(text) == 0
	}

	func matchstar(c string, regex string, text string) bool {
	fmt.Printf("function %s called params %s %s %s\n", "matchstar", c, regex, text)
	for {
	fmt.Printf(" loop inside matchstar %s\n", text)
	if len(text) > 0 && string(text[0]) == c {
	text = text[1:]
	continue
	}
	return matchhere(regex, text)
	}
	}

	func main() {
	fmt.Println(match("^fo*$", "foo"))
	fmt.Println(match("^fo*bar$", "foo"))
	fmt.Println(match("^fo*bar$", "foooooooooooobar"))
	}

	////////////////////////
	Some note about parsing technique
	Reference link:
	- http://math.hws.edu/javanotes/c9/s5.html
	- http://perl.plover.com/Regex/article.html
	- http://matt.might.net/articles/parsing-regex-with-recursive-descent/
	- https://swtch.com/~rsc/regexp/regexp1.html
	- http://matt.might.net/articles/nonblocking-lexing-toolkit-based-on-regex-derivatives/
	- http://genius.cat-v.org/brian-kernighan/articles/beautiful

	lexer : recognize token
	parser : from token to build structure tree

	3 types of regex:
	- basic regular expression (BRE)
	- extended regular expression (ERE)
	- Perl compatible regular expression (PCRE)

	regex symbol has math equivalent meaning

	http://web.cse.ohio-state.edu/software/2231/web-sw2/extras/slides/27.Recursive-Descent-Parsing.pdf
	a recursive descent parser for regex. NOT all grammar suit for recursive descent
	- idea is to construct procedure for each kind of grammar
	- peek(), next(), eat(item)
	- LL(k) type : determistic by examining next k tokens

	Context free grammar
	- production rules describe "all" possible strings
	- production rules == simple replacement

	EBNF, ABNF: http://matt.might.net/articles/grammars-bnf-ebnf/

	- NFA:
	- special kind of finite machine
	- include of:
	- set of symbols
	- set of states
	- next-state function
	- subset of states which accept state
	- initial state s0
	- NFA represent: table or graph
	- epsilon transition: transition state on an empty string??

	- DFA:
	- NO epsilon transition

	- Problem: convert regex string to a datastructure to easy manipulate and pattern matching
	- convert regex to NFA
	- using thompson algorithm
	- it's like arithmetic evaluation

	YACC
	%{
	header
	}
	%union
	%token
	%type

	%%
	rules
	%%

	user setting

	http://loveruby.net/ja/rhg/book/yacc.html
	http://qiita.com/k0kubun/items/1b641dfd186fe46feb65