Maki-Daisuke/digists_test.go

## digists_test.go
package regexp

import "testing"

func TestCompile(t *testing.T) {
	// 正規表現パターンをコンパイルする
	re, err := Compile("^[0-9]+$")
	if err != nil {
		// コンパイルに失敗した場合はテスト失敗
		t.Errorf("Compile failed: %v", err)
	}

	// 文字列 "123456" が正規表現パターン "^[0-9]+$" にマッチすることを確認する
	matched := re.Match([]byte("123456"))
	if !matched {
		// マッチしない場合はテスト失敗
		t.Errorf("Match failed")
	}
}

## regexp.go
package regexp

import (
	"errors"
	"regexp/syntax"
)

// Regexp represents a compiled regular expression.
type Regexp struct {
	// NFA のステートの集合
	states []state

	// 入力文字列のインデックス
	inputIdx int

	// マッチした文字列
	matched string

	// マッチした部分文字列の位置
	matchedIdx int
}

// Compile compiles a regular expression pattern into a regular expression object.
func Compile(pattern string) (*Regexp, error) {
	// 正規表現パターンをパースする
	parsed, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
		return nil, err
	}

	// NFA を構築する
	nfa, err := buildNFA(parsed)
	if err != nil {
		return nil, err
	}

	// NFA を DFA に変換する
	dfa := convertToDFA(nfa)

	// Regexp オブジェクトを作成して返す
	return &Regexp{
		states: dfa,
	}, nil
}

// Match matches the regular expression against the input string.
func (re *Regexp) Match(b []byte) bool {
	// 入力文字列のインデックスを初期化する
	re.inputIdx = 0

	// マッチした文字列を初期化する
	re.matched = ""

	// マッチした部分文字列の位置を初期化する
	re.matchedIdx = -1

	// DFA のステートの集合
	states := make([]int, len(re.states))
	for i := range states {
		states[i] = i
	}

	// DFA を実行し、マッチするかどうかを判定する
	return re.runDFA(b, states)
}

// Find finds the regular expression in the input string and returns the matched string.
func (re *Regexp) Find(b []byte) []byte {
	// 入力文字列のインデックスを初期化する
	re.inputIdx = 0

	// マッチした文字列を初期化する
	re.matched = ""

	// マッチした部分文字列の位置を初期化する
	re.matchedIdx = -1

	// DFA のステートの集合
	states := make([]int, len(re.states))
	for i := range states {
		states[i] = i
	}

	// DFA を実行し、マッチするかどうかを判定する
	if !re.runDFA(b, states) {
		// マッチしない場合は、空文字列を返す
		return []byte("")
	}

	// マッチした文字列を返す
	return []byte(re.matched)
}

// Split splits the input string by the regular expression and returns the splitted string array.
func (re *Regexp) Split(b []byte, limit int) [][]byte {
	// 入力文字列のインデックスを初期化する
	re.inputIdx = 0
	// マッチした文字列を初期化する
	re.matched = ""

	// マッチした部分文字列の位置を初期化する
	re.matchedIdx = -1

	// 分割された文字列の配列
	var result [][]byte

	// DFA のステートの集合
	states := make([]int, len(re.states))
	for i := range states {
		states[i] = i
	}

	// 入力文字列を分割する
	for re.inputIdx < len(b) {
		// DFA を実行し、マッチするかどうかを判定する
		if !re.runDFA(b, states) {
			// マッチしない場合は、残りの文字列を分割された文字列の配列に追加する
			result = append(result, b[re.inputIdx:])
			break
		}

		// 分割された文字列を分割された文字列の配列に追加する
		result = append(result, b[re.inputIdx:re.matchedIdx])

		// 入力文字列のインデックスを更新する
		re.inputIdx = re.matchedIdx + len(re.matched)

		// 分割された文字列の数が制限値に達した場合は、残りの文字列を分割された文字列の配列に追加する
		if limit > 0 && len(result) == limit-1 {
			result = append(result, b[re.inputIdx:])
			break
		}
	}
	return result
}

// runDFA runs the DFA and returns whether the input string matches the regular expression or not.
func (re *Regexp) runDFA(b []byte, states []int) bool {
	for {
		// 現在のステートの集合から、次に遷移できるステートの集合を求める
		nextStates := re.getNextStates(states, b[re.inputIdx])

		// 次に遷移できるステートがない場合は、終了する
		if len(nextStates) == 0 {
			return false
		}

		// 次に遷移できるステートから、マッチするかどうかを判定する
		if re.isMatchState(nextStates) {
			// マッチした場合は、マッチした文字列とマッチした部分文字列の位置を更新する
			re.matched = string(b[re.inputIdx : re.inputIdx+1])
			re.matchedIdx = re.inputIdx

			// 入力文字列のインデックスを更新する
			re.inputIdx++

			// マッチした場合は、true を返す
			return true
		}

		// 現在のステートの集合を更新する
		states = nextStates

		// 入力文字列のインデックスを更新する
		re.inputIdx++
	}

}

// getNextStates returns the next states which can be transitioned from the given states by the given byte.
func (re *Regexp) getNextStates(states []int, b byte) []int {
	// 次に遷移できるステートの集合
	var nextStates []int
	// 現在のステートの集合から、次に遷移できるステートの集合を求める
	for _, state := range states {
		if state < 0 || state >= len(re.states) {
			// 無効なステートの場合は、処理をスキップする
			continue
		}

		// 現在のステートが指定された文字に遷移可能かどうかを判定する
		if re.states[state].canTransit(b) {
			// 遷移可能な場合は、次に遷移できるステートの集合に追加する
			nextStates = append(nextStates, re.states[state].next)
		}
	}

	return nextStates

}

// isMatchState returns whether the given states include the match state or not.
func (re *Regexp) isMatchState(states []int) bool {
	// 現在のステートの集合から、マッチするかどうかを判定する
	for _, state := range states {
		if state < 0 || state >= len(re.states) {
			// 無効なステートの場合は、処理をスキップする
			continue
		}

		// 現在のステートがマッチするかどうかを判定する
		if re.states[state].match {
			// マッチする場合は、true を返す
			return true
		}
	}

	// マッチしない場合は、false を返す
	return false
}

// state represents a state of the NFA.
type state struct {
	// 次に遷移するステート
	next int
	// 指定された文字に遷移するかどうか
	transit byte

	// マッチするかどうか
	match bool
}

// canTransit returns whether the state can be transitioned by the given byte or not.
func (s *state) canTransit(b byte) bool {
	// 次に遷移するステートが未定義の場合は、遷移不可能
	if s.next < 0 {
		return false
	}

	// 指定された文字以外で遷移する場合は、遷移不可能
	if s.transit != b && s.transit != 0 {
		return false
	}
	// 指定された文字に遷移可能な場合は、true を返す
	return true

}

// buildNFA builds the NFA from the parsed regular expression.
func buildNFA(parsed *syntax.Regexp) ([]state, error) {
	// TODO: NFA を構築する

	return nil, errors.New("not implemented yet")

}

// convertToDFA converts the NFA to the DFA.
func convertToDFA(nfa []state) []state {
	// TODO: NFA を DFA に変換する

	return nil
}

// buildNFA builds the NFA from the parsed regular expression.
func buildNFA(parsed *syntax.Regexp) ([]state, error) {
	// 正規表現パターンから、AST を生成する
	ast, err := syntax.Parse(string(parsed), syntax.Perl)
	if err != nil {
		return nil, err
	}

	// AST のノードを走査し、NFA のステートを作成する
	var nfa []state
	if err := buildNFANode(ast, &nfa); err != nil {
		return nil, err
	}

	return nfa, nil
}

// buildNFANode creates the NFA states from the given AST node.
func buildNFANode(node *syntax.Regexp, nfa *[]state) error {
	// TODO: AST のノードから、NFA のステートを作成する

	return errors.New("not implemented yet")
}
	package regexp

	import "testing"

	func TestCompile(t *testing.T) {
	// 正規表現パターンをコンパイルする
	re, err := Compile("^[0-9]+$")
	if err != nil {
	// コンパイルに失敗した場合はテスト失敗
	t.Errorf("Compile failed: %v", err)
	}

	// 文字列 "123456" が正規表現パターン "^[0-9]+$" にマッチすることを確認する
	matched := re.Match([]byte("123456"))
	if !matched {
	// マッチしない場合はテスト失敗
	t.Errorf("Match failed")
	}
	}
	package regexp

	import (
	"errors"
	"regexp/syntax"
	)

	// Regexp represents a compiled regular expression.
	type Regexp struct {
	// NFA のステートの集合
	states []state

	// 入力文字列のインデックス
	inputIdx int

	// マッチした文字列
	matched string

	// マッチした部分文字列の位置
	matchedIdx int
	}

	// Compile compiles a regular expression pattern into a regular expression object.
	func Compile(pattern string) (*Regexp, error) {
	// 正規表現パターンをパースする
	parsed, err := syntax.Parse(pattern, syntax.Perl)
	if err != nil {
	return nil, err
	}

	// NFA を構築する
	nfa, err := buildNFA(parsed)
	if err != nil {
	return nil, err
	}

	// NFA を DFA に変換する
	dfa := convertToDFA(nfa)

	// Regexp オブジェクトを作成して返す
	return &Regexp{
	states: dfa,
	}, nil
	}

	// Match matches the regular expression against the input string.
	func (re *Regexp) Match(b []byte) bool {
	// 入力文字列のインデックスを初期化する
	re.inputIdx = 0

	// マッチした文字列を初期化する
	re.matched = ""

	// マッチした部分文字列の位置を初期化する
	re.matchedIdx = -1

	// DFA のステートの集合
	states := make([]int, len(re.states))
	for i := range states {
	states[i] = i
	}

	// DFA を実行し、マッチするかどうかを判定する
	return re.runDFA(b, states)
	}

	// Find finds the regular expression in the input string and returns the matched string.
	func (re *Regexp) Find(b []byte) []byte {
	// 入力文字列のインデックスを初期化する
	re.inputIdx = 0

	// マッチした文字列を初期化する
	re.matched = ""

	// マッチした部分文字列の位置を初期化する
	re.matchedIdx = -1

	// DFA のステートの集合
	states := make([]int, len(re.states))
	for i := range states {
	states[i] = i
	}

	// DFA を実行し、マッチするかどうかを判定する
	if !re.runDFA(b, states) {
	// マッチしない場合は、空文字列を返す
	return []byte("")
	}

	// マッチした文字列を返す
	return []byte(re.matched)
	}

	// Split splits the input string by the regular expression and returns the splitted string array.
	func (re *Regexp) Split(b []byte, limit int) [][]byte {
	// 入力文字列のインデックスを初期化する
	re.inputIdx = 0
	// マッチした文字列を初期化する
	re.matched = ""

	// マッチした部分文字列の位置を初期化する
	re.matchedIdx = -1

	// 分割された文字列の配列
	var result [][]byte

	// DFA のステートの集合
	states := make([]int, len(re.states))
	for i := range states {
	states[i] = i
	}

	// 入力文字列を分割する
	for re.inputIdx < len(b) {
	// DFA を実行し、マッチするかどうかを判定する
	if !re.runDFA(b, states) {
	// マッチしない場合は、残りの文字列を分割された文字列の配列に追加する
	result = append(result, b[re.inputIdx:])
	break
	}

	// 分割された文字列を分割された文字列の配列に追加する
	result = append(result, b[re.inputIdx:re.matchedIdx])

	// 入力文字列のインデックスを更新する
	re.inputIdx = re.matchedIdx + len(re.matched)

	// 分割された文字列の数が制限値に達した場合は、残りの文字列を分割された文字列の配列に追加する
	if limit > 0 && len(result) == limit-1 {
	result = append(result, b[re.inputIdx:])
	break
	}
	}
	return result
	}

	// runDFA runs the DFA and returns whether the input string matches the regular expression or not.
	func (re *Regexp) runDFA(b []byte, states []int) bool {
	for {
	// 現在のステートの集合から、次に遷移できるステートの集合を求める
	nextStates := re.getNextStates(states, b[re.inputIdx])

	// 次に遷移できるステートがない場合は、終了する
	if len(nextStates) == 0 {
	return false
	}

	// 次に遷移できるステートから、マッチするかどうかを判定する
	if re.isMatchState(nextStates) {
	// マッチした場合は、マッチした文字列とマッチした部分文字列の位置を更新する
	re.matched = string(b[re.inputIdx : re.inputIdx+1])
	re.matchedIdx = re.inputIdx

	// 入力文字列のインデックスを更新する
	re.inputIdx++

	// マッチした場合は、true を返す
	return true
	}

	// 現在のステートの集合を更新する
	states = nextStates

	// 入力文字列のインデックスを更新する
	re.inputIdx++
	}

	}

	// getNextStates returns the next states which can be transitioned from the given states by the given byte.
	func (re *Regexp) getNextStates(states []int, b byte) []int {
	// 次に遷移できるステートの集合
	var nextStates []int
	// 現在のステートの集合から、次に遷移できるステートの集合を求める
	for _, state := range states {
	if state < 0 \|\| state >= len(re.states) {
	// 無効なステートの場合は、処理をスキップする
	continue
	}

	// 現在のステートが指定された文字に遷移可能かどうかを判定する
	if re.states[state].canTransit(b) {
	// 遷移可能な場合は、次に遷移できるステートの集合に追加する
	nextStates = append(nextStates, re.states[state].next)
	}
	}

	return nextStates

	}

	// isMatchState returns whether the given states include the match state or not.
	func (re *Regexp) isMatchState(states []int) bool {
	// 現在のステートの集合から、マッチするかどうかを判定する
	for _, state := range states {
	if state < 0 \|\| state >= len(re.states) {
	// 無効なステートの場合は、処理をスキップする
	continue
	}

	// 現在のステートがマッチするかどうかを判定する
	if re.states[state].match {
	// マッチする場合は、true を返す
	return true
	}
	}

	// マッチしない場合は、false を返す
	return false
	}

	// state represents a state of the NFA.
	type state struct {
	// 次に遷移するステート
	next int
	// 指定された文字に遷移するかどうか
	transit byte

	// マッチするかどうか
	match bool
	}

	// canTransit returns whether the state can be transitioned by the given byte or not.
	func (s *state) canTransit(b byte) bool {
	// 次に遷移するステートが未定義の場合は、遷移不可能
	if s.next < 0 {
	return false
	}

	// 指定された文字以外で遷移する場合は、遷移不可能
	if s.transit != b && s.transit != 0 {
	return false
	}
	// 指定された文字に遷移可能な場合は、true を返す
	return true

	}

	// buildNFA builds the NFA from the parsed regular expression.
	func buildNFA(parsed *syntax.Regexp) ([]state, error) {
	// TODO: NFA を構築する

	return nil, errors.New("not implemented yet")

	}

	// convertToDFA converts the NFA to the DFA.
	func convertToDFA(nfa []state) []state {
	// TODO: NFA を DFA に変換する

	return nil
	}

	// buildNFA builds the NFA from the parsed regular expression.
	func buildNFA(parsed *syntax.Regexp) ([]state, error) {
	// 正規表現パターンから、AST を生成する
	ast, err := syntax.Parse(string(parsed), syntax.Perl)
	if err != nil {
	return nil, err
	}

	// AST のノードを走査し、NFA のステートを作成する
	var nfa []state
	if err := buildNFANode(ast, &nfa); err != nil {
	return nil, err
	}

	return nfa, nil
	}

	// buildNFANode creates the NFA states from the given AST node.
	func buildNFANode(node syntax.Regexp, nfa []state) error {
	// TODO: AST のノードから、NFA のステートを作成する

	return errors.New("not implemented yet")
	}