Skip to content

Instantly share code, notes, and snippets.

@ken39arg
Created December 3, 2020 07:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ken39arg/9991a8fe06a398f06eece450cbb86cb8 to your computer and use it in GitHub Desktop.
Save ken39arg/9991a8fe06a398f06eece450cbb86cb8 to your computer and use it in GitHub Desktop.
package phrasetrie
import (
"strings"
"unicode"
"unicode/utf8"
)
type node struct {
value string
children []*node
}
func newNode(size int) *node {
nodes := make([]*node, size)
return &node{
children: nodes,
}
}
func (n *node) add(key, value string, t *Trie) {
if key == "" {
n.value = value
return
}
m := t.mapping[key[0]]
if n.children[m] == nil {
n.children[m] = newNode(t.size)
}
n.children[m].add(key[1:], value, t)
}
type Trie struct {
root *node
size int
mapping [256]byte
}
func NewTrie(keys ...string) *Trie {
t := new(Trie)
// inspire strings.Replacer
for i := range keys {
k := strings.ToLower(strings.TrimLeftFunc(keys[i], unicode.IsSpace))
for i := 0; i < len(k); i++ {
t.mapping[k[i]] = 1
}
keys[i] = k
}
for _, b := range t.mapping {
t.size += int(b)
}
var index byte
for i, b := range t.mapping {
if b == 0 {
t.mapping[i] = byte(t.size)
} else {
t.mapping[i] = index
index++
}
}
t.root = newNode(t.size)
for _, k := range keys {
t.root.add(k, k, t)
}
return t
}
func (t *Trie) Match(word string) bool {
_, idx, _ := t.search(strings.ToLower(word), 0, false)
return 0 <= idx
}
func (t *Trie) FindString(word string) string {
v, _, _ := t.search(strings.ToLower(word), 0, false)
return v
}
func (t *Trie) FindLongestString(word string) string {
v, _, _ := t.search(strings.ToLower(word), 0, true)
return v
}
func (t *Trie) ReplaceAll(word, repl string) string {
return t.ReplaceAllFunc(word, func(_ string) string { return repl })
}
func (t *Trie) ReplaceAllFunc(word string, repl func(string) string) string {
s := strings.ToLower(word)
dist := new(strings.Builder)
dist.Grow(len(word))
start := 0
for {
found, idx, end := t.search(s, start, true)
if idx < 0 {
dist.WriteString(word[start:])
break
}
dist.WriteString(word[start:idx])
dist.WriteString(repl(found))
start = end + 1
}
return dist.String()
}
func (t *Trie) search(word string, start int, longest bool) (string, int, int) {
end := len(word)
var v string
var s, idx byte
var e, step int
var r rune
var wasSpace bool = true
for i := start; i < end; i += step {
s = word[i]
r, step = utf8.DecodeRuneInString(word[i:])
if unicode.IsSpace(r) {
wasSpace = true
continue
} else if !wasSpace {
// wordの途中
continue
}
wasSpace = false
idx = t.mapping[s]
if int(idx) == t.size {
continue
}
var wasSpaceChild bool
if n := t.root.children[idx]; n != nil {
for j := i + 1; j < end; j++ {
s = word[j]
if utf8.RuneStart(s) {
r2, s2 := utf8.DecodeRuneInString(word[j:])
if unicode.IsSpace(r2) {
if wasSpaceChild {
j += s2 - 1
continue
}
wasSpaceChild = true
} else {
wasSpaceChild = false
}
}
idx = t.mapping[s]
if int(idx) == t.size {
break
}
if n = n.children[idx]; n != nil {
if n.value != "" {
v = n.value
e = j
if !longest {
break
}
}
} else {
break
}
}
if v != "" {
return v, i, e
}
}
}
return "", -1, -1
}
@ken39arg
Copy link
Author

ken39arg commented Dec 3, 2020

goos: linux
goarch: amd64
BenchmarkReplace_30/regexp-2                3352            364878 ns/op            5392 B/op          3 allocs/op
BenchmarkReplace_30/Replacer-2             61447             19417 ns/op            5408 B/op          3 allocs/op
BenchmarkReplace_30/Trie-2                 19590             64318 ns/op            5376 B/op          2 allocs/op
BenchmarkReplace_Many/regexp-2               439           2412237 ns/op           10081 B/op         10 allocs/op
BenchmarkReplace_Many/Replacer-2           36723             27773 ns/op            5032 B/op          3 allocs/op
BenchmarkReplace_Many/Trie-2               16647             73215 ns/op            5376 B/op          2 allocs/op

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment