Skip to content

Instantly share code, notes, and snippets.

@ken39arg
Created December 2, 2020 16:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ken39arg/2fddbb4432afe5d53fb6e6f098be058c to your computer and use it in GitHub Desktop.
Save ken39arg/2fddbb4432afe5d53fb6e6f098be058c to your computer and use it in GitHub Desktop.
package phrasetrie
import "strings"
type node struct {
value string
children []*node
}
func newNode(size int) *node {
nodes := make([]*node, size)
return &node{
children: nodes,
}
}
func (n *node) add(key, value string, t *Trie) {
if key == "" {
n.value = value
return
}
m := t.mapping[key[0]]
if n.children[m] == nil {
n.children[m] = newNode(t.size)
}
n.children[m].add(key[1:], value, t)
}
type Trie struct {
root *node
size int
mapping [256]byte
}
func NewTrie(keys ...string) *Trie {
t := new(Trie)
// inspire strings.Replacer
for _, k := range keys {
for i := 0; i < len(k); i++ {
t.mapping[k[i]] = 1
}
}
for _, b := range t.mapping {
t.size += int(b)
}
var index byte
for i, b := range t.mapping {
if b == 0 {
t.mapping[i] = byte(t.size)
} else {
t.mapping[i] = index
index++
}
}
t.root = newNode(t.size)
for _, k := range keys {
t.root.add(k, k, t)
}
return t
}
func (t *Trie) Match(word string) bool {
_, idx, _ := t.search(word, 0, false)
return 0 <= idx
}
func (t *Trie) FindString(word string) string {
v, _, _ := t.search(word, 0, false)
return v
}
func (t *Trie) FindLongestString(word string) string {
v, _, _ := t.search(word, 0, true)
return v
}
func (t *Trie) ReplaceAll(word, repl string) string {
return t.ReplaceAllFunc(word, func(_ string) string { return repl })
}
func (t *Trie) ReplaceAllFunc(word string, repl func(string) string) string {
dist := new(strings.Builder)
dist.Grow(len(word))
start := 0
for {
found, idx, end := t.search(word, start, true)
if idx < 0 {
dist.WriteString(word[start:])
break
}
dist.WriteString(word[start:idx])
dist.WriteString(repl(found))
start = end + 1
}
return dist.String()
}
func (t *Trie) search(word string, start int, longest bool) (string, int, int) {
end := len(word)
var v string
var e int
for i := start; i < end; i++ {
idx := t.mapping[word[i]]
if int(idx) == t.size {
continue
}
if n := t.root.children[idx]; n != nil {
for j := i + 1; j < end; j++ {
idx = t.mapping[word[j]]
if int(idx) == t.size {
break
}
if n = n.children[t.mapping[word[j]]]; n != nil {
if n.value != "" {
v = n.value
e = j
if !longest {
break
}
}
} else {
break
}
}
if v != "" {
return v, i, e
}
}
}
return "", -1, -1
}
@ken39arg
Copy link
Author

ken39arg commented Dec 2, 2020

goos: linux
goarch: amd64
BenchmarkReplace_30/regexp-2                2829            437092 ns/op            5392 B/op          3 allocs/op
BenchmarkReplace_30/Replacer-2             49702             24842 ns/op            5408 B/op          3 allocs/op
BenchmarkReplace_30/Trie-2                109918             13995 ns/op            2688 B/op          1 allocs/op
BenchmarkReplace_Many/regexp-2               428           2870166 ns/op           11627 B/op         10 allocs/op
BenchmarkReplace_Many/Replacer-2           40664             32979 ns/op            5031 B/op          3 allocs/op
BenchmarkReplace_Many/Trie-2               47100             23681 ns/op            2688 B/op          1 allocs/op

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment