Last active
January 5, 2017 07:25
-
-
Save cuixin/18bdbdecead852eecf73519427e960e0 to your computer and use it in GitHub Desktop.
filter dirty words use trie tree.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"unicode" | |
) | |
type Trie struct { | |
Root *TrieNode | |
} | |
type TrieNode struct { | |
Children map[rune]*TrieNode | |
End bool | |
} | |
func New() Trie { | |
var r Trie | |
r.Root = NewTrieNode() | |
return r | |
} | |
func NewTrieNode() *TrieNode { | |
n := new(TrieNode) | |
n.Children = make(map[rune]*TrieNode) | |
return n | |
} | |
func (this *Trie) Append(txt string) { | |
if len(txt) < 1 { | |
return | |
} | |
node := this.Root | |
key := []rune(txt) | |
for i := 0; i < len(key); i++ { | |
if _, exists := node.Children[key[i]]; !exists { | |
node.Children[key[i]] = NewTrieNode() | |
} | |
node = node.Children[key[i]] | |
} | |
node.End = true | |
} | |
func isNoneChar(r rune) bool { | |
return (unicode.In(r, unicode.Han) || unicode.IsLetter(r) || unicode.IsDigit(r)) && | |
!unicode.IsPunct(r) && !unicode.IsSpace(r) | |
} | |
func replaceChars(words []rune, start, end int) { | |
for ; start < end; start++ { | |
words[start] = '*' | |
} | |
} | |
func (this *Trie) Replace(txt string) (string, bool) { | |
if txt == "" { | |
return "", false | |
} | |
origin := []rune(txt) | |
words := []rune(txt) | |
replace := false | |
var ( | |
ok bool | |
node *TrieNode | |
) | |
for i, word := range origin { | |
if node, ok = this.Root.Children[word]; !ok { | |
continue | |
} | |
j := i + 1 | |
if node.End { | |
replaceChars(words, i, j) | |
} | |
for ; j < len(origin); j++ { | |
if !isNoneChar(origin[j]) { | |
continue | |
} | |
if v, ok := node.Children[origin[j]]; !ok { | |
break | |
} else { | |
node = v | |
} | |
} | |
if node.End { | |
replace = true | |
replaceChars(words, i, j) | |
} | |
} | |
return string(words), replace | |
} | |
func (this *Trie) Print() { | |
node := this.Root | |
for k, v := range node.Children { | |
for k1, v1 := range v.Children { | |
for k2, _ := range v1.Children { | |
fmt.Printf("%s%s%s", string(k), string(k1), string(k2)) | |
} | |
} | |
} | |
} | |
func main() { | |
dw := New() | |
dw.Append("共产党") | |
dw.Append("系统") | |
dw.Append("sb") | |
dw.Append("比") | |
dw.Append("比比的") | |
dw.Append("比比的的的") | |
dw.Append("比较") | |
dw.Append("比较的") | |
testFunc := dw.Replace | |
// dw.Print() | |
fmt.Println(testFunc("共产党系统")) | |
fmt.Println(testFunc("共产党,系统")) | |
fmt.Println(testFunc("比")) | |
fmt.Println(testFunc("都 比")) | |
fmt.Println(testFunc("xx都 比 ")) | |
fmt.Println(testFunc("xx都 比")) | |
fmt.Println(testFunc("xx都 比xx")) | |
fmt.Println(testFunc("xx都比xx")) | |
fmt.Println(testFunc("xx s b xx")) | |
fmt.Println(testFunc("s b")) | |
fmt.Println(testFunc("比douxx 都 ")) | |
fmt.Println(testFunc("比douxx 都 s b 哈哈")) | |
fmt.Println(testFunc("比douxx 都 s b")) | |
fmt.Println(testFunc(".s比douxx都 s.b")) | |
fmt.Println(testFunc(".x比douxx 都 s.b")) | |
fmt.Println(testFunc("比比的douxx 都 s.b")) | |
fmt.Println(testFunc("共产党的douxx 都 s.b")) | |
fmt.Println(testFunc("比比较douxx 都 s.b")) | |
fmt.Println(testFunc("比比比比x比的douxx 都 s.b")) | |
fmt.Println(testFunc("比比比共产共产党比比比比比比比比比比比比比比比")) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment