Skip to content

Instantly share code, notes, and snippets.

@oblank
Last active May 14, 2024 15:57
Show Gist options
  • Save oblank/73f31be226bf0680ddeb to your computer and use it in GitHub Desktop.
Save oblank/73f31be226bf0680ddeb to your computer and use it in GitHub Desktop.
bleve 结合 jieba 分词实现中文分词
package main
import (
"fmt"
"github.com/blevesearch/bleve"
_ "github.com/wangbin/jiebago/analyse/tokenizers"
"log"
)
// main runs the demo and exits with a non-zero status if any step fails.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run builds a bleve index whose default analyzer uses the jieba tokenizer,
// indexes a handful of mixed English/Chinese sample documents, and prints the
// results of several match queries.
//
// It returns the first error encountered. Doing the work here rather than in
// main lets the deferred index.Close run before the process exits; log.Fatal
// would skip it.
func run() error {
	const (
		// dictPath is the jieba dictionary handed to the tokenizer.
		// NOTE(review): this is a machine-specific absolute path; adjust it
		// to wherever dict.txt lives locally.
		dictPath = "/Users/wangbin/mygo/src/github.com/wangbin/jiebago/dict.txt"
		// indexPath is the on-disk location of the index.
		// NOTE(review): bleve.New presumably fails if this path already
		// exists, so remove it between runs — confirm against bleve docs.
		indexPath = "example.bleve"
	)

	// Register the "jieba" tokenizer and an analyzer built on top of it.
	indexMapping := bleve.NewIndexMapping()
	err := indexMapping.AddCustomTokenizer("jieba",
		map[string]interface{}{
			"file": dictPath,
			"type": "jieba",
		})
	if err != nil {
		return fmt.Errorf("adding jieba tokenizer: %v", err)
	}
	err = indexMapping.AddCustomAnalyzer("jieba",
		map[string]interface{}{
			"type":      "custom",
			"tokenizer": "jieba",
			"token_filters": []string{
				"possessive_en",
				"to_lower",
				"stop_en",
			},
		})
	if err != nil {
		return fmt.Errorf("adding jieba analyzer: %v", err)
	}
	indexMapping.DefaultAnalyzer = "jieba"

	// Create a new index using the mapping above.
	index, err := bleve.New(indexPath, indexMapping)
	if err != nil {
		return fmt.Errorf("creating index %q: %v", indexPath, err)
	}
	defer func() {
		// A close failure cannot change the outcome at this point, so it is
		// reported rather than returned.
		if cerr := index.Close(); cerr != nil {
			log.Printf("closing index %q: %v", indexPath, cerr)
		}
	}()

	docs := []struct {
		Title string
		Name  string
	}{
		{
			Title: "Doc 1",
			Name:  "This is the first document we’ve added",
		},
		{
			Title: "Doc 2",
			Name:  "The second one 你 中文测试中文 is even more interesting! 吃水果",
		},
		{
			Title: "Doc 3",
			Name:  "买水果然后来世博园。",
		},
		{
			Title: "Doc 4",
			Name:  "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
		},
		{
			Title: "Doc 5",
			Name:  "咱俩交换一下吧。",
		},
	}

	// Index each document under its title. A failure here is surfaced
	// immediately instead of showing up later as missing search hits.
	for _, doc := range docs {
		if err := index.Index(doc.Title, doc); err != nil {
			return fmt.Errorf("indexing %q: %v", doc.Title, err)
		}
	}

	// Run a match query for each keyword and print the highlighted results.
	for _, keyword := range []string{"水果世博园", "你", "first", "中文", "交换机", "交换"} {
		query := bleve.NewMatchQuery(keyword)
		search := bleve.NewSearchRequest(query)
		search.Highlight = bleve.NewHighlight()
		searchResults, err := index.Search(search)
		if err != nil {
			return fmt.Errorf("searching for %q: %v", keyword, err)
		}
		fmt.Printf("Result of %s: %s\n", keyword, searchResults)
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment