Skip to content

Instantly share code, notes, and snippets.

@kaakaa
Created August 24, 2022 05:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kaakaa/ca8c20821ef610b098851f487eb61ea5 to your computer and use it in GitHub Desktop.
Save kaakaa/ca8c20821ef610b098851f487eb61ea5 to your computer and use it in GitHub Desktop.
package main
import (
"log"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/lang/cjk"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/query"
)
// Document is the value indexed by this example. Its single field is mapped
// to the custom CJK analyzer in main.
type Document struct {
// Text holds the content that is analyzed and searched.
Text string
}
// Registry names under which this example's custom components are registered.
const (
	// customCJKAnalyzerName is the analyzer name referenced by the field mapping.
	customCJKAnalyzerName = "custom_cjk_analyzer"
	// filterName is the name of the bigram token filter used by that analyzer.
	filterName = "custom_cjk_filter"
)
// init registers the custom token filter and the custom analyzer with the
// bleve registry so they can be looked up by name (filterName and
// customCJKAnalyzerName) when an index mapping refers to them.
func init() {
// Register the filter under the name that AnalyzerConstructor looks up.
registry.RegisterTokenFilter(filterName, CJKCustomBigramFilterConstructor)
registry.RegisterAnalyzer(customCJKAnalyzerName, AnalyzerConstructor)
}
// AnalyzerConstructor builds the custom CJK analyzer: the unicode tokenizer
// followed by the CJK width filter, the lowercase filter and the custom bigram
// filter registered under filterName, in that order.
//
// config is not consulted. Every component is resolved through cache; the
// first lookup error is returned as-is together with a nil analyzer.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
	tok, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are applied in slice order, so the lookup order is significant.
	names := []string{cjk.WidthName, lowercase.Name, filterName}
	filters := make([]analysis.TokenFilter, 0, len(names))
	for _, name := range names {
		f, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, f)
	}

	return &analysis.Analyzer{
		Tokenizer:    tok,
		TokenFilters: filters,
	}, nil
}
// CJKCustomBigramFilterConstructor returns a CJK bigram token filter created
// with its `output_unigram` option set to true. Neither config nor cache is
// used, and the returned error is always nil.
func CJKCustomBigramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	const outputUnigram = true
	filter := cjk.NewCJKBigramFilter(outputUnigram)
	return filter, nil
}
// main runs the example and exits with a fatal log message on any error.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run creates an index at "example.bleve" whose "Text" field uses the custom
// CJK analyzer, indexes one document, executes an AND match query for "ちは"
// and logs the hit count and max score.
//
// It lives in its own function (rather than in main) so that the deferred
// index.Close runs on every path once the index is open; log.Fatal would
// skip deferred calls.
func run() error {
	// setup
	textMapping := bleve.NewTextFieldMapping()
	textMapping.Analyzer = customCJKAnalyzerName
	docMapping := bleve.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("Text", textMapping)
	mapping := bleve.NewIndexMapping()
	mapping.AddDocumentMapping("_default", docMapping)

	// NOTE(review): bleve.New is expected to fail if "example.bleve" already
	// exists from a previous run — remove the directory before re-running.
	index, err := bleve.New("example.bleve", mapping)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := index.Close(); cerr != nil {
			log.Printf("closing index: %v", cerr)
		}
	}()

	// index
	if err := index.Index("id1", Document{Text: "こんにちは、世界"}); err != nil {
		return err
	}

	// search
	q := bleve.NewMatchQuery("ちは")
	q.SetField("Text")
	q.SetOperator(query.MatchQueryOperatorAnd)
	searchResults, err := index.Search(bleve.NewSearchRequest(q))
	if err != nil {
		// Without this check a failed search would dereference a nil result below.
		return err
	}
	log.Printf("Found: %d, MaxScore: %f", searchResults.Total, searchResults.MaxScore)
	// => 2022/08/24 14:24:46 Found: 0, MaxScore: 0.000000
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment