Skip to content

Instantly share code, notes, and snippets.

@kosho
Last active November 27, 2018 01:37
Show Gist options
  • Save kosho/2250bb3e9445786f7c3788541dcee337 to your computer and use it in GitHub Desktop.
Save kosho/2250bb3e9445786f7c3788541dcee337 to your computer and use it in GitHub Desktop.
##################################################
# Kuromoji/ICU Tokenizer comparison Test
##################################################
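# Prerequisite: config/userdict_ja.txt must contain the entry below
# (the custom kuromoji tokenizer in this script loads it via "user_dictionary").
# Entry format: <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
# ルークスカイウォーカー,ルーク スカイウォーカー,ルーク スカイウォーカー,カスタム名詞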
# Drop any index left over from a previous run so the test starts from a clean state.
# ignore_unavailable=true keeps the first run (when the index does not exist yet)
# from failing with index_not_found.
DELETE app-search-test?ignore_unavailable=true
# Create the test index (1 shard, 0 replicas) with two custom analyzers:
#   iq_text_base : icu_tokenizer -> icu_folding -> "ja-stop-words-filter"
#   my_kuromoji  : kuromoji_tokenizer in "search" mode using the user dictionary userdict_ja.txt
# The "title" field gets three text sub-fields so the same value can be compared across analyzers:
#   title.kuromoji (built-in "kuromoji" analyzer), title.my_kuromoji, title.iq_text_base.
# Each sub-field has fielddata enabled.
# Both analyzers declare "type": "custom" explicitly so the two definitions read consistently.
# NOTE(review): "ja-stop-words-filter" is configured with the _english_ stop word list
# despite its name - confirm this is intentional.
# Requires the analysis-icu and analysis-kuromoji plugins.
PUT app-search-test
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "analysis": {
      "analyzer": {
        "iq_text_base": {
          "type": "custom",
          "tokenizer": "icu_tokenizer",
          "filter": [
            "icu_folding",
            "ja-stop-words-filter"
          ]
        },
        "my_kuromoji": {
          "type": "custom",
          "tokenizer": "my_kuromoji_tokenizer"
        }
      },
      "filter": {
        "ja-stop-words-filter": {
          "type": "stop",
          "stopwords": "_english_"
        }
      },
      "tokenizer": {
        "my_kuromoji_tokenizer": {
          "type": "kuromoji_tokenizer",
          "user_dictionary": "userdict_ja.txt",
          "mode": "search"
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "title": {
          "type": "text",
          "fields": {
            "kuromoji": {
              "type": "text",
              "analyzer": "kuromoji",
              "fielddata": true
            },
            "my_kuromoji": {
              "type": "text",
              "analyzer": "my_kuromoji",
              "fielddata": true
            },
            "iq_text_base": {
              "type": "text",
              "analyzer": "iq_text_base",
              "fielddata": true
            }
          }
        }
      }
    }
  }
}
# Tokenize the same unspaced input with each analyzer; "explain": true asks for the detailed breakdown.

# 1) Built-in kuromoji analyzer
GET app-search-test/_analyze
{
  "analyzer": "kuromoji",
  "text": "ルークスカイウォーカー",
  "explain": true
}

# 2) Custom kuromoji analyzer (search mode + user dictionary)
GET app-search-test/_analyze
{
  "analyzer": "my_kuromoji",
  "text": "ルークスカイウォーカー",
  "explain": true
}

# 3) ICU-based analyzer
GET app-search-test/_analyze
{
  "analyzer": "iq_text_base",
  "text": "ルークスカイウォーカー",
  "explain": true
}
# Search actual document
# Index one document whose title is written WITH the middle dot (・); the queries in
# this script use the same name without it.
# refresh=true makes the document visible to search as soon as this request returns;
# without it, a search issued right afterwards may find nothing.
PUT app-search-test/doc/1?refresh=true
{
  "title": "ルーク・スカイウォーカー"
}
# Run the same unspaced query against each analyzed sub-field of "title".

# 1) Sub-field using the built-in kuromoji analyzer
GET app-search-test/_search
{
  "query": {
    "match": {
      "title.kuromoji": "ルークスカイウォーカー"
    }
  }
}

# 2) Sub-field using the custom kuromoji analyzer
GET app-search-test/_search
{
  "query": {
    "match": {
      "title.my_kuromoji": "ルークスカイウォーカー"
    }
  }
}

# 3) Sub-field using the ICU-based analyzer
GET app-search-test/_search
{
  "query": {
    "match": {
      "title.iq_text_base": "ルークスカイウォーカー"
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment