Skip to content

Instantly share code, notes, and snippets.

@nono
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nono/40bc5a2f6fd5c0fd86f2 to your computer and use it in GitHub Desktop.
Save nono/40bc5a2f6fd5c0fd86f2 to your computer and use it in GitHub Desktop.
I can't understand why Elasticsearch can't find Europe with a snowball analyzer...
# Analysis settings for the "collections" index.
# "text" holds the sample strings; "analyzer" and "filter" define the custom
# analysis chain used by the name.fr field.
text:
    - "Western Europe antiquities"
    - "Antiquités de l'Europe occidentale"
analyzer:
    snowball_en:
        type: snowball
        language: English
    # French chain: tokenize, lowercase, strip elided articles (l', d', m'),
    # drop stop words, fold accents, then stem.
    snowball_fr:
        type: custom
        tokenizer: standard
        filter:
            - standard
            - lowercase
            - elision_fr
            - stop_fr
            - my_asciifolding
            - snowball_fr
filter:
    my_asciifolding:
        type: asciifolding
        preserve_original: true
    elision_fr:
        type: elision
        articles:
            - l
            - d
            - m
    stop_fr:
        type: stop
        # The stop filter takes its word list through "stopwords"; a
        # "language" key is not one of its settings.
        stopwords: _french_
    snowball_fr:
        type: snowball
        # Was misspelled "langugage", so the French setting never reached
        # the stemmer.
        language: French
# Sample documents, one per YAML document ("---" separates them).
# The localized names are nested under "name" so they correspond to the
# name.en / name.fr fields.
_index: collections
_type: collection
_id: 6
name:
    en: "Western Europe antiquities"
    fr: "Antiquités de l'Europe occidentale"
---
_index: collections
_type: collection
_id: 13
name:
    en: "Islamic art"
    fr: "Arts de l'Islam"
# Mapping for the "collection" type: "name" is an object with one string
# sub-field per language, each with its own analyzer.
# NOTE(review): name.en uses the analyzer called "snowball", not the
# "snowball_en" analyzer declared in the analysis settings — confirm this is
# intended.
collection:
    properties:
        name:
            properties:
                en:
                    type: string
                    analyzer: snowball
                fr:
                    type: string
                    analyzer: snowball_fr
#!/bin/bash
# Reproduction script: creates the "collections" index (analysis settings and
# mappings) and bulk-indexes two sample documents into it.
export ELASTICSEARCH_ENDPOINT="http://localhost:9200"

# Create indexes
# The request body is passed through a quoted here-document: nothing in it is
# expanded by the shell, and apostrophes (l'Europe) need no escaping. Inside a
# single-quoted shell string, \' ends the string instead of escaping the
# quote, and \' is not a legal JSON escape either.
# NOTE(review): "text" under "analysis" looks like sample text carried over
# from the Play tool rather than an analysis setting — confirm whether it can
# be dropped.
curl -XPUT "$ELASTICSEARCH_ENDPOINT/collections" --data-binary @- <<'JSON'
{
  "settings": {
    "analysis": {
      "text": [
        "Western Europe antiquities",
        "Antiquités de l'Europe occidentale"
      ],
      "analyzer": {
        "snowball_en": {
          "type": "snowball",
          "language": "English"
        },
        "snowball_fr": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "standard",
            "lowercase",
            "elision_fr",
            "stop_fr",
            "my_asciifolding",
            "snowball_fr"
          ]
        }
      },
      "filter": {
        "my_asciifolding": {
          "type": "asciifolding",
          "preserve_original": true
        },
        "elision_fr": {
          "type": "elision",
          "articles": [
            "l",
            "d",
            "m"
          ]
        },
        "stop_fr": {
          "type": "stop",
          "stopwords": "_french_"
        },
        "snowball_fr": {
          "type": "snowball",
          "language": "French"
        }
      }
    }
  },
  "mappings": {
    "collection": {
      "properties": {
        "name": {
          "properties": {
            "en": {
              "type": "string",
              "analyzer": "snowball"
            },
            "fr": {
              "type": "string",
              "analyzer": "snowball_fr"
            }
          }
        }
      }
    }
  }
}
JSON

# Index documents
# --data-binary keeps the newlines that separate the action and source lines
# of the bulk body (curl's -d strips them when reading from a file or stdin).
curl -XPOST "$ELASTICSEARCH_ENDPOINT/_bulk?refresh=true" --data-binary @- <<'JSON'
{"index":{"_index":"collections","_type":"collection","_id":6}}
{"name":{"en":"Western Europe antiquities","fr":"Antiquités de l'Europe occidentale"}}
{"index":{"_index":"collections","_type":"collection","_id":13}}
{"name":{"en":"Islamic art","fr":"Arts de l'Islam"}}
JSON
# Do searches
#######################################
# POST one query_string search with "explain": true and pretty-printed output.
# Globals:   ELASTICSEARCH_ENDPOINT (read)
# Arguments: $1 - text for the query_string "query" field
#            $2 - analyzer name applied to the query text
# Outputs:   whatever curl prints for the response
#######################################
run_search() {
  local payload
  # printf -v keeps the trailing newline that $(...) would strip, so the body
  # sent is the same as the equivalent inline multi-line literal.
  printf -v payload '\n{\n"query": {\n"query_string": {\n"query": "%s",\n"analyzer": "%s"\n}\n},\n"explain": true\n}\n' "$1" "$2"
  curl -XPOST "$ELASTICSEARCH_ENDPOINT/_search?pretty" -d "$payload"
}

run_search 'Europe' 'snowball_fr'
run_search 'Europe' 'snowball'
run_search 'Europe' 'standard'
run_search 'name.en:Europe' 'snowball'
run_search 'Europe*' 'snowball'
# Auto generated by Found's Play-tool at 2014-10-23T09:23:09+02:00
# Metadata for this example: a title and a one-line description of the problem.
# NOTE(review): "version" is presumably the Play file-format version — confirm.
version: 0
title: "Why ES can't find Europe?"
description: "I can't understand why Elasticsearch can't find Europe with a snowball analyzer..."
# Sample searches. Press Ctrl-Enter to run.
# Press Shift-Enter to go to focused mode for the selected editor.
# Click "Help" for more shortcuts.
# Each YAML document below is one search request: a query_string query plus
# "explain: true". The trailing "# ->" comment records the observed result.
query:
    query_string:
        query: Europe
        analyzer: snowball_fr
explain: true
# -> Not found
---
# This is search #2.
query:
    query_string:
        query: Art
        analyzer: snowball
explain: true
# -> Not found
---
# This is search #3.
query:
    query_string:
        query: Europe
        analyzer: standard
explain: true
# -> Found
---
# This is search #4.
query:
    query_string:
        query: "name.en:Europe"
        analyzer: snowball
explain: true
# -> Found
---
# This is search #5.
query:
    query_string:
        query: "Europe*"
        analyzer: snowball
explain: true
# -> Found
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment