Skip to content

Instantly share code, notes, and snippets.

@ivoscc
Created September 11, 2013 02:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivoscc/6518829 to your computer and use it in GitHub Desktop.
Save ivoscc/6518829 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Settings shared by every curl call in this script.
#
# Banner reported by elasticsearch:9200 (shows the server version in use):
# {
#   "ok" : true,
#   "status" : 200,
#   "name" : "Tiger Shark",
#   "version" : {
#     "number" : "0.90.2",
#     "snapshot_build" : false,
#     "lucene_version" : "4.3.1"
#   },
#   "tagline" : "You Know, for Search"
# }

# Base URL of the Elasticsearch HTTP endpoint.
ES="http://elasticsearch:9200"
# Name of the index the requests are sent to.
INDEX="my_new_index"
# Name of the mapping (document type) used in the request URLs.
MAPPING="my_mapping"
# Create the index.
# First drop any existing index of the same name so the settings below are
# applied to a fresh index, then create it with one custom analyzer:
# the keyword tokenizer keeps the whole input as a single token, and the
# asciifolding + lowercase filters are applied to that token.
# URLs are double-quoted so the expansions are never word-split or globbed.
curl -XDELETE "$ES/$INDEX"
curl -XPUT "$ES/$INDEX/" -d '{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "my_custom_analyzer": {
            "type": "custom",
            "tokenizer": "keyword",
            "filter": ["asciifolding", "lowercase"]
          }
        }
      }
    }
  }
}'
# Create the mapping.
# The request body is wrapped in the mapping (type) name. On Elasticsearch
# 0.90.x a body whose only root key is "properties" is not applied as the
# field mapping, so "name" ends up dynamically mapped with the default
# analyzer instead of my_custom_analyzer.
# The '"'"$MAPPING"'"' sequence closes the single-quoted JSON, inserts the
# quoted variable, and reopens the single-quoted JSON.
curl -XPUT "$ES/$INDEX/$MAPPING/_mapping" -d '{
  "'"$MAPPING"'": {
    "properties": {
      "guid": {"type": "string", "index": "no"},
      "name": {
        "type": "string",
        "index": "analyzed",
        "analyzer": "my_custom_analyzer"
      }
    }
  }
}'
# Index two documents whose "name" differs only by the accent on the "i".
# URLs are double-quoted so the expansions are never word-split or globbed.
curl -XPUT "$ES/$INDEX/$MAPPING/1" -d '{
  "guid": "some_guid",
  "name": "politica"
}'
curl -XPUT "$ES/$INDEX/$MAPPING/2" -d '{
  "guid": "another_guid",
  "name": "política"
}'
# Query the data with a prefix query on "name".
# Originally observed result: this matches only "politica" and not "política".
#
# Force a refresh first: newly indexed documents only become searchable after
# the index is refreshed, and this search runs right after the writes.
curl -XPOST "$ES/$INDEX/_refresh"
# The URL is quoted because it contains "?", which the shell would otherwise
# treat as a glob character.
curl -XPOST "$ES/$INDEX/$MAPPING/_search?pretty=true" -d '{
  "query": {
    "prefix": {
      "name": {
        "prefix": "poli"
      }
    }
  }
}'
# Run the custom analyzer directly on both spellings.
# Both of the following requests show this result
# (which proves the analyzer itself is working):
#{
# "tokens" : [ {
# "token" : "politica",
# "start_offset" : 0,
# "end_offset" : 8,
# "type" : "word",
# "position" : 1
# } ]
#}
# The URLs are built from ES / INDEX / MAPPING so these checks always target
# the same server, index and mapping as the requests above.
curl -XGET "$ES/$INDEX/_analyze?analyzer=my_custom_analyzer" -d 'política'
curl -XGET "$ES/$INDEX/_analyze?analyzer=my_custom_analyzer" -d 'politica'
# But when the text is analyzed through the field instead of by analyzer name:
curl -XGET "$ES/$INDEX/_analyze?field=$MAPPING.name&pretty=true" -d 'política'
# the originally observed output still contained the accented character.
# The token type is "<ALPHANUM>" rather than "word", which suggests the field
# was not using my_custom_analyzer at that point:
#{
# "tokens" : [ {
# "token" : "política",
# "start_offset" : 0,
# "end_offset" : 8,
# "type" : "<ALPHANUM>",
# "position" : 1
# } ]
#}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment