Last active
August 29, 2015 14:05
-
-
Save leegee/a0d9322abd0fd797f2a0 to your computer and use it in GitHub Desktop.
Elasticsearch configured for named people with addresses, with a combined name field and an "auto-suggestion" index that uses synonyms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Recreates the "people" and "suggestions" Elasticsearch indices, loads two
# sample person documents with matching completion-suggester entries, then
# prints the people mapping and runs three sample suggest queries.
#
# Reads scheme.yml and suggest_scheme.yml from the current directory.
# WARNING: both indices are deleted on the configured host before being
# recreated.
#
# set -o verbose
# set -x

# Abort on unset variables so a mistyped name cannot silently produce a
# request against the wrong URL.
set -u

export ES_HOST=localhost:9200
export INDEX_NAME="people"
export SUGGEST_INDEX_NAME="suggestions"

# Command prefixes, still exported as before for anything that relies on them.
# The script itself goes through es_request so that URLs can be quoted.
export DELETE="curl -XDELETE $ES_HOST"
export POST="curl -XPOST $ES_HOST"
export PUT="curl -XPUT $ES_HOST"
export GET="curl -XGET $ES_HOST"

#######################################
# Sends one HTTP request to Elasticsearch via curl.
# Globals:   ES_HOST (read)
# Arguments: $1 - HTTP method (DELETE, POST, PUT, GET)
#            $2 - path and query string, starting with "/"
#            $@ - any further arguments are passed to curl unchanged
# Outputs:   the response body, on stdout
# Returns:   curl's exit status
#######################################
es_request() {
  local method=$1
  local path=$2
  shift 2
  # The URL is a single quoted word: "?" and "*" in a query string must not
  # be treated as glob characters by the shell.
  curl "-X${method}" "${ES_HOST}${path}" "$@"
}

# Drop any previous copies of both indices.
echo
es_request DELETE "/${INDEX_NAME}"
echo
es_request DELETE "/${SUGGEST_INDEX_NAME}"
echo

# Create both indices from their YAML settings/mapping files.
es_request POST "/${INDEX_NAME}" --data-binary @scheme.yml
echo
es_request POST "/${SUGGEST_INDEX_NAME}" --data-binary @suggest_scheme.yml
echo

echo 'Populate'
echo
es_request POST "/${INDEX_NAME}/person/" -d '{
  "roles" : ["son", "father"],
  "honourific": "MR",
  "given_name": "Lee",
  "middle_names": "Isaac",
  "family_name": "Goddard",
  "address": {
    "property": 40,
    "street": "Fenyvesi Nagyut",
    "town": "Gödöllő",
    "postcode": "H2100",
    "country": "HU"
  }
}'
echo

# TODO: name_suggest.payload.person should be the ID created by the previous
# POST request; for now the literal placeholder "_id" is sent.
es_request POST "/${SUGGEST_INDEX_NAME}/person/" -d '{
  "name_suggest": {
    "payload": { "person": "_id" },
    "output": "Lee Output text",
    "input": [
      "lee", "isaac", "goddard", "ike"
    ]
  }
}'
echo

es_request POST "/${INDEX_NAME}/person/" -d '{
  "roles" : "son",
  "honourific": "MASTER",
  "given_name": "Jacob",
  "middle_names": "William",
  "family_name" : "Goddard",
  "address": {
    "property": 40,
    "street": "Fenyvesi Nagyut",
    "town": "Gödöllő",
    "postcode": "2100",
    "country": "HU"
  }
}'
echo

# TODO: name_suggest.payload.person should be the ID created by the previous
# POST request; for now the literal placeholder "_id" is sent.
es_request POST "/${SUGGEST_INDEX_NAME}/person/" -d '{
  "name_suggest": {
    "payload": { "person": "_id" },
    "output": "Jake Output text",
    "input": [
      "jake", "bill", "jakab", "vilmos", "william", "jacob", "goddard"
    ]
  }
}'
echo

sleep 2 # ES needs time to process

# Show the mapping of the people index (previously used the undefined $INDEX).
es_request GET "/${INDEX_NAME}/_mapping?pretty"
echo

# Sample searches against the people index, left disabled:
# echo "ALL:"
# curl -XGET "$ES_HOST/$INDEX_NAME/person/_search?pretty"
# echo
# echo "lee:"
# curl -XGET "$ES_HOST/$INDEX_NAME/person/_search?q=any_name:lee&pretty"
# echo
# echo "isaac:"
# curl -XGET "$ES_HOST/$INDEX_NAME/person/_search?q=any_name:isaac&pretty"
# echo
# echo "gOdd:"
# curl -XGET "$ES_HOST/$INDEX_NAME/person/_search?q=any_name:gOdd*&pretty"
# echo
# echo "roles:son:"
# curl -XGET "$ES_HOST/$INDEX_NAME/person/_search?q=roles:son*&pretty"
# echo

# Prefix lookup: "god" is the start of the "goddard" input of both entries.
es_request POST "/${SUGGEST_INDEX_NAME}/_suggest?pretty" -d '
{
  "names" : {
    "text" : "god",
    "completion" : {
      "field" : "name_suggest"
    }
  }
}'

# "steak" is not among the inputs posted above.
es_request POST "/${SUGGEST_INDEX_NAME}/_suggest?pretty" -d '
{
  "names" : {
    "text" : "steak",
    "completion" : {
      "field" : "name_suggest"
    }
  }
}'

# Same idea with a misspelling, allowing one edit via the fuzzy option.
es_request POST "/${SUGGEST_INDEX_NAME}/_suggest?pretty" -d '
{
  "names" : {
    "text" : "steok",
    "completion" : {
      "field" : "name_suggest",
      "fuzzy" : {
        "edit_distance" : 1
      }
    }
  }
}'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
#
# People with addresses, company, role
#
# Settings and mapping for the "person" document type. Each name part is a
# multi_field that is indexed under its own name and also under the shared
# "any_name" field, which is the default query field.
#
settings:
  number_of_shards: 1
  number_of_replicas: 1
  query.default_field: any_name
  analysis:
    analyzer:
      name_analyser:
        # "tokenizer" and "filter" are options of a custom analyzer, so the
        # type has to be "custom". "lowercase" is listed explicitly to keep
        # the lowercasing that the built-in standard analyzer performs.
        type: custom
        tokenizer: standard
        # Comma-separated list: "[ asciifolding standard ]" is a single
        # string, not two filter names.
        filter: [ standard, lowercase, asciifolding ]
mappings:
  person:
    _source:
      enabled: true
    properties:
      roles:
        type: string
        index: not_analyzed
        index_options: docs
        include_in_all: false
        # norms:
        #   enabled: false
      honourific:
        type: string
        index: not_analyzed
        include_in_all: false
      # NOTE(review): the analyzer and include_in_all keys below sit on the
      # multi_field itself rather than on its sub-fields; confirm that
      # Elasticsearch applies them there.
      given_name:
        type: multi_field
        path: just_name
        index_analyzer: name_analyser
        search_analyzer: name_analyser
        include_in_all: true
        fields:
          given_name:
            type: string
            index: analyzed
          any_name:
            type: string
            index: analyzed
      middle_names:
        type: multi_field
        path: just_name
        index_analyzer: name_analyser
        search_analyzer: name_analyser
        include_in_all: true
        fields:
          # Default sub-field carries the parent's own name (was "given_name").
          middle_names:
            type: string
            index: analyzed
          any_name:
            type: string
            index: analyzed
      family_name:
        type: multi_field
        path: just_name
        index_analyzer: name_analyser
        search_analyzer: name_analyser
        include_in_all: true
        fields:
          # Default sub-field carries the parent's own name (was "given_name").
          family_name:
            type: string
            index: analyzed
          any_name:
            type: string
            index: analyzed
      company:
        type: string
        index: not_analyzed
        include_in_all: false
        index_options: docs
      address:
        # NOTE(review): unlike the name fields, these multi_fields do not set
        # "path: just_name"; confirm whether "any_address" is meant to be one
        # shared field across all address parts.
        properties:
          property:
            index_analyzer: name_analyser
            search_analyzer: name_analyser
            include_in_all: false
            type: multi_field
            fields:
              property:
                type: string
                index: analyzed
              any_address:
                type: string
                index: analyzed
          street:
            index_analyzer: name_analyser
            search_analyzer: name_analyser
            include_in_all: false
            type: multi_field
            fields:
              # Default sub-field carries the parent's own name (was "property").
              street:
                type: string
                index: analyzed
              any_address:
                type: string
                index: analyzed
          street2:
            index_analyzer: name_analyser
            search_analyzer: name_analyser
            include_in_all: false
            type: multi_field
            fields:
              # Default sub-field carries the parent's own name (was "property").
              street2:
                type: string
                index: analyzed
              any_address:
                type: string
                index: analyzed
          region:
            index_analyzer: name_analyser
            search_analyzer: name_analyser
            include_in_all: false
            type: multi_field
            fields:
              # Default sub-field carries the parent's own name (was "property").
              region:
                type: string
                index: analyzed
              any_address:
                type: string
                index: analyzed
          country:
            # NOTE(review): "not_analyzed" here conflicts with the "analyzed"
            # sub-fields below; confirm which one is intended.
            index: not_analyzed
            include_in_all: false
            type: multi_field
            fields:
              # Default sub-field carries the parent's own name (was "property").
              country:
                type: string
                index: analyzed
              any_address:
                type: string
                index: analyzed
          postcode:
            index_analyzer: name_analyser
            search_analyzer: name_analyser
            include_in_all: false
            type: multi_field
            fields:
              # Default sub-field carries the parent's own name (was "property").
              postcode:
                type: string
                index: analyzed
              any_address:
                type: string
                index: analyzed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
#
# SUGGESTIONS
#
# Settings and mapping for the completion-suggester index. Inputs are expanded
# through a synonym filter when they are indexed; the text being looked up is
# analysed with the plain "simple" analyzer.
#
settings:
  number_of_shards: 1
  number_of_replicas: 1
  # NOTE(review): no "any_name" field is mapped in this index; confirm this
  # setting is needed here.
  query.default_field: any_name
  analysis:
    analyzer:
      name_suggest_synonyms:
        type: custom
        tokenizer: lowercase
        filter: [ my_name_synonyms ]
    filter:
      my_name_synonyms:
        type: synonym
        # Machine-specific absolute path; adjust it to wherever the synonyms
        # file lives on the Elasticsearch node.
        synonyms_path: /Users/lee/src/elasticsearch/name_suggest_synonyms_map.txt
mappings:
  person:
    _source:
      enabled: true
    properties:
      name_suggest:
        type: completion
        # Index-time and search-time analyzers are named separately so the
        # synonym analyzer cannot also replace the search analyzer.
        index_analyzer: name_suggest_synonyms
        search_analyzer: simple
        payloads: true
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment