Created
January 13, 2013 20:42
-
-
Save karmi/4526141 to your computer and use it in GitHub Desktop.
Multiple analyzers and query fields in Elasticsearch for auto-completion
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'tire'

# Tire.configure { logger STDERR, level: 'debug' }

# Rebuild the 'movie-titles' index from scratch: drop any existing index,
# create it with custom analyzers and a multi-field mapping for `title`,
# store a handful of sample documents, and refresh so they are searchable.
Tire.index('movie-titles') do
  # Start from a clean slate so changed settings/mappings take effect.
  delete

  create \
    settings: {
      index: {
        analysis: {
          tokenizer: {
            # Split on whitespace only; every analyzer below shares this tokenizer.
            title_tokenizer: {
              type: 'whitespace'
            },
          },
          filter: {
            # Front-anchored edge n-grams of 1 to 16 characters, used to index
            # word prefixes for as-you-type matching.
            title_ngram_filter: {
              type: 'edgeNGram',
              min_gram: 1,
              max_gram: 16,
              side: 'front'
            }
          },
          analyzer: {
            # Lowercased, ASCII-folded tokens.
            title_default_analyzer: {
              type: 'custom',
              tokenizer: 'title_tokenizer',
              filter: ['lowercase', 'asciifolding']
            },
            # Same as the default analyzer, plus snowball stemming.
            title_snowball_analyzer: {
              type: 'custom',
              tokenizer: 'title_tokenizer',
              filter: ['lowercase', 'asciifolding', 'snowball']
            },
            # Word shingles; note the shingle filter runs before lowercasing.
            title_shingle_analyzer: {
              type: 'custom',
              tokenizer: 'title_tokenizer',
              filter: ['shingle', 'lowercase', 'asciifolding']
            },
            # Index-time analyzer for prefixes: normalizes, then emits edge n-grams.
            title_ngram_analyzer: {
              type: 'custom',
              tokenizer: 'title_tokenizer',
              filter: ['lowercase', 'asciifolding', 'title_ngram_filter']
            },
            # Search-time analyzer for the n-gram field: same filter chain as
            # title_default_analyzer, so query terms are not n-grammed themselves.
            title_search_analyzer: {
              type: 'custom',
              tokenizer: 'title_tokenizer',
              filter: ['lowercase', 'asciifolding']
            }
          }
        }
      },
    },
    mappings: {
      document: {
        properties: {
          # One source value, analyzed four different ways. Sub-fields are
          # addressed in queries as `title`, `title.stemmed`, `title.shingles`
          # and `title.ngrams`.
          title: {
            type: 'multi_field',
            fields: {
              title: { type: 'string', analyzer: 'title_default_analyzer' },
              stemmed: { type: 'string', analyzer: 'title_snowball_analyzer' },
              shingles: { type: 'string', analyzer: 'title_shingle_analyzer' },
              ngrams: { type: 'string', index_analyzer: 'title_ngram_analyzer', search_analyzer: 'title_search_analyzer' }
            }
          }
        }
      }
    }

  # Sample documents with overlapping words ("Lord", "Ring(s)").
  store id: 1, title: "The Lord of the Rings"
  store id: 2, title: "Lord of the Flies"
  store id: 3, title: "The Ring"
  store id: 4, title: "Lords of Dogtown"
  store id: 5, title: "The Postman Always Rings Twice"

  # Make the stored documents visible to the searches that follow.
  refresh
end
# Sample search inputs imitating a user typing a movie title: multi-word
# phrases with a partial last word, two-word prefixes, and single words
# shrinking down to a single character.
queries = [
  'lord of the rin',
  'lord of the r',
  'lord of r',
  'lord ring',
  'lord rin',
  'lo ri',
  'lord',
  'lo',
  'rings',
  'ring',
  'rin',
  'ri',
  'r'
]
# Run every sample query against the 'movie-titles' index and print each hit's
# title with its relevance score.
queries.each do |q|
  puts '=' * 80,
       "QUERY: '#{q}'",
       '=' * 80

  s = Tire.search('movie-titles') do
    query do
      # Search all four analyzed variants of the title at once; `^N` boosts a
      # field, so matches on the plain `title` field weigh the most and prefix
      # (n-gram) matches the least. Sub-field names must match the multi_field
      # mapping: the stemmed variant is mapped as `title.stemmed`.
      # `operator: 'and'` requires every query term to match.
      match ['title^10', 'title.stemmed^2', 'title.shingles^2', 'title.ngrams'], q, operator: 'and'
    end
    # Ask for score explanations; only needed if the `_explanation` line below
    # is re-enabled.
    explain true
  end

  s.results.each do |result|
    puts "#{result.title.ljust(30)} (Score: #{result._score})",
         # result._explanation,
         '-' * 80
  end

  puts
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
================================================================================ | |
QUERY: 'lord of the rin' | |
================================================================================ | |
The Lord of the Rings (Score: 0.008028548) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lord of the r' | |
================================================================================ | |
The Lord of the Rings (Score: 0.008028548) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lord of r' | |
================================================================================ | |
The Lord of the Rings (Score: 0.005668437) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lord ring' | |
================================================================================ | |
The Lord of the Rings (Score: 0.0039382037) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lord rin' | |
================================================================================ | |
The Lord of the Rings (Score: 0.0039382037) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lo ri' | |
================================================================================ | |
The Lord of the Rings (Score: 0.002912885) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lord' | |
================================================================================ | |
Lord of the Flies (Score: 0.15342641) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.13424811) | |
-------------------------------------------------------------------------------- | |
Lords of Dogtown (Score: 0.0023539662) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'lo' | |
================================================================================ | |
Lords of Dogtown (Score: 0.0023539662) | |
-------------------------------------------------------------------------------- | |
Lord of the Flies (Score: 0.0023539662) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.0020597205) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'rings' | |
================================================================================ | |
The Postman Always Rings Twice (Score: 0.13424811) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.13424811) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'ring' | |
================================================================================ | |
The Ring (Score: 0.19178301) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.0020597205) | |
-------------------------------------------------------------------------------- | |
The Postman Always Rings Twice (Score: 0.0017654747) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'rin' | |
================================================================================ | |
The Ring (Score: 0.0035309494) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.0020597205) | |
-------------------------------------------------------------------------------- | |
The Postman Always Rings Twice (Score: 0.0017654747) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'ri' | |
================================================================================ | |
The Ring (Score: 0.0035309494) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.0020597205) | |
-------------------------------------------------------------------------------- | |
The Postman Always Rings Twice (Score: 0.0017654747) | |
-------------------------------------------------------------------------------- | |
================================================================================ | |
QUERY: 'r' | |
================================================================================ | |
The Ring (Score: 0.0035309494) | |
-------------------------------------------------------------------------------- | |
The Lord of the Rings (Score: 0.0020597205) | |
-------------------------------------------------------------------------------- | |
The Postman Always Rings Twice (Score: 0.0017654747) | |
-------------------------------------------------------------------------------- | |
[Finished in 4.1s] |
Thank you, this was helpful, but it might need to be updated since there are many changes in the current version of ES.
Thank God! I searched "elasticsearch mapping multiple custom analyzers" on Google. Finally found it! You saved tons of my time. Thank you!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Guys... I just made analyzers pretty close to these, but when I try to search with a long string (like 'lord of the rin'), it's not returning just the single matching result; it is returning all records.
Just posted on stackoverflow, http://stackoverflow.com/questions/43902570/elasticsearch-multiple-analysers-not-working
If someone can help I would be grateful.
cheers