Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@abler98
Forked from karmi/movie-titles.rb
Created August 3, 2017 09:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abler98/34264d7ec203c2752e420b5f0a47d4f6 to your computer and use it in GitHub Desktop.
Save abler98/34264d7ec203c2752e420b5f0a47d4f6 to your computer and use it in GitHub Desktop.
Multiple analyzers and query fields in Elasticsearch for auto-completion
require 'tire'
# Tire.configure { logger STDERR, level: 'debug' }
# Builds a custom analyzer definition. Every analyzer in this index uses the
# same whitespace tokenizer (`title_tokenizer`); they differ only in the chain
# of token filters applied afterwards.
custom_title_analyzer = lambda do |*token_filters|
  { type: 'custom', tokenizer: 'title_tokenizer', filter: token_filters }
end

# Analysis chain: one tokenizer, one edge n-gram filter and five analyzers.
analysis_settings = {
  tokenizer: {
    title_tokenizer: { type: 'whitespace' }
  },
  filter: {
    # Emits 1..16 character n-grams anchored at the front of each token.
    title_ngram_filter: { type: 'edgeNGram', min_gram: 1, max_gram: 16, side: 'front' }
  },
  analyzer: {
    title_default_analyzer:  custom_title_analyzer.call('lowercase', 'asciifolding'),
    title_snowball_analyzer: custom_title_analyzer.call('lowercase', 'asciifolding', 'snowball'),
    title_shingle_analyzer:  custom_title_analyzer.call('shingle', 'lowercase', 'asciifolding'),
    title_ngram_analyzer:    custom_title_analyzer.call('lowercase', 'asciifolding', 'title_ngram_filter'),
    title_search_analyzer:   custom_title_analyzer.call('lowercase', 'asciifolding')
  }
}

# The `title` property is indexed four times, once per analyzer. The `ngrams`
# sub-field is n-grammed only at index time; at search time it uses the plain
# `title_search_analyzer`.
title_sub_fields = {
  title:    { type: 'string', analyzer: 'title_default_analyzer' },
  stemmed:  { type: 'string', analyzer: 'title_snowball_analyzer' },
  shingles: { type: 'string', analyzer: 'title_shingle_analyzer' },
  ngrams:   { type: 'string',
              index_analyzer: 'title_ngram_analyzer',
              search_analyzer: 'title_search_analyzer' }
}

# Sample documents; ids are assigned 1..5 in list order.
sample_titles = [
  "The Lord of the Rings",
  "Lord of the Flies",
  "The Ring",
  "Lords of Dogtown",
  "The Postman Always Rings Twice"
]

# Recreate the `movie-titles` index from scratch, load the sample documents
# and refresh the index before the script goes on to search it.
Tire.index('movie-titles') do
  delete

  create settings: { index: { analysis: analysis_settings } },
         mappings: {
           document: {
             properties: {
               title: { type: 'multi_field', fields: title_sub_fields }
             }
           }
         }

  sample_titles.each.with_index(1) do |movie_title, document_id|
    store(id: document_id, title: movie_title)
  end

  refresh
end
# Sample search inputs, one per line: multi-word prefixes first, then single
# words shortened step by step down to a single character.
queries = <<-QUERIES.split("\n")
lord of the rin
lord of the r
lord of r
lord ring
lord rin
lo ri
lord
lo
rings
ring
rin
ri
r
QUERIES
# Run every sample query against the `movie-titles` index and print the hits
# with their scores, one banner-delimited section per query.
#
# Each query is matched (all terms required, `operator: 'and'`) against the
# four sub-fields of `title`, boosted so that exact matches outrank stemmed
# and shingled matches, which in turn outrank n-gram prefix matches.
queries.each do |q|
  puts '=' * 80,
       "QUERY: '#{q}'",
       '=' * 80

  s = Tire.search('movie-titles') do
    query do
      # The snowball-analyzed sub-field is mapped under the name `stemmed`,
      # so it must be addressed as `title.stemmed`. The previous name
      # `title.snowball` matches no mapped field, so stemmed matches
      # (e.g. 'ring' vs. 'Rings') never contributed to the results.
      match ['title^10', 'title.stemmed^2', 'title.shingles^2', 'title.ngrams'], q, operator: 'and'
    end
    # Keeps the score explanation available for the commented-out debug line below.
    explain true
  end

  s.results.each do |result|
    puts "#{result.title.ljust(30)} (Score: #{result._score})",
         # result._explanation,
         '-' * 80
  end
  puts
end
================================================================================
QUERY: 'lord of the rin'
================================================================================
The Lord of the Rings (Score: 0.008028548)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lord of the r'
================================================================================
The Lord of the Rings (Score: 0.008028548)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lord of r'
================================================================================
The Lord of the Rings (Score: 0.005668437)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lord ring'
================================================================================
The Lord of the Rings (Score: 0.0039382037)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lord rin'
================================================================================
The Lord of the Rings (Score: 0.0039382037)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lo ri'
================================================================================
The Lord of the Rings (Score: 0.002912885)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lord'
================================================================================
Lord of the Flies (Score: 0.15342641)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.13424811)
--------------------------------------------------------------------------------
Lords of Dogtown (Score: 0.0023539662)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'lo'
================================================================================
Lords of Dogtown (Score: 0.0023539662)
--------------------------------------------------------------------------------
Lord of the Flies (Score: 0.0023539662)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.0020597205)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'rings'
================================================================================
The Postman Always Rings Twice (Score: 0.13424811)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.13424811)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'ring'
================================================================================
The Ring (Score: 0.19178301)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.0020597205)
--------------------------------------------------------------------------------
The Postman Always Rings Twice (Score: 0.0017654747)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'rin'
================================================================================
The Ring (Score: 0.0035309494)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.0020597205)
--------------------------------------------------------------------------------
The Postman Always Rings Twice (Score: 0.0017654747)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'ri'
================================================================================
The Ring (Score: 0.0035309494)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.0020597205)
--------------------------------------------------------------------------------
The Postman Always Rings Twice (Score: 0.0017654747)
--------------------------------------------------------------------------------
================================================================================
QUERY: 'r'
================================================================================
The Ring (Score: 0.0035309494)
--------------------------------------------------------------------------------
The Lord of the Rings (Score: 0.0020597205)
--------------------------------------------------------------------------------
The Postman Always Rings Twice (Score: 0.0017654747)
--------------------------------------------------------------------------------
[Finished in 4.1s]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment