public
Last active

An example of using ngram analysis in ElasticSearch with the Tire rubygem

  • Download Gist
ngrams-and-tire.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
# An example of using ngram analysis in ElasticSearch with the Tire rubygem
# ==========================================================================
# The original, raw example: https://gist.github.com/988923
 
require 'rubygems'
require 'tire'
require 'yajl/json_gem'
 
# Minimal document object wrapping a hash of attributes.
#
# Instances expose a document type name and a JSON serialization of the
# wrapped attributes; in this script they are handed to `store` inside the
# `Tire.index` block.
class URL
  # attributes - Hash of document fields (defaults to an empty Hash).
  #              The hash is kept by reference, not copied.
  def initialize(attributes = {})
    @attributes = attributes
  end

  # Returns the document type name, always the String 'url'.
  def type; 'url'; end

  # Returns the wrapped attributes serialized as a JSON String.
  def to_indexed_json
    @attributes.to_json
  end
end
 
# (Re)create the 'ngrams-and-tire' index with a custom ngram analyzer for the
# `url` field, then store four sample documents and refresh the index.
Tire.index('ngrams-and-tire') do
  # Remove the index first so it is created below with fresh settings.
  delete

  # Custom ngram token filter producing grams of 3 to 5 characters.
  ngram_filter = {
    "type"     => "nGram",
    "max_gram" => 5,
    "min_gram" => 3
  }

  # Custom stop words filter for the URL-specific words "http" and "https".
  scheme_stop_filter = {
    "type"      => "stop",
    "stopwords" => ["http", "https"]
  }

  # Custom analyzer: the built-in "lowercase" tokenizer runs first, followed
  # by the filter chain in order -- generic stopwords, the URL-specific
  # stopwords, and finally the custom ngram filter.
  url_analyzer = {
    "tokenizer" => "lowercase",
    "filter"    => ["stop", "url_stop", "url_ngram"],
    "type"      => "custom"
  }

  # Index settings registering the filters and the analyzer under the names
  # referenced by the analyzer chain and by the mapping below.
  index_settings = {
    "index" => {
      "analysis" => {
        "filter" => {
          "url_ngram" => ngram_filter,
          "url_stop"  => scheme_stop_filter
        },
        "analyzer" => {
          "url_analyzer" => url_analyzer
        }
      }
    }
  }

  # Mapping for the "url" document type: its `url` property is a boosted
  # string field analyzed with the custom analyzer defined above.
  type_mappings = {
    "url" => {
      "properties" => {
        "url" => {
          "boost"    => 10,
          "type"     => "string",
          "analyzer" => "url_analyzer"
        }
      }
    }
  }

  create :settings => index_settings, :mappings => type_mappings

  # Sample documents, stored in this order.
  [
    "http://urlaubinkroatien.de",
    "http://urlaub-in-kroatien.de",
    "http://besteurlaubinkroatien.de",
    "http://kroatien.de"
  ].each do |address|
    store URL.new(:url => address)
  end

  refresh
end
 
 
# Query the index for 'urlaub' in the `url` field and print the `url` of
# every result, followed by an empty line.
s = Tire.search('ngrams-and-tire') do
  query { string 'url:urlaub' }
end
puts "QUERY > url:urlaub"
puts s.results.map { |document| document.url }.inspect
puts ""

# Same again for 'kroatien'.
s = Tire.search('ngrams-and-tire') do
  query { string 'url:kroatien' }
end
puts "QUERY > url:kroatien"
puts s.results.map { |document| document.url }.inspect
puts ""

Hello @karmi, when can we expect a new tire release? Or will you just keep the master branch always stable (as in "not broken")?

@mereghost, "master" should never be broken :)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.