# An example of using ngram analysis in ElasticSearch with the Tire rubygem
# ==========================================================================
# The original, raw example: https://gist.github.com/988923
require 'rubygems'
require 'tire'
require 'yajl/json_gem'
# A minimal document object for this example.
#
# It wraps an arbitrary attributes hash and exposes a document type name
# (`type`) and a JSON serialization of the attributes (`to_indexed_json`).
class URL
  # @param attributes [Hash] the document's fields, e.g. `:url => "http://example.com"`
  def initialize(attributes={})
    @attributes = attributes
  end

  # @return [String] the document type name; always 'url'
  def type
    'url'
  end

  # @return [String] the attributes hash serialized as a JSON string
  def to_indexed_json
    @attributes.to_json
  end
end
# Set up the 'ngrams-and-tire' index: delete it, create it again with a custom
# ngram-based analyzer mapped to the `url` field, store four sample documents
# and refresh the index.
Tire.index('ngrams-and-tire') do
  # Delete first, so `create` below runs with the settings and mappings given here
  delete

  create :settings => {
    "index" => {
      "analysis" => {
        "filter" => {
          # Let's define a custom ngram filter
          #
          "url_ngram" => {
            "type"     => "nGram",
            "max_gram" => 5,
            "min_gram" => 3
          },
          # Let's define a custom stop words filter
          #
          "url_stop" => {
            "type"      => "stop",
            "stopwords" => ["http", "https"]
          }
        },
        "analyzer" => {
          "url_analyzer" => {
            # First, lowercase everything with the built-in tokenizer
            #
            "tokenizer" => "lowercase",
            # Then, define our analyzer chain: remove generic stopwords,
            # remove URL specific stopwords, apply our custom ngram filter
            #
            "filter"    => ["stop", "url_stop", "url_ngram"],
            "type"      => "custom"
          }
        }
      }
    }
  },
  :mappings => {
    "url" => {
      "properties" => {
        "url" => {
          "boost"    => 10,
          "type"     => "string",
          # Let's use our custom analyzer for the `url` field
          #
          "analyzer" => "url_analyzer"
        }
      }
    }
  }

  # Sample documents: domain names with "urlaub" and "kroatien" in various positions
  store URL.new(:url => "http://urlaubinkroatien.de")
  store URL.new(:url => "http://urlaub-in-kroatien.de")
  store URL.new(:url => "http://besteurlaubinkroatien.de")
  store URL.new(:url => "http://kroatien.de")

  # NOTE(review): presumably makes the documents stored above visible to the
  # searches that follow — confirm against the Tire documentation
  refresh
end
# Run each sample query string against the 'ngrams-and-tire' index and print
# the query, the `url` of every result, and a blank separator line.
['url:urlaub', 'url:kroatien'].each do |query_string|
  s = Tire.search('ngrams-and-tire') { query { string query_string } }

  puts "QUERY > #{query_string}",
       s.results.map(&:url).inspect,
       ""
end
# Comments from the gist discussion
# ---------------------------------
#
# mereghost (Sep 6, 2012):
#   Hello @karmi, when can we expect a new tire release? Or will you just keep
#   the master branch always stable (as in "not broken")?
#
# Reply:
#   @mereghost, "master" should never be broken :)