Skip to content

Instantly share code, notes, and snippets.

@hohyon-ryu
Created May 25, 2011 19:19
Show Gist options
  • Save hohyon-ryu/991684 to your computer and use it in GitHub Desktop.
Save hohyon-ryu/991684 to your computer and use it in GitHub Desktop.
Stemming for ElasticSearch
# Script to test stemming for ElasticSearch. Working now!!
# Reference: http://stackoverflow.com/questions/4981001/why-elasticsearch-is-not-finding-my-term
require 'rubygems'
require 'net/http'
require 'yaml'
require 'json'
# Request that deletes the "willindex" index.
delete = Net::HTTP::Delete.new("/willindex")

# Request that creates "willindex" again with one shard and a custom
# "a_stemming" analyzer: standard tokenizer, then the "lowercase" filter, then
# the English "snowball" filter declared under "filter".
# The settings are sent as JSON, matching every other request body in this
# script, rather than as YAML.
create_index = Net::HTTP::Post.new("/willindex")
create_index.body = {
  "index" => {
    "number_of_shards" => 1,
    "analysis" => {
      "filter" => {
        "snowball" => {
          "type" => "snowball",
          "language" => "English"
        }
      },
      "analyzer" => {
        "a_stemming" => {
          "type" => "custom",
          "tokenizer" => "standard",
          "filter" => ["lowercase", "snowball"]
        }
      }
    }
  }
}.to_json
# Echo the settings payload so it can be checked by eye before it is sent.
puts create_index.body
# Mapping request for the "willdoc" type: the "_all" field is declared as a
# string analyzed with the custom "a_stemming" analyzer.
# (The previous version of this test did not send a mapping.)
willdoc_mapping = {
  "willdoc" => {
    "_all" => { "type" => "string", "analyzer" => "a_stemming" }
  }
}
mapping = Net::HTTP::Put.new("/willindex/_mapping")
mapping.body = JSON.generate(willdoc_mapping)
# Build one PUT request per sample document. Requests are stored under their
# document id, so index_record[0] stays nil and index_record[1] through
# index_record[4] hold the requests.
index_record = []
[
  "i love to walk",
  "I love WALKING",
  "i loved to walk",
  "it is walkable"
].each.with_index(1) do |text, doc_id|
  put_doc = Net::HTTP::Put.new("/willindex/willdoc/#{doc_id}")
  put_doc.body = { "text" => text }.to_json
  index_record[doc_id] = put_doc
end
# Status request for the index, asking for pretty-printed output.
GetIndex = Net::HTTP::Get.new("/willindex/_status?pretty=true")
# Search request whose term ("loving") needs stemming to match.
# Author's note: "q=walks" does not work! It has to be "q=text:walk"
search_query = URI.encode_www_form([["pretty", "true"], ["q", "loving"]])
search = Net::HTTP::Get.new("/willindex/_search?" + search_query)
# Send the selected requests, in order, to the server on localhost:9200 over a
# single connection, printing a status line and the response body for each.
Net::HTTP.start("localhost", 9200) do |http|
  # Exactly one of the next two lines must be active: the first is "index mode"
  # (delete, create, map, index the documents), the second is "search mode".
  # Comment one out and uncomment the other to switch.
  [delete, create_index, mapping, index_record[1], index_record[2], index_record[3], index_record[4]].each do |request|
  #[search].each do |request|
    response = http.request(request)
    # `puts response` alone prints only the object's default string, so the
    # body returned by the server has to be printed explicitly.
    puts "#{request.method} #{request.path} -> #{response.code} #{response.message}"
    puts response.body
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment