Skip to content

Instantly share code, notes, and snippets.

@bru
Created August 21, 2011 14:51
Show Gist options
  • Save bru/1160694 to your computer and use it in GitHub Desktop.
Save bru/1160694 to your computer and use it in GitHub Desktop.
# Make ./lib loadable via `require` (prepended to the load path).
$:.unshift './lib'
require 'rubygems'
# Rjb is the Ruby-Java bridge used to drive Lucene from Ruby.
require 'rjb'
# NOTE(review): presumably a local helper in ./lib that supplies `load_jvm`,
# the `org.apache...` package-style lookup used for VERSION below, and the
# snake_case method aliases (e.g. add_document) -- confirm against that file.
require 'rjbextension'

# Bind the Lucene Java classes used by this script to Ruby constants.
Document = Rjb::import('org.apache.lucene.document.Document')
StandardAnalyzer = Rjb::import('org.apache.lucene.analysis.standard.StandardAnalyzer')
Field = Rjb::import('org.apache.lucene.document.Field')
IndexWriter = Rjb::import('org.apache.lucene.index.IndexWriter')
IndexWriterConfig = Rjb::import('org.apache.lucene.index.IndexWriterConfig')
# ParseException is imported but not referenced anywhere in the visible script.
ParseException = Rjb::import('org.apache.lucene.queryParser.ParseException')
QueryParser = Rjb::import('org.apache.lucene.queryParser.QueryParser')
RAMDirectory = Rjb::import('org.apache.lucene.store.RAMDirectory')
IndexSearcher = Rjb::import('org.apache.lucene.search.IndexSearcher')
# Nested Java classes are addressed with the `Outer$Inner` binary name.
Store = Rjb::import 'org.apache.lucene.document.Field$Store'
Index = Rjb::import 'org.apache.lucene.document.Field$Index'
# Resolution is imported but not referenced anywhere in the visible script.
Resolution = Rjb::import 'org.apache.lucene.document.DateTools$Resolution'
# Lucene compatibility version passed to the analyzer, writer config and query parser.
VERSION = org.apache.lucene.util.Version.LUCENE_33
# Request a 128 MB initial / 512 MB maximum JVM heap.
# NOTE(review): this runs after the Rjb::import calls above; if the JVM is
# already started by the first import, these options may have no effect -- confirm.
load_jvm(['-Xms128m', '-Xmx512m'])
# Builds a Lucene document with two stored fields.
#
# The "title" field is stored but not indexed (Index.NO), so it can be read
# back from a hit but cannot be searched. The "content" field is stored and
# analyzed (Index.ANALYZED).
#
# @param title [String] value for the "title" field
# @param content [String] value for the "content" field
# @return [Document] the populated document
def create_document(title, content)
  document = Document.new

  title_field = Field.new("title", title, Store.YES, Index.NO)
  document.add(title_field)

  content_field = Field.new("content", content, Store.YES, Index.ANALYZED)
  document.add(content_field)

  document
end
# Builds an in-memory Lucene index holding four sample quotations.
#
# Each quotation becomes one document via create_document, with the author
# as the title and the quotation as the content. The writer is optimized
# once all documents are added.
#
# The writer is closed in an ensure clause so that it is released even when
# adding a document or optimizing raises; the exception still propagates.
#
# @return [RAMDirectory] the directory containing the populated index
def create_index
  quotations = [
    ["Theodore Roosevelt",
     "It behooves every man to remember that the work of the " +
     "critic, is of altogether secondary importance, and that, " +
     "in the end, progress is accomplished by the man who does " +
     "things."],
    ["Friedrich Hayek",
     "The case for individual freedom rests largely on the " +
     "recognition of the inevitable and universal ignorance " +
     "of all of us concerning a great many of the factors on " +
     "which the achievements of our ends and welfare depend."],
    ["Ayn Rand",
     "There is nothing to take a man's freedom away from " +
     "him, save other men. To be free, a man must be free " +
     "of his brothers."],
    ["Mohandas Gandhi",
     "Freedom is not worth having if it does not connote " +
     "freedom to err."]
  ]

  idx = RAMDirectory.new
  config = IndexWriterConfig.new(VERSION, StandardAnalyzer.new(VERSION))
  writer = IndexWriter.new(idx, config)
  begin
    quotations.each do |author, quotation|
      writer.add_document(create_document(author, quotation))
    end
    writer.optimize
  ensure
    # Always release the writer, including on failure part-way through.
    writer.close
  end
  idx
end
# Runs a query against the "content" field and prints the results to stdout.
#
# The query string is parsed with a QueryParser backed by a StandardAnalyzer,
# and at most 10 hits are requested from the searcher. For each returned hit
# the document id, score, stored title and stored content are printed. The
# printed hit number is the zero-based position in the result list.
#
# @param searcher [IndexSearcher] searcher to run the query on
# @param query_string [String] query in QueryParser syntax
# @return [nil, Object] nil when nothing matched, otherwise the scoreDocs
#   collection that was iterated
def search(searcher, query_string)
  analyzer = StandardAnalyzer.new(VERSION)
  parser = QueryParser.new(VERSION, "content", analyzer)
  puts "Searching for #{query_string}"

  top_docs = searcher.search(parser.parse(query_string), 10)
  total = top_docs.totalHits
  return puts("No matching documents.") if total.zero?

  puts format("%d total matching documents", total)
  puts format("Hits for %s were found in quotes by:", query_string)
  top_docs.scoreDocs.each_with_index do |hit, position|
    id = hit.doc
    puts format("doc_id: %s \t score: %s", id, hit.score)
    stored = searcher.doc(id)
    puts format("%d. %s", position, stored.get("title"))
    puts format("Content: %s", stored.get("content"))
    puts
  end
end
# Demo entry point: builds the sample index, opens a searcher over it and
# runs a fixed list of example queries, printing the results of each.
#
# The searcher is closed in an ensure clause so that it is released even if
# one of the searches raises; the exception still propagates to the caller.
# The return value is not meaningful.
def main
  index = create_index
  searcher = IndexSearcher.new(index)
  begin
    ["freedom", "free", "progress or achievements", "wibble"].each do |query_string|
      search(searcher, query_string)
    end
  ensure
    searcher.close
  end
end
# Run the demo: index the sample quotations and execute the example queries.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment