Skip to content

Instantly share code, notes, and snippets.

@garethrees
Last active July 23, 2021 13:59
Show Gist options
  • Save garethrees/e040ce55a51a9106c249 to your computer and use it in GitHub Desktop.
Save garethrees/e040ce55a51a9106c249 to your computer and use it in GitHub Desktop.
Xapian high offset performance
require 'xapian'
# Opens the Xapian database at +database_path+ and publishes the handles the
# search code uses as globals:
#
#   $db_path      - the path, as normalised by File.join
#   $db           - Xapian::Database opened on $db_path
#   $enquire      - Xapian::Enquire bound to $db
#   $query_parser - Xapian::QueryParser bound to $db, AND as the default
#                   operator, with the boolean prefixes "model" => "M" and
#                   "modelid" => "I"
#
# Raises RuntimeError ("Failed to open Xapian database ...") when opening the
# database or creating the enquire object raises IOError.
def setup_xapian(database_path)
  $db_path = File.join(database_path)

  # Only the database/enquire construction is guarded; errors raised while
  # configuring the query parser below propagate unchanged.
  begin
    $db = Xapian::Database.new($db_path)
    $enquire = Xapian::Enquire.new($db)
  rescue IOError => error
    raise "Failed to open Xapian database #{$db_path}: #{error.message}"
  end

  parser = Xapian::QueryParser.new
  $query_parser = parser
  parser.database = $db
  parser.default_op = Xapian::Query::OP_AND
  parser.add_boolean_prefix("model", "M")
  parser.add_boolean_prefix("modelid", "I")
end
# Runs a single Xapian search restricted to a set of model classes.
#
# All work happens in the constructor: it (re)initialises the global Xapian
# handles through setup_xapian, builds the query, configures $enquire and
# fetches the match set, which is then available through #matches.
class SimpleXapianSearch
  # The value returned by $enquire.mset for this search.
  attr_reader :matches

  # model_classes - Enumerable of model class names. Each one becomes an
  #                 "M<name>" term and the terms are OR-ed together.
  # query_string  - String handed to $query_parser.parse_query.
  # options       - Hash of options (default: {}):
  #                 :database_path - passed straight to setup_xapian.
  #                 :limit         - maximum number of matches (default: 25).
  #                 :offset        - index of the first match (default: 0).
  # user_query    - Ignored. Kept only so existing callers that pass a
  #                 fourth argument keep working; the user query is always
  #                 parsed from query_string.
  #
  # When the SORT_BY_VALUE_THEN_RELEVANCE environment variable is set (to any
  # value), results are sorted with sort_by_value_then_relevance!(0, true)
  # instead of the enquire object's default ordering.
  def initialize(model_classes, query_string, options = {}, user_query = nil)
    setup_xapian(options[:database_path])

    $enquire.query = build_query(model_classes, query_string)

    if ENV.key?('SORT_BY_VALUE_THEN_RELEVANCE')
      # NOTE(review): the flag is named after acts_as_xapian's
      # :sort_by_ascending option; confirm the resulting direction against
      # the Xapian documentation before relying on it.
      sort_by_ascending = true
      $enquire.sort_by_value_then_relevance!(0, sort_by_ascending)
    end

    $enquire.collapse_key = 1

    offset = options[:offset] || 0
    limit = options[:limit] || 25
    # Third argument (100) is Xapian's "check at least" count.
    @matches = $enquire.mset(offset, limit, 100)
  end

  private

  # Combines the model restriction and the parsed user query with AND.
  def build_query(model_classes, query_string)
    model_terms = model_classes.map { |mc| "M#{ mc }" }
    model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_terms)
    parsed_query = $query_parser.parse_query(query_string, parse_flags)

    Xapian::Query.new(Xapian::Query::OP_AND, model_query, parsed_query)
  end

  # Query parser feature flags used for every search. Computed lazily so the
  # Xapian constants are only needed when a search actually runs.
  def parse_flags
    Xapian::QueryParser::FLAG_BOOLEAN |
      Xapian::QueryParser::FLAG_PHRASE |
      Xapian::QueryParser::FLAG_LOVEHATE |
      Xapian::QueryParser::FLAG_SPELLING_CORRECTION
  end
end
# Benchmark driver: times one SimpleXapianSearch for the query 'information'
# over InfoRequestEvent at a low (0) and a high (500) result offset.
#
# Benchmark is required explicitly so the script does not depend on some
# other library having loaded it already. Rails must already be loaded,
# because Rails.env selects the database directory.
require 'benchmark'

$xapian_database_path = "#{ Dir.pwd }/lib/acts_as_xapian/xapiandbs/#{Rails.env}"

Benchmark.bmbm do |x|
  [0, 500].each do |offset|
    x.report("offset-#{offset}") do
      SimpleXapianSearch.new(['InfoRequestEvent'], 'information',
                             :offset => offset,
                             :database_path => $xapian_database_path)
    end
  end
end
# Rehearsal ----------------------------------------------
# offset-0 0.400000 0.010000 0.410000 ( 0.411031)
# offset-500 0.430000 0.020000 0.450000 ( 0.448809)
# ------------------------------------- total: 0.860000sec
#
# user system total real
# offset-0 0.410000 0.000000 0.410000 ( 0.416388)
# offset-500 0.390000 0.020000 0.410000 ( 0.420391)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment