Created
September 7, 2024 13:57
-
-
Save gabriel-curtino/449634b522b4fa76d943f15a737f4ea1 to your computer and use it in GitHub Desktop.
Rails SQLite Full Text Search (per table, at db level)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## SqliteFullTextSearch Concern | |
# | |
# The `SqliteFullTextSearch` concern provides a set of methods and triggers to enable full-text search capabilities for ActiveRecord models using SQLite's FTS5 extension. | |
# | |
# ### Key Features | |
# - **Full-Text Search Scope**: Adds a `search` scope to the model for performing full-text searches. | |
# - **Index Creation and Management**: Automatically creates and manages FTS5 tables and triggers for the model. | |
# - **Attribute Configuration**: Allows specifying attributes to include in the full-text search index. | |
# - **Trigger Management**: Sets up triggers to keep the search index up-to-date with model changes. | |
# | |
# frozen_string_literal: true | |
module SqliteFullTextSearch | |
extend ActiveSupport::Concern | |
included do | |
ActiveRecord::SchemaDumper.ignore_tables << /^#{search_index_name}.*$/ | |
TOKENIZERS = { | |
porter: "porter unicode61 remove_diacritics 2", | |
unicode: "unicode61 remove_diacritics 2", | |
ascii: "ascii", | |
trigram: "trigram" | |
}.freeze | |
scope :search, ->(query) { | |
return none if query.blank? | |
joins("INNER JOIN #{search_index_name} ON #{search_index_name}.rowid = #{table_name}.rowid") | |
.where("#{search_index_name} MATCH ?", escape_fts_query(query)) | |
.order("rank") | |
} | |
end | |
class_methods do | |
def search_index_name | |
"#{table_name}_fts" | |
end | |
def search_attributes(*attributes, tokenizer: :porter) | |
#raise(ArgumentError, "Attributes must be present in the database schema") if attrs.any? { |a| !column_names.include?(a.to_s) } | |
#raise(ArgumentError, "Unknown tokenizer. Tokenizers available: #{TOKENIZERS.keys.join(", ")}") unless TOKENIZERS.include?(tokenizer.to_sym) | |
@@search_index_attributes = attributes - [ "rowid", :rowid ] | |
@@search_index_tokenizer = TOKENIZERS[tokenizer.to_sym] | |
after_initialize do | |
self.class.build_search_index if self.class.table_exists? and not self.class.search_index_created? | |
end | |
end | |
def search_index_created? | |
connection.table_exists?(search_index_name) | |
end | |
def build_search_index | |
drop_search_index | |
create_search_index | |
index_current_data | |
end | |
alias_method :rebuild_search_index, :build_search_index | |
def escape_fts_query(query) | |
# Define a regex pattern for allowed FTS5 bareword characters | |
allowed_chars = /\A[:A-Za-z0-9_\u0080-\uFFFF\u0026\u007E]+\z/u | |
# Split the query into individual words | |
words = query.split(/\s+/) | |
# Process each word | |
escaped_words = words.map do |word| | |
if word.match(allowed_chars) | |
word | |
else | |
'"' + word.gsub(/"/, '""') + '"' | |
end | |
end | |
# Join the escaped words back into a single string | |
escaped_words.join(' ') | |
end | |
private def search_index_attributes | |
@@search_index_attributes || [] | |
end | |
private def search_index_attribute_names | |
search_index_attributes.map do |attr| | |
if attr.is_a?(Hash) | |
"#{attr.keys.first}_#{attr.values.first}" | |
else | |
attr.to_s | |
end | |
end | |
end | |
private def search_index_local_attributes | |
search_index_attributes.select { |attr| !attr.is_a?(Hash) } | |
end | |
private def search_index_local_attribute_names | |
search_index_local_attributes.join(", ") | |
end | |
private def search_index_assoc_attributes | |
search_index_attributes | |
.select { |attr| attr.is_a?(Hash) } | |
.each_with_object(Hash.new { |h, k| h[k] = [] }) { |item, result| item.each { |k, v| result[k] << v } } | |
end | |
private def search_index_associations | |
search_index_assoc_attributes.keys | |
end | |
private def search_index_query_columns | |
search_index_attribute_names.join(", ") | |
end | |
private def search_index_query_values | |
search_index_attributes.collect do |attr| | |
if attr.is_a?(Hash) | |
assoc, column = attr.first | |
assoc_table_name = reflect_on_association(assoc).klass.table_name | |
assoc_primary_key = reflect_on_association(assoc).klass.primary_key | |
assoc_foreign_key = reflect_on_association(assoc).foreign_key | |
"#{assoc_table_name}.#{column}" | |
else | |
"#{table_name}.#{attr}" | |
end | |
end.join(", ") | |
end | |
private def search_index_query_joins | |
search_index_associations.collect do |assoc| | |
assoc_table_name = reflect_on_association(assoc).klass.table_name | |
assoc_primary_key = reflect_on_association(assoc).klass.primary_key | |
assoc_foreign_key = reflect_on_association(assoc).foreign_key | |
"INNER JOIN #{assoc_table_name} ON #{assoc_table_name}.#{assoc_primary_key} = #{table_name}.#{assoc_foreign_key}" | |
end.join(" ") | |
end | |
private def create_search_index | |
# ActiveRecord gets confused with the semicolons making queries fails in some cases. `execute_batch` does the work | |
!!ActiveRecord::Base.connection.send(:execute_batch, [ | |
# Index tables | |
"CREATE VIRTUAL TABLE #{search_index_name} USING fts5(#{search_index_query_columns}, content='', contentless_delete=1, tokenize='#{@@search_index_tokenizer}')", | |
"CREATE VIRTUAL TABLE #{search_index_name}_row USING fts5vocab(#{search_index_name}, row)", | |
"CREATE VIRTUAL TABLE #{search_index_name}_instance USING fts5vocab(#{search_index_name}, instance)", | |
# Model triggers | |
"CREATE TRIGGER #{search_index_name}_insert AFTER INSERT ON #{table_name} BEGIN | |
INSERT OR REPLACE INTO #{search_index_name}(rowid, #{search_index_query_columns}) | |
SELECT #{table_name}.rowid, #{search_index_query_values} | |
FROM #{table_name} #{search_index_query_joins} | |
WHERE #{table_name}.rowid = NEW.rowid; | |
END", | |
"CREATE TRIGGER #{search_index_name}_update AFTER UPDATE OF #{search_index_local_attribute_names} ON #{table_name} BEGIN | |
INSERT OR REPLACE INTO #{search_index_name}(rowid, #{search_index_query_columns}) | |
SELECT #{table_name}.rowid, #{search_index_query_values} | |
FROM #{table_name} #{search_index_query_joins} | |
WHERE #{table_name}.rowid = NEW.rowid; | |
END", | |
"CREATE TRIGGER #{search_index_name}_delete AFTER DELETE ON #{table_name} BEGIN | |
DELETE FROM #{search_index_name} WHERE rowid = OLD.rowid; | |
END", | |
# Associations triggers | |
*search_index_assoc_attributes.collect do |assoc, columns| | |
assoc_table_name = reflect_on_association(assoc).klass.table_name | |
assoc_primary_key = reflect_on_association(assoc).klass.primary_key | |
assoc_foreign_key = reflect_on_association(assoc).foreign_key | |
"CREATE TRIGGER #{search_index_name}_#{assoc}_update AFTER UPDATE OF #{columns.join(", ")} ON #{assoc_table_name} BEGIN | |
INSERT OR REPLACE INTO #{search_index_name}(rowid, #{search_index_query_columns}) | |
SELECT #{table_name}.rowid, #{search_index_query_values} | |
FROM #{table_name} | |
INNER JOIN #{assoc_table_name} ON #{assoc_table_name}.#{assoc_primary_key} = #{table_name}.#{assoc_foreign_key} | |
WHERE #{assoc_table_name}.#{assoc_primary_key} = NEW.#{assoc_primary_key}; | |
END" | |
end | |
]) | |
end | |
private def drop_search_index | |
# ActiveRecord gets confused with the semicolons making queries fails in some cases. `execute_batch` does the work | |
!!ActiveRecord::Base.connection.send(:execute_batch, [ | |
*search_index_associations.collect { |assoc| "DROP TRIGGER IF EXISTS #{search_index_name}_#{assoc}_update" }, | |
"DROP TRIGGER IF EXISTS #{search_index_name}_insert", | |
"DROP TRIGGER IF EXISTS #{search_index_name}_update", | |
"DROP TRIGGER IF EXISTS #{search_index_name}_delete", | |
"DROP TABLE IF EXISTS #{search_index_name}_row", | |
"DROP TABLE IF EXISTS #{search_index_name}_instance", | |
"DROP TABLE IF EXISTS #{search_index_name}", | |
]) | |
end | |
private def index_current_data | |
!!ActiveRecord::Base.connection.raw_connection.execute(<<-SQL.squish) | |
INSERT INTO #{search_index_name}(rowid, #{search_index_query_columns}) | |
SELECT #{table_name}.rowid, #{search_index_query_values} | |
FROM #{table_name} #{search_index_query_joins} | |
SQL | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# How to use | |
class Guest < ApplicationRecord | |
include SqliteFullTextSearch | |
search_attributes :name, :email, :phone | |
end | |
class GuestController < ApplicationController | |
def search | |
guests = Guest.search(params[:query]) | |
render "guests/search", locals: { guests: guests } | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is awesome, thanks for sharing it! I made a fork with some tweaks around making
has_many through:
work. I was running to some issues if my query string contains a:
so I removed it from theallowed_chars
for theescape_fts_query
method.