Skip to content

Instantly share code, notes, and snippets.

@gabriel-curtino
Created September 7, 2024 13:57
Show Gist options
  • Save gabriel-curtino/449634b522b4fa76d943f15a737f4ea1 to your computer and use it in GitHub Desktop.
Save gabriel-curtino/449634b522b4fa76d943f15a737f4ea1 to your computer and use it in GitHub Desktop.
Rails SQLite Full Text Search (per table, at db level)
## SqliteFullTextSearch Concern
#
# The `SqliteFullTextSearch` concern provides a set of methods and triggers to enable full-text search capabilities for ActiveRecord models using SQLite's FTS5 extension.
#
# ### Key Features
# - **Full-Text Search Scope**: Adds a `search` scope to the model for performing full-text searches.
# - **Index Creation and Management**: Automatically creates and manages FTS5 tables and triggers for the model.
# - **Attribute Configuration**: Allows specifying attributes to include in the full-text search index.
# - **Trigger Management**: Sets up triggers to keep the search index up-to-date with model changes.
#
# frozen_string_literal: true
module SqliteFullTextSearch
extend ActiveSupport::Concern
included do
ActiveRecord::SchemaDumper.ignore_tables << /^#{search_index_name}.*$/
TOKENIZERS = {
porter: "porter unicode61 remove_diacritics 2",
unicode: "unicode61 remove_diacritics 2",
ascii: "ascii",
trigram: "trigram"
}.freeze
scope :search, ->(query) {
return none if query.blank?
joins("INNER JOIN #{search_index_name} ON #{search_index_name}.rowid = #{table_name}.rowid")
.where("#{search_index_name} MATCH ?", escape_fts_query(query))
.order("rank")
}
end
class_methods do
def search_index_name
"#{table_name}_fts"
end
def search_attributes(*attributes, tokenizer: :porter)
#raise(ArgumentError, "Attributes must be present in the database schema") if attrs.any? { |a| !column_names.include?(a.to_s) }
#raise(ArgumentError, "Unknown tokenizer. Tokenizers available: #{TOKENIZERS.keys.join(", ")}") unless TOKENIZERS.include?(tokenizer.to_sym)
@@search_index_attributes = attributes - [ "rowid", :rowid ]
@@search_index_tokenizer = TOKENIZERS[tokenizer.to_sym]
after_initialize do
self.class.build_search_index if self.class.table_exists? and not self.class.search_index_created?
end
end
def search_index_created?
connection.table_exists?(search_index_name)
end
def build_search_index
drop_search_index
create_search_index
index_current_data
end
alias_method :rebuild_search_index, :build_search_index
def escape_fts_query(query)
# Define a regex pattern for allowed FTS5 bareword characters
allowed_chars = /\A[:A-Za-z0-9_\u0080-\uFFFF\u0026\u007E]+\z/u
# Split the query into individual words
words = query.split(/\s+/)
# Process each word
escaped_words = words.map do |word|
if word.match(allowed_chars)
word
else
'"' + word.gsub(/"/, '""') + '"'
end
end
# Join the escaped words back into a single string
escaped_words.join(' ')
end
private def search_index_attributes
@@search_index_attributes || []
end
private def search_index_attribute_names
search_index_attributes.map do |attr|
if attr.is_a?(Hash)
"#{attr.keys.first}_#{attr.values.first}"
else
attr.to_s
end
end
end
private def search_index_local_attributes
search_index_attributes.select { |attr| !attr.is_a?(Hash) }
end
private def search_index_local_attribute_names
search_index_local_attributes.join(", ")
end
private def search_index_assoc_attributes
search_index_attributes
.select { |attr| attr.is_a?(Hash) }
.each_with_object(Hash.new { |h, k| h[k] = [] }) { |item, result| item.each { |k, v| result[k] << v } }
end
private def search_index_associations
search_index_assoc_attributes.keys
end
private def search_index_query_columns
search_index_attribute_names.join(", ")
end
private def search_index_query_values
search_index_attributes.collect do |attr|
if attr.is_a?(Hash)
assoc, column = attr.first
assoc_table_name = reflect_on_association(assoc).klass.table_name
assoc_primary_key = reflect_on_association(assoc).klass.primary_key
assoc_foreign_key = reflect_on_association(assoc).foreign_key
"#{assoc_table_name}.#{column}"
else
"#{table_name}.#{attr}"
end
end.join(", ")
end
private def search_index_query_joins
search_index_associations.collect do |assoc|
assoc_table_name = reflect_on_association(assoc).klass.table_name
assoc_primary_key = reflect_on_association(assoc).klass.primary_key
assoc_foreign_key = reflect_on_association(assoc).foreign_key
"INNER JOIN #{assoc_table_name} ON #{assoc_table_name}.#{assoc_primary_key} = #{table_name}.#{assoc_foreign_key}"
end.join(" ")
end
private def create_search_index
# ActiveRecord gets confused with the semicolons making queries fails in some cases. `execute_batch` does the work
!!ActiveRecord::Base.connection.send(:execute_batch, [
# Index tables
"CREATE VIRTUAL TABLE #{search_index_name} USING fts5(#{search_index_query_columns}, content='', contentless_delete=1, tokenize='#{@@search_index_tokenizer}')",
"CREATE VIRTUAL TABLE #{search_index_name}_row USING fts5vocab(#{search_index_name}, row)",
"CREATE VIRTUAL TABLE #{search_index_name}_instance USING fts5vocab(#{search_index_name}, instance)",
# Model triggers
"CREATE TRIGGER #{search_index_name}_insert AFTER INSERT ON #{table_name} BEGIN
INSERT OR REPLACE INTO #{search_index_name}(rowid, #{search_index_query_columns})
SELECT #{table_name}.rowid, #{search_index_query_values}
FROM #{table_name} #{search_index_query_joins}
WHERE #{table_name}.rowid = NEW.rowid;
END",
"CREATE TRIGGER #{search_index_name}_update AFTER UPDATE OF #{search_index_local_attribute_names} ON #{table_name} BEGIN
INSERT OR REPLACE INTO #{search_index_name}(rowid, #{search_index_query_columns})
SELECT #{table_name}.rowid, #{search_index_query_values}
FROM #{table_name} #{search_index_query_joins}
WHERE #{table_name}.rowid = NEW.rowid;
END",
"CREATE TRIGGER #{search_index_name}_delete AFTER DELETE ON #{table_name} BEGIN
DELETE FROM #{search_index_name} WHERE rowid = OLD.rowid;
END",
# Associations triggers
*search_index_assoc_attributes.collect do |assoc, columns|
assoc_table_name = reflect_on_association(assoc).klass.table_name
assoc_primary_key = reflect_on_association(assoc).klass.primary_key
assoc_foreign_key = reflect_on_association(assoc).foreign_key
"CREATE TRIGGER #{search_index_name}_#{assoc}_update AFTER UPDATE OF #{columns.join(", ")} ON #{assoc_table_name} BEGIN
INSERT OR REPLACE INTO #{search_index_name}(rowid, #{search_index_query_columns})
SELECT #{table_name}.rowid, #{search_index_query_values}
FROM #{table_name}
INNER JOIN #{assoc_table_name} ON #{assoc_table_name}.#{assoc_primary_key} = #{table_name}.#{assoc_foreign_key}
WHERE #{assoc_table_name}.#{assoc_primary_key} = NEW.#{assoc_primary_key};
END"
end
])
end
private def drop_search_index
# ActiveRecord gets confused with the semicolons making queries fails in some cases. `execute_batch` does the work
!!ActiveRecord::Base.connection.send(:execute_batch, [
*search_index_associations.collect { |assoc| "DROP TRIGGER IF EXISTS #{search_index_name}_#{assoc}_update" },
"DROP TRIGGER IF EXISTS #{search_index_name}_insert",
"DROP TRIGGER IF EXISTS #{search_index_name}_update",
"DROP TRIGGER IF EXISTS #{search_index_name}_delete",
"DROP TABLE IF EXISTS #{search_index_name}_row",
"DROP TABLE IF EXISTS #{search_index_name}_instance",
"DROP TABLE IF EXISTS #{search_index_name}",
])
end
private def index_current_data
!!ActiveRecord::Base.connection.raw_connection.execute(<<-SQL.squish)
INSERT INTO #{search_index_name}(rowid, #{search_index_query_columns})
SELECT #{table_name}.rowid, #{search_index_query_values}
FROM #{table_name} #{search_index_query_joins}
SQL
end
end
end
# How to use
class Guest < ApplicationRecord
include SqliteFullTextSearch
search_attributes :name, :email, :phone
end
class GuestController < ApplicationController
def search
guests = Guest.search(params[:query])
render "guests/search", locals: { guests: guests }
end
end
@jmschneider
Copy link

This is awesome, thanks for sharing it! I made a fork with some tweaks around making has_many through: work. I was running to some issues if my query string contains a : so I removed it from the allowed_chars for the escape_fts_query method.

@gabriel-curtino
Copy link
Author

You're welcome @jmschneider! I've never used it in production yet, thanks for your improvements!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment