Skip to content

Instantly share code, notes, and snippets.

@kluzny
Last active February 12, 2021 17:14
Show Gist options
  • Save kluzny/498e3a3bace43b0e44ce07add44a99fa to your computer and use it in GitHub Desktop.
Save kluzny/498e3a3bace43b0e44ce07add44a99fa to your computer and use it in GitHub Desktop.
simple class that takes an active record model and generates pg's similarity based searches
# frozen_string_literal: true
# Uses pg's trigram extension to enable multi column searches
#
# add a search method to your model
# def self.search(query)
# SimilaritySearch.new(self, over: [:name, :email, :nickname]).search(query)
# end
#
# Then call it with Foo.search("bar")
class SimilaritySearch
attr_accessor :klass, :columns, :select
def initialize(klass, over:, select: nil)
self.klass = klass
self.columns = over
self.select = select || over
end
def search(unsafe_query)
relation_builder(sanitize(unsafe_query))
end
def table_name
@table_name ||= klass.table_name
end
def search_table_name
"pg_search"
end
# "products"."short_name"
def table_column(column)
%{"#{table_name}"."#{column}"}
end
# "pg_search"."id"
def search_column(column)
%{"#{search_table_name}"."#{column}"}
end
# rubocop:disable Style/LineLength
# INNER JOIN (
# SELECT
# "products"."id" AS pg_search_id,
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS srank,
# (similarity(coalesce("products"."name"::text, ''), 'FOO')) AS nrank,
# (similarity((coalesce("products"."short_name"::text, '') || ' ' || coalesce("products"."name"::text, '') || ' ' || coalesce("products"."gmi_code"::text, '') || ' ' || coalesce("products"."currency"::text, '')), 'FOO')) AS rank
# FROM "products"
# WHERE (
# (
# (
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D'))
# @@
# (to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*'))
# )
# )
# ) AS pg_search ON "products"."id" = pg_search.pg_search_id
# rubocop:enable Style/LineLength
def inner_join(query)
%{ INNER JOIN ( #{inner_query(query)} ) AS #{search_table_name} ON #{table_column(:id)} = #{search_column(:id)} }
end
# rubocop:disable Style/LineLength
# SELECT
# "products"."id" AS pg_search_id,
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS srank,
# (similarity(coalesce("products"."name"::text, ''), 'FOO')) AS nrank,
# (similarity((coalesce("products"."short_name"::text, '') || ' ' || coalesce("products"."name"::text, '') || ' ' || coalesce("products"."gmi_code"::text, '') || ' ' || coalesce("products"."currency"::text, '')), 'FOO')) AS rank
# FROM "products"
# WHERE (
# (
# (
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D'))
# @@
# (to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*'))
# )
# )
# rubocop:enable Style/LineLength
def inner_query(query)
relation = klass.select(:id)
columns.each do |column|
relation = relation.send(:select, rank_selector(column, query))
end
relation = relation.where(ts_vector_filter(query))
relation.to_sql
end
# rubocop:disable Style/LineLength
# SELECT
# "products"."id",
# "products"."name",
# "products"."short_name",
# pg_search.rank,
# pg_search.srank,
# pg_search.nrank
# FROM "products"
# INNER JOIN (
# SELECT
# "products"."id" AS pg_search_id,
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS srank,
# (similarity(coalesce("products"."name"::text, ''), 'FOO')) AS nrank,
# (similarity((coalesce("products"."short_name"::text, '') || ' ' || coalesce("products"."name"::text, '') || ' ' || coalesce("products"."gmi_code"::text, '') || ' ' || coalesce("products"."currency"::text, '')), 'FOO')) AS rank
# FROM "products"
# WHERE (
# (
# (
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') || setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') || setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D'))
# @@
# (to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*'))
# )
# )
# ) AS pg_search ON "products"."id" = pg_search.pg_search_id
# ORDER BY greatest(pg_search.srank, pg_search.nrank) DESC, "products"."id" ASC
# rubocop:enable Style/LineLength
def relation_builder(query)
klass.select("*").joins(inner_join(query)).order(greatest_rank)
end
# filter out models without any prefix matches
# (
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') ||
# setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') ||
# setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') ||
# setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D')
# )
# @@
# (
# to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*')
# )
def ts_vector_filter(query)
%{(#{ts_weighted_columns}) @@ (#{ts_query(query)})}
end
# setweight(to_tsvector('simple', coalesce("products"."short_name"::text, '')), 'A') ||
# setweight(to_tsvector('simple', coalesce("products"."name"::text, '')), 'D') ||
# setweight(to_tsvector('simple', coalesce("products"."gmi_code"::text, '')), 'D') ||
# setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'D')
def ts_weighted_columns
columns.map { |column| ts_set_weight(column) }.join(" || ")
end
def pg_dictionary
"simple"
end
def auto_weight(_column)
"A"
end
# setweight(to_tsvector('simple', coalesce("products"."currency"::text, '')), 'A')
def ts_set_weight(column)
%{setweight(to_tsvector('#{pg_dictionary}', #{coalesce(column)}), '#{auto_weight(column)}')}
end
# to_tsquery('simple', ''' ' || 'FOO' || ' ''' || ':*')
def ts_query(query)
%{(to_tsquery('#{pg_dictionary}', ''' ' || #{query} || ' ''' || ':*'))}
end
# greatest(name_rank, short_name) DESC
# or
# short_name DESC
def greatest_rank
if rank_list.length > 1
%{greatest(#{search_rank_list})}
else
rank_list.first
end + " DESC"
end
def search_rank_list
rank_list.map do |rank|
search_column(rank)
end.join(",")
end
# name_rank, short_name
def rank_list
@rank_list ||= columns.map { |column| rank_name(column) }
end
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO')) AS short_name_rank,
def rank_selector(column, query)
%{(#{rank_builder(column, query)}) as #{rank_name(column)}}
end
# coalesce("products"."currency"::text, ''))
def coalesce(column)
%{coalesce(#{table_column(column)}, '')}
end
# (similarity(coalesce("products"."short_name"::text, ''), 'FOO'))
def rank_builder(column, query)
%{similarity(#{coalesce(column)}, #{query})}
end
# short_name_rank
def rank_name(symbol)
"#{symbol}_rank"
end
# 'FOO'
def sanitize(unsafe_query)
ActiveRecord::Base.connection.quote(unsafe_query)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment