Skip to content

Instantly share code, notes, and snippets.

@hungdh0x5e
Last active November 29, 2019 09:21
Show Gist options
  • Save hungdh0x5e/49a135070485e19c6653c9a2ae646c71 to your computer and use it in GitHub Desktop.
Save hungdh0x5e/49a135070485e19c6653c9a2ae646c71 to your computer and use it in GitHub Desktop.
Full text search with Postgres
# STEP 1
class AddSearchTextToLeads < ActiveRecord::Migration[5.2]
def change
change_table :leads, bulk: true do |t|
t.tsvector :search_text
end
add_index :leads, :search_text, using: :gin
end
end
# STEP 2
# test: SELECT lower_unaccent('áàâãäåāăą') -- Result: aaaaaaa
class AddLowerUnaccentFunction < ActiveRecord::Migration[5.2]
def self.up
execute "CREATE OR REPLACE FUNCTION lower_unaccent(text)
RETURNS text AS
$func$
SELECT lower(translate($1,
'¹²³ÀÁẢẠÂẤẦẨẬẪÃÄÅÆàáảạâấầẩẫậãäåæĀāĂẮẰẲẴẶăắằẳẵặĄąÇçĆćĈĉĊċČčĎďĐđÈÉẸÊẾỀỄỆËèéẹêẻẽểềếễệëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨÌÍỈỊÎÏìíỉịîïĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňʼnŊŋÒÓỎỌÔỐỒỔỖỘỐỒỔỖỘƠỚỜỞỠỢÕÖòóỏọôốồổỗộơớờỡợởõöŌōŎŏŐőŒœØøŔŕŖŗŘřߌśŜŝŞşŠšŢţŤťŦŧÙÚỦỤƯỪỨỬỮỰÛÜùúủụûưứừửữựüŨũŪūŬŭŮůŰűŲųŴŵÝýÿŶŷỸŸỲỳỶỷỸỹỴỵŹźŻżŽžёЁ',
'123aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaccccccccccddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeegggggggghhhhiiiiiiiiiiiiiiiiiiiiiiiijjkkkllllllllllnnnnnnnnnnnoooooooooooooooooooooooooooooooooooooooooooooooeeoorrrrrrsssssssssttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuwwyyyyyyyyyyyyzzzzzzее'));
$func$ LANGUAGE sql IMMUTABLE;"
end
def self.down
execute "drop function lower_unaccent(text) cascade"
end
end
# STEP 3
class AddTriggerToTokenizeLeads < ActiveRecord::Migration[5.2]
def self.up
execute "
CREATE OR REPLACE FUNCTION leads_search_text_trigger_func()
RETURNS TRIGGER LANGUAGE plpgsql AS $$
BEGIN NEW.search_text = setweight(to_tsvector(coalesce(lower_unaccent(NEW.phone))), 'A') || setweight(to_tsvector(coalesce(lower_unaccent(NEW.email))), 'B') || setweight(to_tsvector(coalesce(lower_unaccent(NEW.full_name))), 'C');
RETURN NEW;
END $$;
DROP TRIGGER IF EXISTS leads_search_text_trigger ON leads;
CREATE TRIGGER leads_search_text_trigger BEFORE INSERT OR UPDATE
OF phone, email, full_name ON leads FOR EACH ROW
EXECUTE PROCEDURE leads_search_text_trigger_func();
"
end
def self.down
execute "
DROP TRIGGER IF EXISTS leads_search_text_trigger ON leads;
DROP FUNCTION IF EXISTS leads_search_text_trigger_func;
"
end
end
# STEP 4
# lib/pg_search.rb
class PgSearch
DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’]/.freeze
attr_reader :query, :options, :model, :normalizer, :column
def initialize(query, model, column, normalizer, options)
@query = query
@options = (options || {})
@model = model
@normalizer = normalizer
@column = column
end
def search
model.where(conditions.to_sql)
end
def conditions
Arel::Nodes::Grouping.new(
Arel::Nodes::InfixOperation.new("@@", PgSearch.arel_wrap(column), PgSearch.arel_wrap(tsquery))
)
end
# Extract from pg_search gem
# https://github.com/Casecommons/pg_search/blob/master/lib/pg_search/features/tsearch.rb#L100
# Result: "to_tsquery('simple', ''' ' || lower_unaccent('something') || ' ''' || ':*')"
def tsquery_for_term(unsanitized_term)
sanitized_term = unsanitized_term.gsub(DISALLOWED_TSQUERY_CHARACTERS, " ").strip
quoted_term = @model.connection.quote(sanitized_term)
normalize_term = Arel::Nodes::NamedFunction.new(normalizer, [Arel.sql(quoted_term)]).to_sql
terms = [
Arel::Nodes.build_quoted("' "),
Arel.sql(normalize_term),
Arel::Nodes.build_quoted(" '"),
(Arel::Nodes.build_quoted(":*") if options[:prefix])
].compact
tsquery_sql = terms.inject do |memo, term|
Arel::Nodes::InfixOperation.new("||", memo, Arel::Nodes.build_quoted(term))
end
dictionary = Arel::Nodes.build_quoted(:simple)
Arel::Nodes::NamedFunction.new("to_tsquery", [dictionary, tsquery_sql]).to_sql
end
def tsquery
query_terms = query.split(" ").compact
tsquery_terms = query_terms.map { |term| tsquery_for_term(term) }
tsquery_terms.join(options[:any_word] ? ' || ' : ' && ')
end
def self.arel_wrap(sql_string)
Arel::Nodes::Grouping.new(Arel.sql(sql_string))
end
end
# STEP 5
# Usage: Lead.pg_search('hungdh') => ActivateRelation
scope :pg_search, lambda { |query|
opts = { any_word: false, prefix: true }
PgSearch.new(query, self, 'search_text', 'lower_unaccent', opts)
.search
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment