Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save albertoperdomo/579816 to your computer and use it in GitHub Desktop.
Save albertoperdomo/579816 to your computer and use it in GitHub Desktop.
Normalize terms for search in DB
#Así he implementado tu propuesta como scope:
named_scope :normalized_name_begins_with, lambda { |term|
map = [
"âãäåāăąÁÂÃÄÅĀĂĄèééêëēĕėęěĒĔĖĘĚìíîïìĩīĭÌÍÎÏÌĨĪĬóôõöōŏőÒÓÔÕÖŌŎŐùúûüũūŭůÙÚÛÜŨŪŬŮñÑçÇüÜ".mb_chars,
"aaaaaaaaaaaaaaaeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiiooooooooooooooouuuuuuuuuuuuuuuunnccuu"]
# ActiveSupport::Multibyte::Chars#tr broken?
adapted_word = "%#{term.mb_chars.split(//).map {|e| (p = map[0].index(e)) ? map[1][p,1] : e.to_s }.join.downcase}%"
{
:conditions => ["lower(translate(localized_city_names.name, '#{map[0]}', '#{map[1]}')) like #{LocalizedCityName.connection.quote term}"]
}
}
# params[:name] = 'tel'
# params[:limit] = 10
# Esta es la consulta en Ruby
LocalizedCityName.normalized_name_begins_with(params[:name]).descend_by_country_match_and_population(current_user.city.country_iso_code_two_letters).all(params[:limit])
# Esta es la consulta que genera con translate
LocalizedCityName Load (4940.1ms) SELECT "localized_city_names".*, ("cities".country_iso_code_two_letters = 'ES') AS in_country FROM "localized_city_names" INNER JOIN "cities" ON "cities".id = "localized_city_names".city_id WHERE (lower(translate(localized_city_names.name, 'âãäåāăąÁÂÃÄÅĀĂĄèééêëēĕėęěĒĔĖĘĚìíîïìĩīĭÌÍÎÏÌĨĪĬóôõöōŏőÒÓÔÕÖŌŎŐùúûüũūŭůÙÚÛÜŨŪŬŮñÑçÇüÜ', 'aaaaaaaaaaaaaaaeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiiooooooooooooooouuuuuuuuuuuuuuuunnccuu')) like E'tel%') ORDER BY in_country DESC, cities.population DESC
# Un problema que tengo es que por alguna razón ignora el limit
# Por otro lado tarda bastante, casi 5 segundos
# Devuelve 54 resultados
#EXPLAIN
"Sort (cost=6234.21..6235.64 rows=571 width=59) (actual time=4911.203..4911.211 rows=54 loops=1)"
" Sort Key: (((cities.country_iso_code_two_letters)::text = 'ES'::text)), cities.population"
" Sort Method: quicksort Memory: 22kB"
" -> Nested Loop (cost=0.00..6208.07 rows=571 width=59) (actual time=2.433..4910.833 rows=54 loops=1)"
" -> Seq Scan on localized_city_names (cost=0.00..3093.01 rows=571 width=48) (actual time=2.409..4909.622 rows=54 loops=1)"
" Filter: (lower(translate((name)::text, 'âãäåāăąÁÂÃÄÅĀĂĄèééêëēĕėęěĒĔĖĘĚìíîïìĩīĭÌÍÎÏÌĨĪĬóôõöōŏőÒÓÔÕÖŌŎŐùúûüũūŭůÙÚÛÜŨŪŬŮñÑçÇüÜ'::text, 'aaaaaaaaaaaaaaaeeeeeeeeeeeeeeeiiiiiiiiiiiiiiiiooooooooooooooouuuuuuuuuuuuuuuunnccuu'::text)) ~~ 'tel%'::text)"
" -> Index Scan using cities_pkey on cities (cost=0.00..5.44 rows=1 width=15) (actual time=0.015..0.016 rows=1 loops=54)"
" Index Cond: (cities.id = localized_city_names.city_id)"
"Total runtime: 4911.370 ms"
#Así he implementado el scope usando regex basándome en un post que encontré en Google. Tiene menos mapeos de caracteres, igual la diferencia en tiempo de consulta se debe a eso?
#La URL del post original es: http://blog.nelsonsilva.eu/2008/12/15/rails-postgresql-accent-insensitive-search/
named_scope :normalized_name_begins_with, lambda { |term|
{
:conditions => ['localized_city_names.name ~* ?', "^" + term.downcase.accent_insensitive_regexp]
}
}
#He parcheado la clase string de la siguiente forma:
class String
@@ACCENT_INSENSITIVE_REGEXES=[
"(a|á|à|â|ã|A|Á|À|Â|Ã)","(e|é|è|ê|E|É|È|Ê)","(i|í|ì|I|Í|Ì)","(o|ó|ò|ô|õ|O|Ó|Ò|Ô|Õ)","(u|ú|ù|U|Ú|Ù)","(c|ç|Ç)"
]
# @@ACCENT_INSENSITIVE_REGEXES=[
# "(a|á|à|â|ã)","(e|é|è|ê)","(i|í|ì)","(o|ó|ò|ô|õ)","(u|ú|ù)","(c|ç)"
# ]
def accent_insensitive_regexp
res=self
@@ACCENT_INSENSITIVE_REGEXES.each {|exp|
res.gsub! Regexp.new(exp), exp
}
res
end
alias_method :old_upcase, :upcase
alias_method :old_downcase, :downcase
@@DOWNCASE_ACCENT_CHARS="çàáèéìíòóùúâêôãõ"
@@UPCASE_ACCENT_CHARS="ÇÀÁÈÉÌÍÒÓÙÚÂÊÔÃÕ"
def upcase
self.old_upcase.tr(@@DOWNCASE_ACCENT_CHARS,@@UPCASE_ACCENT_CHARS)
end
def downcase
self.old_downcase.tr(@@UPCASE_ACCENT_CHARS,@@DOWNCASE_ACCENT_CHARS)
end
end
# params[:name] = 'tel'
# params[:limit] = 10
# El otro scope que uso en la consulta
named_scope :descend_by_country_match_and_population, lambda { |country_code|
{
:select => %{"localized_city_names".*, ("cities".country_iso_code_two_letters = 'ES') AS in_country},
:joins => :city,
:order => "in_country DESC, cities.population DESC"
}
}
# Esta es la consulta en Ruby
LocalizedCityName.normalized_name_begins_with(params[:name]).descend_by_country_match_and_population(current_user.city.country_iso_code_two_letters).all(params[:limit])
# Esta es la consulta que genera con regex
LocalizedCityName Load (201.2ms) SELECT "localized_city_names".*, ("cities".country_iso_code_two_letters = 'ES') AS in_country FROM "localized_city_names" INNER JOIN "cities" ON "cities".id = "localized_city_names".city_id WHERE (localized_city_names.name ~* E'^t(e|é|è|ê|E|É|È|Ê)l') ORDER BY in_country DESC, cities.population DESC
# EXPLAIN
"Sort (cost=2612.98..2613.01 rows=11 width=59) (actual time=191.220..191.226 rows=54 loops=1)"
" Sort Key: (((cities.country_iso_code_two_letters)::text = 'ES'::text)), cities.population"
" Sort Method: quicksort Memory: 22kB"
" -> Nested Loop (cost=0.00..2612.79 rows=11 width=59) (actual time=0.128..190.946 rows=54 loops=1)"
" -> Seq Scan on localized_city_names (cost=0.00..2521.57 rows=11 width=48) (actual time=0.109..190.143 rows=54 loops=1)"
" Filter: ((name)::text ~* '^t(e|é|è|ê|E|É|È|Ê)l'::text)"
" -> Index Scan using cities_pkey on cities (cost=0.00..8.28 rows=1 width=15) (actual time=0.010..0.011 rows=1 loops=54)"
" Index Cond: (cities.id = localized_city_names.city_id)"
"Total runtime: 191.390 ms"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment