Created
December 14, 2011 18:05
-
-
Save robhurring/1477730 to your computer and use it in GitHub Desktop.
Search term parsing : http://proccli.com/advanced-search-query-parsing-ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Search term parser from https://gist.github.com/1477730 | |
# Modified to allow periods (and other non-letter chars) in unquoted field values | |
# and field names. | |
# | |
# Helper class to help parse out more advanced search terms | |
# from a form query | |
# | |
# Note: all hash keys are downcased, so ID:10 == {'id' => 10} | |
# you can also access all keys with methods e.g.: terms.id = terms['id'] = 10 | |
# this doesn't work with query, as that's reserved for the left-over pieces | |
# | |
# Usage: | |
# terms = SearchTerms.new('id:10 search terms here') | |
# => @query="search terms here", @parts={"id"=>"10"} | |
# => terms.query = 'search terms here' | |
# => terms['id'] = 10 | |
# | |
# terms = SearchTerms.new('name:"support for spaces" state:pa') | |
# => @query="", @parts={"name"=>"support for spaces", "state"=>"pa"} | |
# => terms.query = '' | |
# => terms['name'] = 'support for spaces' | |
# => terms.name = 'support for spaces' | |
# | |
# terms = SearchTerms.new('state:pa,nj,ca') | |
# => @query="", @parts={"state"=>["pa","nj","ca"]} | |
# | |
# terms = SearchTerms.new('state:pa,nj,ca', false) | |
# => @query="", @parts={"state"=>"pa,nj,ca"} | |
# | |
# Useful to drive custom logic in controllers | |
class SearchTerms | |
attr_reader :query, :parts | |
# regex scanner for the parser | |
SCANNER = %r{ | |
(?: | |
([\w\.]+) # look for any word | |
) | |
(?: # check if it has a value attached | |
: # find the value delimiter | |
( | |
[\w,\-]+ # match any word-like values | |
| # -or- | |
(?:"(?:.+|[^\"])*") # match any quoted values | |
) | |
)? | |
}x | |
# query:: this is what you want tokenized | |
# split:: if you'd like to split values on "," then pass true | |
def initialize(query, split = true) | |
@query = query | |
@parts = {} | |
@split = split | |
parse_query! | |
end | |
def [](key) | |
@parts[key] | |
end | |
private | |
def parse_query! | |
tmp = [] | |
@query.scan(SCANNER).map do |key,value| | |
if value.nil? | |
tmp << key | |
else | |
key.downcase! | |
@parts[key] = clean_value(value) | |
define_metaclass_method(key){ @parts[key] } unless key == 'query' | |
end | |
end | |
@query = tmp.join(' ') | |
end | |
def clean_value(value) | |
return value.tr('"', '') if value.include?('"') | |
return value.split(',') if @split && value.include?(',') | |
return true if value == 'true' | |
return false if value == 'false' | |
return value.to_i if value =~ /^[1-9][0-9]*$/ | |
value | |
end | |
def define_metaclass_method(method, &block) | |
(class << self; self; end).send :define_method, method, &block | |
end | |
end | |
if $0 == __FILE__ | |
require 'test/unit' | |
class SearchTermsTest < Test::Unit::TestCase | |
TEST_CASES = { | |
"simple" => ["foo","foo",{}], | |
"simple_field" => ["one:two","",{"one" => "two"}], | |
"quotes" => [%{foo:"quoted value"}, "", {"foo" => "quoted value"}], | |
"term_with_period" => ["1.5","1.5",{}], | |
"multiple_fields" => ["one:two three:four","",{"one" => "two", "three" => "four"}], | |
"int_parse" => ["id:123","",{"id" => 123}], | |
"int_parse_leading_letter" => ["id:a01","","id" => "a01"], | |
"int_parse_leading_zero" => ["id:001","","id" => "001"], | |
"mixed_fields_terms" => ["one two:three four five:six","one four",{"two" => "three", "five" => "six"}] | |
} | |
TEST_CASES.each do |name, (input, query, parts)| | |
define_method("test_#{name}") do | |
terms = SearchTerms.new(input) | |
assert_equal query, terms.query | |
assert_equal parts, terms.parts | |
end | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# basic usage to search users from your #index action | |
class UsersController < ApplicationController | |
def index | |
if params[:q] | |
terms = SearchTerms.new(params[:q]) | |
if terms['id'] | |
return redirect_to user_path(terms['id']) | |
else | |
@users = @users.search_by_name(terms.query) unless terms.query.blank? | |
@users = @users.with_role(terms['role']) if terms['role'] | |
@users = @users.registered(false) if terms['guest'] | |
end | |
end | |
end | |
end |
Good catch. It wasn't intentional, the regex is just bad on line #64 -- commenting that out would be a quick fix, but it wouldn't cast anything digit-like to int. I'll try to update the gist later today.
I changed the syntax to accept anything except whitespace in field values and only parse /[1-9][0-9]*/ as an integer: https://gist.github.com/2049623
Thanks for the code :)
Merging in @robertknight fixes/tests
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For a query such as 'id:a012' this is parsed as {"id" => 0} instead of {"id" => "a012"}, is this unintentional?