Skip to content

Instantly share code, notes, and snippets.

@greeness
Created October 24, 2012 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save greeness/3948657 to your computer and use it in GitHub Desktop.
Save greeness/3948657 to your computer and use it in GitHub Desktop.
query parser example
from random import sample
def search(word):
s = sample(range(10), len(word))
print '-- SEARCH %-12s %68s %-60s' % (word, '', str(sorted(s)))
return set(s)
def set_op(operator, set1, set2):
if operator == 'AND':
return set1 & set2
if operator == 'OR':
return set1 | set2
return set()
def do_operations(operator, operands, hits_buffer=set()):
if len(operands) == 0: return set()
combined = set(search(operands[0]))
for word in operands[1:]:
hits = search(word)
combined = set_op(operator, combined, hits)
if hits_buffer:
combined = set_op(operator, combined, hits_buffer)
return combined
def parse(query):
print 'query: %s' % query
hits_buffer = set()
stack = []
for item in query.split():
print '- %-20s %-25s %s' % (item, list(hits_buffer), str(stack))
if item in ['OR', 'AND']:
stack.append(item)
elif item == '(':
continue
elif item == ')':
operands = []
while stack and stack[-1] not in ['OR', 'AND']:
operands.append(stack.pop())
operator = stack.pop()
hits_buffer = do_operations(operator, operands, hits_buffer)
print '-- %-19s %-40s' % (operator, list(hits_buffer))
else:
stack.append(item)
return hits_buffer
if __name__ == '__main__':
query = "OR ( AND ( maria sharapova ) tennis )"
parse(query)
print '-'*75
query = "AND ( OR ( AND ( maria sharapova ) tennis ) soccer )"
parse(query)
""" Result:
query: OR ( AND ( maria sharapova ) tennis )
- OR [] []
- ( [] ['OR']
- AND [] ['OR']
- ( [] ['OR', 'AND']
- maria [] ['OR', 'AND']
- sharapova [] ['OR', 'AND', 'maria']
- ) [] ['OR', 'AND', 'maria', 'sharapova']
-- SEARCH sharapova [0, 1, 2, 4, 5, 6, 7, 8, 9]
-- SEARCH maria [2, 5, 6, 7, 9]
-- AND [9, 2, 5, 6, 7]
- tennis [9, 2, 5, 6, 7] ['OR']
- ) [9, 2, 5, 6, 7] ['OR', 'tennis']
-- SEARCH tennis [0, 1, 2, 3, 6, 9]
-- OR [0, 1, 2, 3, 5, 6, 7, 9]
---------------------------------------------------------------------------
query: AND ( OR ( AND ( maria sharapova ) tennis ) soccer )
- AND [] []
- ( [] ['AND']
- OR [] ['AND']
- ( [] ['AND', 'OR']
- AND [] ['AND', 'OR']
- ( [] ['AND', 'OR', 'AND']
- maria [] ['AND', 'OR', 'AND']
- sharapova [] ['AND', 'OR', 'AND', 'maria']
- ) [] ['AND', 'OR', 'AND', 'maria', 'sharapova']
-- SEARCH sharapova [0, 1, 2, 3, 4, 5, 7, 8, 9]
-- SEARCH maria [0, 1, 3, 6, 9]
-- AND [0, 1, 3, 9]
- tennis [0, 1, 3, 9] ['AND', 'OR']
- ) [0, 1, 3, 9] ['AND', 'OR', 'tennis']
-- SEARCH tennis [3, 5, 6, 7, 8, 9]
-- OR [0, 1, 3, 5, 6, 7, 8, 9]
- soccer [0, 1, 3, 5, 6, 7, 8, 9] ['AND']
- ) [0, 1, 3, 5, 6, 7, 8, 9] ['AND', 'soccer']
-- SEARCH soccer [0, 1, 3, 4, 5, 7]
-- AND [0, 1, 3, 5, 7]
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment