Skip to content

Instantly share code, notes, and snippets.

@mahendrakalkura
Last active May 20, 2017 05:36
Show Gist options
  • Save mahendrakalkura/0a35340f326d21294be390205e309497 to your computer and use it in GitHub Desktop.
Save mahendrakalkura/0a35340f326d21294be390205e309497 to your computer and use it in GitHub Desktop.
Tell me about the most technically challenging and complicated project that you've worked in the past.
'''
I faced this problem when developing tweet.tv.
Steps
=====
1. Gather tweets from twitter.com using end-user supplied query.
Example: tom brady nfl
twitter.com translates this to "tom AND brady AND nfl".
2. Store the tweets in a PostgreSQL database.
3. Allow the end-user to re-run their original queries on the PostgreSQL
database.
Unfortunately, PostgreSQL does not understand standard search engine query
operators.
In order to solve this, I started researching alternate storage methods
(such as Solr). However, none of them satisfied our speed constraints. I then
circled back to PostgreSQL and prepared a query converter module using
PyParsing: http://pyparsing.wikispaces.com/Introduction
This module converts search engine queries into PostgreSQL native full-text
queries.
Examples:
1. `one two three` becomes `one & two & three`
2. `one OR two OR three` becomes `one | two | three`
3. `(one two) three` becomes `(one & two) & three`
'''
from pyparsing import (
alphanums,
CaselessKeyword,
opAssoc,
operatorPrecedence,
Optional,
ParseException,
quotedString,
stringEnd,
Word,
)
class unary_operation(object):
    """Parse node for a prefix operator applied to a single operand.

    Subclasses supply the ``symbol`` class attribute that is written in
    front of the operand's expression.
    """

    def __init__(self, token):
        # token[0] is the matched group: the operator followed by its operand.
        matched = token[0]
        self.operator, self.operand = matched

    def get_expression(self):
        """Return the symbol, a space, and the operand's expression."""
        fields = {
            'symbol': self.symbol,
            'operand': self.operand.get_expression(),
        }
        return '%(symbol)s %(operand)s' % fields
class not_operation(unary_operation):
    """Unary node that renders its operand prefixed with ``!``."""

    symbol = '!'
class binary_operation(object):
    """Parse node for an infix operator joining two or more operands.

    Subclasses supply the ``symbol`` class attribute that is placed between
    the operands' expressions.
    """

    def __init__(self, token):
        # token[0] is the matched group, alternating operand and operator:
        # [operand, operator, operand, operator, operand, ...].
        self.operator = token[0][1]
        self.operands = token[0][0::2]

    def get_expression(self):
        """Return the operands joined by ``symbol``, wrapped in parentheses.

        Operands whose expression is empty are left out. If no operand
        remains, an empty string is returned rather than empty parentheses.
        """
        # Render every operand exactly once. Rendering is recursive, so
        # calling get_expression() both to filter and to collect would
        # double the work at each level of nesting.
        rendered = [operand.get_expression() for operand in self.operands]
        parts = [part for part in rendered if part]
        if not parts:
            return ''
        separator = ' %s ' % (self.symbol,)
        return '( %s )' % (separator.join(parts),)
class and_operation(binary_operation):
    """Binary node that joins its operands with ``&``."""

    symbol = '&'
class or_operation(binary_operation):
    """Binary node that joins its operands with ``|``."""

    symbol = '|'
class segment_operation(object):
    """Leaf node holding a single term of the query."""

    def __init__(self, tokens):
        # Only the first token is kept; it is returned as-is when rendering.
        first = tokens[0]
        self.value = first

    def get_expression(self):
        """Return the stored term unchanged."""
        return self.value
def phrase_operation(value):
    """Convert a quoted phrase into a parenthesised AND of its words.

    ``value[0]`` is the quoted string including its surrounding quote
    characters; for example ``'"tom brady"'`` yields ``'( tom & brady )'``.

    Words are split on any run of whitespace, so tabs or newlines inside
    the quotes separate words instead of ending up embedded in one. A
    phrase with no words yields an empty string rather than empty
    parentheses.
    """
    # Drop the opening and closing quote characters, then split on whitespace.
    words = value[0][1:-1].split()
    if not words:
        return ''
    return '( %s )' % (' & '.join(words),)
# Reserved words of the query language, matched case-insensitively.
not_operator = CaselessKeyword('not')
and_operator = CaselessKeyword('and')
or_operator = CaselessKeyword('or')

# A bare word: letters, digits and the punctuation listed here.
_word = Word(alphanums + '`~!@#$%^&*-_=+\\|<>/?')

# A quoted phrase. The parse action is attached to a copy so that
# pyparsing's shared module-level ``quotedString`` object is not modified
# for every other user of the library in this process.
_phrase = quotedString.copy().setParseAction(phrase_operation)

# A segment is a word or a phrase that is not one of the reserved words.
segment = (
    ~(not_operator | and_operator | or_operator) + (_word | _phrase)
).setParseAction(segment_operation)

# Operator levels are listed from tightest-binding to loosest: NOT, AND, OR.
# The AND keyword is optional, so two adjacent segments are AND-ed.
expression = operatorPrecedence(segment, [
    (not_operator, 1, opAssoc.RIGHT, not_operation),
    (Optional(and_operator, default='and'), 2, opAssoc.LEFT, and_operation),
    (or_operator, 2, opAssoc.LEFT, or_operation),
])
def get_query(query):
    """Convert a search-engine style query into a full-text query string.

    Returns the converted expression, or an empty string when ``query``
    cannot be parsed.
    """
    # Requiring stringEnd after the expression rejects input that is only
    # partially matched by the grammar.
    parser = expression + stringEnd
    try:
        parsed = parser.parseString(query)
    except ParseException:
        return ''
    root = parsed[0]
    return root.get_expression()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment