mahendrakalkura/1.py

## 1.py
'''
I faced this problem when developing tweet.tv.

Steps
=====

1. Gather tweets from twitter.com using end-user supplied query.
   Example: tom brady nfl
   twitter.com translates this to "tom AND brady AND nfl".
2. Store the tweets in a PostgreSQL database.
3. Allow the end-user to re-run their original queries on the PostgreSQL
   database.

Unfortunately, PostgreSQL does not understand standard search engine query
operators.

In order to solve this, I started researching alternate storage methods
(such as Solr). However, none of them satisfied our speed constraints. I then
circled back to PostgreSQL and prepared a query converter module using
PyParsing: http://pyparsing.wikispaces.com/Introduction

This module converts search engine queries into PostgreSQL native full-text
queries.

Examples:

1. `one two three` becomes `one & two & three`
2. `one OR two OR three` becomes `one | two | three`
3. `(one two) three` becomes `(one & two) & three`
'''

from pyparsing import (
    alphanums,
    CaselessKeyword,
    opAssoc,
    operatorPrecedence,
    Optional,
    ParseException,
    quotedString,
    stringEnd,
    Word,
)


class unary_operation(object):
    '''
    Base node for a prefix operator applied to a single operand.

    Subclasses supply ``symbol``, the text written in front of the rendered
    operand.
    '''

    def __init__(self, token):
        '''
        Unpack one parsed group into its operator and its operand.

        ``token[0]`` must hold exactly two items: the operator followed by
        the operand node.
        '''
        group = token[0]
        self.operator, self.operand = group

    def get_expression(self):
        '''Return ``symbol``, a space, then the operand's expression.'''
        rendered_operand = self.operand.get_expression()
        return '%s %s' % (self.symbol, rendered_operand)


class not_operation(unary_operation):
    '''Unary node rendered with the ``!`` symbol.'''
    symbol = '!'


class binary_operation(object):
    '''
    Base node for a chain of operands joined by one infix operator.

    Subclasses supply ``symbol``, the text placed between the rendered
    operands.
    '''

    def __init__(self, token):
        '''
        Split one parsed group into its operator and its operands.

        ``token[0]`` is read as an alternating sequence
        ``operand, operator, operand, ...``: the operator is taken from
        index 1 and the operands from every even index.
        '''
        self.operator = token[0][1]
        self.operands = token[0][0::2]

    def get_expression(self):
        '''
        Return the operands' expressions joined by ``symbol`` and wrapped in
        ``( ... )``, leaving out any operand whose expression is empty.
        '''
        separator = ' %s ' % self.symbol
        # Render every operand exactly once. Rendering it a second time just
        # to test for emptiness doubles the work at each nesting level, which
        # makes deeply nested expressions exponentially expensive.
        rendered = [operand.get_expression() for operand in self.operands]
        return '( %s )' % separator.join([part for part in rendered if part])


class and_operation(binary_operation):
    '''Binary node whose operands are joined with the ``&`` symbol.'''
    symbol = '&'


class or_operation(binary_operation):
    '''Binary node whose operands are joined with the ``|`` symbol.'''
    symbol = '|'


class segment_operation(object):
    '''
    Leaf node of the expression tree: wraps a single parsed token.
    '''

    def __init__(self, tokens):
        '''Keep the first element of ``tokens`` as this node's value.'''
        first = tokens[0]
        self.value = first

    def get_expression(self):
        '''Return the stored value unchanged.'''
        return self.value


def phrase_operation(value):
    '''
    Convert a quoted phrase into a parenthesised AND of its words.

    ``value[0]`` is the quoted text including its opening and closing quote
    characters; both are dropped before the phrase is split into words.

    Returns ``'( w1 & w2 & ... )'``, or ``''`` when the phrase holds no
    words, so that an empty phrase does not produce the malformed ``(  )``.
    '''
    # split() with no argument breaks on any run of whitespace (spaces, tabs,
    # newlines) and never yields empty items, so words separated by something
    # other than a single space are still treated as separate terms.
    words = value[0][1:-1].split()
    if not words:
        return ''
    return '( %s )' % ' & '.join(words)

not_operator = CaselessKeyword('not')
and_operator = CaselessKeyword('and')
or_operator = CaselessKeyword('or')

segment = (~(not_operator | and_operator | or_operator) + (Word(
    alphanums + '`~!@#$%^&*-_=+\\|<>/?'
) | quotedString.setParseAction(phrase_operation))).setParseAction(
    segment_operation
)

expression = operatorPrecedence(segment, [
    (not_operator, 1, opAssoc.RIGHT, not_operation),
    (Optional(and_operator, default='and'), 2, opAssoc.LEFT, and_operation),
    (or_operator, 2, opAssoc.LEFT, or_operation),
])


def get_query(query):
    '''
    Convert a search-engine style query into its converted expression.

    The whole of ``query`` must match the grammar. Returns the rendered
    expression, or ``''`` when parsing fails.
    '''
    grammar = expression + stringEnd
    try:
        parsed = grammar.parseString(query)
    except ParseException:
        return ''
    return parsed[0].get_expression()
	'''
	I faced this problem when developing tweet.tv.

	Steps
	=====

	1. Gather tweets from twitter.com using end-user supplied query.
	Example: tom brady nfl
	twitter.com translates this to "tom AND brady AND nfl".
	2. Store the tweets in a PostgreSQL database.
	3. Allow the end-user to re-run their original queries on the PostgreSQL
	database.

	Unfortunately, PostgreSQL does not understand standard search engine query
	operators.

	In order to solve this, I started researching alternate storage methods
	(such as Solr). However, none of them satisfied our speed constraints. I then
	circled back to PostgreSQL and prepared a query converter module using
	PyParsing: http://pyparsing.wikispaces.com/Introduction

	This module converts search engine queries into PostgreSQL native full-text
	queries.

	Examples:

	1. `one two three` becomes `one & two & three`
	2. `one OR two OR three` becomes `one \| two \| three`
	3. `(one two) three` becomes `(one & two) \| three`
	'''

	from pyparsing import (
	alphanums,
	CaselessKeyword,
	opAssoc,
	operatorPrecedence,
	Optional,
	ParseException,
	quotedString,
	stringEnd,
	Word,
	)


	class unary_operation(object):

	def __init__(self, token):
	self.operator, self.operand = token[0]

	def get_expression(self):
	return '%(symbol)s %(operand)s' % {
	'symbol': self.symbol,
	'operand': self.operand.get_expression(),
	}


	class not_operation(unary_operation):
	symbol = '!'


	class binary_operation(object):

	def __init__(self, token):
	self.operator = token[0][1]
	self.operands = token[0][0::2]

	def get_expression(self):
	return '( %(string)s )' % {
	'string': (' %(symbol)s ' % {
	'symbol': self.symbol,
	}).join([
	operand.get_expression()
	for operand in self.operands
	if operand.get_expression()
	]),
	}


	class and_operation(binary_operation):
	symbol = '&'


	class or_operation(binary_operation):
	symbol = '\|'


	class segment_operation(object):

	def __init__(self, tokens):
	self.value = tokens[0]

	def get_expression(self):
	return self.value


	def phrase_operation(value):
	return '( %(string)s )' % {
	'string': ' & '.join([
	item.strip()
	for item in value[0][1:-1].strip().split(' ')
	if item.strip()
	]),
	}

	not_operator = CaselessKeyword('not')
	and_operator = CaselessKeyword('and')
	or_operator = CaselessKeyword('or')

	segment = (~(not_operator \| and_operator \| or_operator) + (Word(
	alphanums + '`~!@#$%^&*-_=+\\\|<>/?'
	) \| quotedString.setParseAction(phrase_operation))).setParseAction(
	segment_operation
	)

	expression = operatorPrecedence(segment, [
	(not_operator, 1, opAssoc.RIGHT, not_operation),
	(Optional(and_operator, default='and'), 2, opAssoc.LEFT, and_operation),
	(or_operator, 2, opAssoc.LEFT, or_operation),
	])


	def get_query(query):
	try:
	return (expression + stringEnd).parseString(query)[0].get_expression()
	except ParseException:
	return ''