Last active
December 29, 2015 06:19
-
-
Save micaleel/7628506 to your computer and use it in GitHub Desktop.
A Python class that parses and rewrites feature-sentiment queries into a form that can be consumed by Solr.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class QueryParser:
    """Parse feature-sentiment queries and rewrite them in Solr/Lucene syntax.

    A query is a whitespace-separated sequence of tokens. A token ending in
    ``+`` asks for the feature with positive sentiment, a token ending in
    ``-`` asks for it with negative sentiment, and any other token names a
    feature without a sentiment constraint.
    """

    # Trailing marker character -> numeric sentiment.
    _MARKERS = {"+": 1, "-": -1}

    # Numeric sentiment -> clause template; ``{}`` receives the feature name.
    _TEMPLATES = {
        1: "(feature:{} AND polarity:[0 TO *])",
        -1: "(feature:{} AND polarity:[* TO 0])",
        0: "feature:{}",
    }

    def parse(self, query):
        """Split *query* into ``(feature, sentiment)`` pairs.

        Args:
            query: Whitespace-separated feature tokens, each optionally
                suffixed with ``+`` or ``-``.

        Returns:
            A list of ``(feature, sentiment)`` tuples in query order, where
            sentiment is ``1`` (``+`` suffix), ``-1`` (``-`` suffix) or ``0``
            (no suffix). Tokens consisting only of a marker are dropped.

        Raises:
            TypeError: If *query* is not a ``str``.
        """
        # Explicit check instead of ``assert``: asserts vanish under ``-O``.
        if not isinstance(query, str):
            raise TypeError(
                "query must be a str, not {}".format(type(query).__name__)
            )

        result = []
        for token in query.split():
            # ``split()`` never yields empty tokens, so ``token[-1]`` is safe.
            sentiment = self._MARKERS.get(token[-1], 0)
            feature = token[:-1] if sentiment else token
            if not feature:
                # A bare "+" or "-" names no feature; keeping it would
                # produce a clause with an empty field value.
                continue
            result.append((feature, sentiment))
        return result

    def rewrite(self, query):
        """Rewrite *query* as a single OR-joined Solr/Lucene query string.

        Args:
            query: A feature-sentiment query as accepted by :meth:`parse`.

        Returns:
            The clauses for every parsed feature joined with ``" OR "``, or
            an empty string when the query contains no features.

        Raises:
            TypeError: If *query* is not a ``str``.
        """
        if not isinstance(query, str):
            raise TypeError(
                "query must be a str, not {}".format(type(query).__name__)
            )

        clauses = []
        for feature, sentiment in self.parse(query):
            template = self._TEMPLATES.get(sentiment)
            if template is None:
                # Unknown sentiment values are ignored rather than rejected.
                continue
            clauses.append(template.format(feature))
        return " OR ".join(clauses)
if __name__ == "__main__":
    from pprint import pprint

    # Demo: rewrite a sample feature-sentiment query and pretty-print the
    # resulting Solr/Lucene query string.
    query = "customer service+ wifi+ parking+ noise- swimming pool-"
    parser = QueryParser()
    solr_query = parser.rewrite(query)
    pprint(solr_query)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment