Skip to content

Instantly share code, notes, and snippets.

@joocer
Forked from leehsueh/boolparser.py
Last active August 2, 2021 13:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joocer/9e939b2bda90d56bf48b8ff78eeba0e7 to your computer and use it in GitHub Desktop.
Save joocer/9e939b2bda90d56bf48b8ff78eeba0e7 to your computer and use it in GitHub Desktop.
Python Boolean Expression Parser/Evaluator
"""
Grammar:
========
Expression --> AndTerm { OR AndTerm}+
AndTerm --> Condition { AND Condition}+
Condition --> Terminal (>,<,>=,<=,==) Terminal | (Expression)
Terminal --> Number or String or Variable
Usage:
======
from boolparser import *
p = BooleanParser('<expression text>')
p.evaluate(variable_dict) # variable_dict is a dictionary providing values for variables that appear in <expression text>
"""
import operator
import re
REGEX_CHARACTERS = {ch: "\\" + ch for ch in ".^$*+?{}[]|()\\"}
def _sql_like_fragment_to_regex(fragment):
"""
Allows us to accepts LIKE statements to search data
"""
# https://codereview.stackexchange.com/a/36864/229677
safe_fragment = "".join([REGEX_CHARACTERS.get(ch, ch) for ch in fragment])
return re.compile("^" + safe_fragment.replace("%", ".*?").replace("_", ".") + "$")
# set of operators we can interpret not in the `operator` module
def like(x, y):
return _sql_like_fragment_to_regex(y.lower()).match(str(x).lower())
TOKENS = {
"NUM" : "$Number$",
"STR" : "$Literal$",
"VAR" : "$Variable$",
"BOOL": "$Boolean$",
">" : ">",
">=" : ">=",
"<" : "<",
"<=" : "<=",
"==" : "==",
"!=" : "!=",
"LP" : "(",
"RP" : ")",
"AND" : "AND",
"OR" : "OR",
"LIKE" : "LIKE"
}
TOKEN_OPERATORS = {
">": operator.gt,
">=": operator.ge,
"<": operator.lt,
"<=": operator.le,
"==": operator.eq,
"!=": operator.ne,
"LIKE": like
}
class TreeNode:
tokenType = None
value = None
left = None
right = None
def __init__(self, tokenType):
self.tokenType = tokenType
class Tokenizer:
expression = None
tokens = None
tokenTypes = None
i = 0
def __init__(self, exp):
self.expression = exp
def next(self):
self.i += 1
return self.tokens[self.i - 1]
def peek(self):
return self.tokens[self.i]
def hasNext(self):
return self.i < len(self.tokens)
def nextTokenType(self):
return self.tokenTypes[self.i]
def nextTokenTypeIsOperator(self):
t = self.tokenTypes[self.i]
return t in TOKEN_OPERATORS
def tokenize(self):
import re
reg = re.compile(r"(\bAND\b|\bOR\b|!=|==|<=|>=|<|>|\(|\)|LIKE)")
self.tokens = reg.split(self.expression)
self.tokens = [t.strip() for t in self.tokens if t.strip() != ""]
self.tokenTypes = []
for t in self.tokens:
if t in TOKENS:
self.tokenTypes.append(t)
else:
if t in ('True', 'False'):
self.tokenTypes.append(TOKENS["BOOL"])
elif t[0] == t[-1] == '"' or t[0] == t[-1] == "'":
self.tokenTypes.append(TOKENS["STR"])
else:
try:
number = float(t)
self.tokenTypes.append(TOKENS["NUM"])
except:
if re.search("^[a-zA-Z_]+$", t):
self.tokenTypes.append(TOKENS["VAR"])
else:
self.tokenTypes.append(None)
class Query:
tokenizer = None
root = None
def __init__(self, exp):
self.tokenizer = Tokenizer(exp)
self.tokenizer.tokenize()
self.parse()
def parse(self):
self.root = self.parseExpression()
def parseExpression(self):
andTerm1 = self.parseAndTerm()
while (
self.tokenizer.hasNext() and self.tokenizer.nextTokenType() == TOKENS["OR"]
):
self.tokenizer.next()
andTermX = self.parseAndTerm()
andTerm = TreeNode(TOKENS["OR"])
andTerm.left = andTerm1
andTerm.right = andTermX
andTerm1 = andTerm
return andTerm1
def parseAndTerm(self):
condition1 = self.parseCondition()
while (
self.tokenizer.hasNext() and self.tokenizer.nextTokenType() == TOKENS["AND"]
):
self.tokenizer.next()
conditionX = self.parseCondition()
condition = TreeNode(TOKENS["AND"])
condition.left = condition1
condition.right = conditionX
condition1 = condition
return condition1
def parseCondition(self):
if self.tokenizer.hasNext() and self.tokenizer.nextTokenType() == TOKENS["LP"]:
self.tokenizer.next()
expression = self.parseExpression()
if (
self.tokenizer.hasNext()
and self.tokenizer.nextTokenType() == TOKENS["RP"]
):
self.tokenizer.next()
return expression
else:
raise Exception("Closing ) expected, but got " + self.tokenizer.next())
terminal1 = self.parseTerminal()
if self.tokenizer.hasNext():
if self.tokenizer.nextTokenTypeIsOperator():
condition = TreeNode(self.tokenizer.nextTokenType())
self.tokenizer.next()
terminal2 = self.parseTerminal()
condition.left = terminal1
condition.right = terminal2
return condition
else:
raise Exception("Operator expected, but got " + self.tokenizer.next())
else:
raise Exception("Operator expected, but got nothing")
def parseTerminal(self):
if self.tokenizer.hasNext():
tokenType = self.tokenizer.nextTokenType()
if tokenType == TOKENS["NUM"]:
n = TreeNode(tokenType)
n.value = float(self.tokenizer.next())
return n
elif tokenType == TOKENS["VAR"]:
n = TreeNode(tokenType)
n.value = self.tokenizer.next()
return n
elif tokenType == TOKENS["STR"]:
n = TreeNode(tokenType)
n.value = self.tokenizer.next()[1:-1]
return n
elif tokenType == TOKENS["BOOL"]:
n = TreeNode(tokenType)
n.value = self.tokenizer.next() == 'True'
return n
else:
raise Exception(
"NUM, STR, or VAR expected, but got " + self.tokenizer.next()
)
else:
raise Exception(
"NUM, STR, or VAR expected, but got " + self.tokenizer.next()
)
def evaluate(self, variable_dict):
return self.evaluateRecursive(self.root, variable_dict)
def evaluateRecursive(self, treeNode, variable_dict):
if treeNode.tokenType in (TOKENS["NUM"], TOKENS["STR"], TOKENS["BOOL"]):
return treeNode.value
if treeNode.tokenType == TOKENS["VAR"]:
if treeNode.value in variable_dict:
value = variable_dict[treeNode.value]
if isinstance(value, int):
return float(value)
if value in ('True', 'False'):
return value == 'True'
return value
return None
left = self.evaluateRecursive(treeNode.left, variable_dict)
right = self.evaluateRecursive(treeNode.right, variable_dict)
if treeNode.tokenType in TOKEN_OPERATORS:
return TOKEN_OPERATORS[treeNode.tokenType](left, right)
elif treeNode.tokenType == TOKENS["AND"]:
return left and right
elif treeNode.tokenType == TOKENS["OR"]:
return left or right
else:
raise Exception("Unexpected type " + str(treeNode.tokenType))
def to_dnf(self):
return self.inner_to_dnf(self.root)
def inner_to_dnf(self, treeNode):
if treeNode.tokenType in (TOKENS["NUM"], TOKENS["STR"], TOKENS["VAR"]):
return treeNode.value
left = self.inner_to_dnf(treeNode.left)
right = self.inner_to_dnf(treeNode.right)
if treeNode.tokenType == TOKENS["AND"]:
return [left, right]
if treeNode.tokenType == TOKENS["OR"]:
return [left],[right]
if treeNode.tokenType in TOKEN_OPERATORS:
return (left, treeNode.tokenType, right)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment