-
-
Save alexxxnf/dc202a58d79c04fae39c1e7d652403f3 to your computer and use it in GitHub Desktop.
Regular expression filters in SQLAlchemy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Module implementing an enhanced string column type for SQLAlchemy | |
with a support for regular expression operators in Postgres and SQLite. | |
""" | |
import re | |
from sqlalchemy import String as _String, Text as _Text, Unicode as _Unicode, UnicodeText as _UnicodeText, event, exc | |
from sqlalchemy.engine import Engine | |
from sqlalchemy.ext.compiler import compiles | |
from sqlalchemy.sql.expression import BinaryExpression, func, literal | |
from sqlalchemy.sql.operators import custom_op | |
import sqlite3 | |
__all__ = ['String', 'Text', 'Unicode', 'UnicodeText'] | |
class String(_String): | |
"""Enhanced version of standard SQLAlchemy's :class:`String`. | |
Supports additional operators that can be used while constructing | |
filter expressions. | |
""" | |
class comparator_factory(_String.comparator_factory): | |
"""Contains implementation of :class:`String` operators | |
related to regular expressions. | |
""" | |
def regexp(self, other): | |
return RegexMatchExpression(self.expr, literal(other), custom_op('~')) | |
def iregexp(self, other): | |
return RegexMatchExpression(self.expr, literal(other), custom_op('~*')) | |
def not_regexp(self, other): | |
return RegexMatchExpression(self.expr, literal(other), custom_op('!~')) | |
def not_iregexp(self, other): | |
return RegexMatchExpression(self.expr, literal(other), custom_op('!~*')) | |
class Text(_Text, String): | |
"""Enhanced version of standard SQLAlchemy's :class:`Text`. | |
Supports additional operators that can be used while constructing | |
filter expressions. | |
""" | |
pass | |
class Unicode(_Unicode, String): | |
"""Enhanced version of standard SQLAlchemy's :class:`Unicode`. | |
Supports additional operators that can be used while constructing | |
filter expressions. | |
""" | |
pass | |
class UnicodeText(_UnicodeText, Text): | |
"""Enhanced version of standard SQLAlchemy's :class:`UnicodeText`. | |
Supports additional operators that can be used while constructing | |
filter expressions. | |
""" | |
pass | |
class RegexMatchExpression(BinaryExpression): | |
"""Represents matching of a column againsts a regular expression.""" | |
@compiles(RegexMatchExpression, 'sqlite') | |
def sqlite_regex_match(element, compiler, **kw): | |
"""Compile the SQL expression representing a regular expression match | |
for the SQLite engine. | |
""" | |
# determine the name of a custom SQLite function to use for the operator | |
operator = element.operator.opstring | |
try: | |
func_name, _ = SQLITE_REGEX_FUNCTIONS[operator] | |
except (KeyError, ValueError) as e: | |
would_be_sql_string = ' '.join((compiler.process(element.left), | |
operator, | |
compiler.process(element.right))) | |
raise exc.StatementError( | |
"unknown regular expression match operator: %s" % operator, | |
would_be_sql_string, None, e) | |
# compile the expression as an invocation of the custom function | |
regex_func = getattr(func, func_name) | |
regex_func_call = regex_func(element.left, element.right) | |
return compiler.process(regex_func_call) | |
@event.listens_for(Engine, 'connect') | |
def sqlite_engine_connect(dbapi_connection, connection_record): | |
"""Listener for the event of establishing connection to a SQLite database. | |
Creates the functions handling regular expression operators | |
within SQLite engine, pointing them to their Python implementations above. | |
""" | |
if not isinstance(dbapi_connection, sqlite3.Connection): | |
return | |
for name, function in SQLITE_REGEX_FUNCTIONS.values(): | |
dbapi_connection.create_function(name, 2, function) | |
# Mapping from the regular expression matching operators | |
# to named Python functions that implement them for SQLite. | |
SQLITE_REGEX_FUNCTIONS = { | |
'~': ('REGEXP', | |
lambda value, regex: bool(re.match(regex, value))), | |
'~*': ('IREGEXP', | |
lambda value, regex: bool(re.match(regex, value, re.IGNORECASE))), | |
'!~': ('NOT_REGEXP', | |
lambda value, regex: not re.match(regex, value)), | |
'!~*': ('NOT_IREGEXP', | |
lambda value, regex: not re.match(regex, value, re.IGNORECASE)), | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment