Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Regular expression filters in SQLAlchemy
"""
Module implementing an enhanced string column type for SQLAlchemy
with a support for regular expression operators in Postgres and SQLite.
"""
import re
from sqlalchemy import String as _String, event, exc
from sqlalchemy.engine import Engine
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql.expression import BinaryExpression, func, literal
from sqlalchemy.sql.operators import custom_op
import sqlite3
__all__ = ['String']
class String(_String):
"""Enchanced version of standard SQLAlchemy's :class:`String`.
Supports additional operators that can be used while constructing
filter expressions.
"""
class comparator_factory(_String.comparator_factory):
"""Contains implementation of :class:`String` operators
related to regular expressions.
"""
def regexp(self, other):
return RegexMatchExpression(self.expr, literal(other), custom_op('~'))
def iregexp(self, other):
return RegexMatchExpression(self.expr, literal(other), custom_op('~*'))
def not_regexp(self, other):
return RegexMatchExpression(self.expr, literal(other), custom_op('!~'))
def not_iregexp(self, other):
return RegexMatchExpression(self.expr, literal(other), custom_op('!~*'))
class RegexMatchExpression(BinaryExpression):
"""Represents matching of a column againsts a regular expression."""
@compiles(RegexMatchExpression, 'sqlite')
def sqlite_regex_match(element, compiler, **kw):
"""Compile the SQL expression representing a regular expression match
for the SQLite engine.
"""
# determine the name of a custom SQLite function to use for the operator
operator = element.operator.opstring
try:
func_name, _ = SQLITE_REGEX_FUNCTIONS[operator]
except (KeyError, ValueError), e:
would_be_sql_string = ' '.join((compiler.process(element.left),
operator,
compiler.process(element.right)))
raise exc.StatementError(
"unknown regular expression match operator: %s" % operator,
would_be_sql_string, None, e)
# compile the expression as an invocation of the custom function
regex_func = getattr(func, func_name)
regex_func_call = regex_func(element.left, element.right)
return compiler.process(regex_func_call)
@event.listens_for(Engine, 'connect')
def sqlite_engine_connect(dbapi_connection, connection_record):
"""Listener for the event of establishing connection to a SQLite database.
Creates the functions handling regular expression operators
within SQLite engine, pointing them to their Python implementations above.
"""
if not isinstance(dbapi_connection, sqlite3.Connection):
return
for name, function in SQLITE_REGEX_FUNCTIONS.values():
dbapi_connection.create_function(name, 2, function)
# Mapping from the regular expression matching operators
# to named Python functions that implement them for SQLite.
SQLITE_REGEX_FUNCTIONS = {
'~': ('REGEXP',
lambda value, regex: bool(re.match(regex, value))),
'~*': ('IREGEXP',
lambda value, regex: bool(re.match(regex, value, re.IGNORECASE))),
'!~': ('NOT_REGEXP',
lambda value, regex: not re.match(regex, value)),
'!~*': ('NOT_IREGEXP',
lambda value, regex: not re.match(regex, value, re.IGNORECASE)),
}
@datavistics

This comment has been minimized.

Copy link

datavistics commented Jan 17, 2018

I used this to create a column, then I populated the database (sqlite3) with a few rows, but I get this error from the Pycharm database console:

[2018-01-17 12:14:13] Connected
sql> select count(*) from relationships_submission where relationships_submission.title REGEXP '\[\d+\]'
[2018-01-17 12:14:13] [1] [SQLITE_ERROR] SQL error or missing database (no such function: REGEXP)
sql> select count(*) from relationships_submission where relationships_submission.title ~ '\[\d+\]'
[2018-01-17 12:14:23] [1] [SQLITE_ERROR] SQL error or missing database (near "~": syntax error)

Any advice?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.