Skip to content

Instantly share code, notes, and snippets.

@dlukes
Last active September 1, 2016 12:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dlukes/25467d658a5c5f53be0cfb55969e7dcd to your computer and use it in GitHub Desktop.
Save dlukes/25467d658a5c5f53be0cfb55969e7dcd to your computer and use it in GitHub Desktop.
Case-insensitive, diacritic-sensitive collation with MySQL demo
# Case-insensitive, diacritic-sensitive collation with MySQL demo
#
# Dependencies -- install ``python3``, then:
#
# $ pip3 install --user pymysql sqlalchemy
#
# Replace <user>, <passwd> and <db> in the create_engine() URL with suitable
# values corresponding to your local MySQL setup.
#
# Usage:
#
# $ python3 mysql_collation.py
from sqlalchemy import Column, Integer, Unicode
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from unicodedata import normalize
# Engine for the local MySQL server, reached through the PyMySQL driver with
# a utf8mb4 connection charset. ``echo=True`` turns on SQLAlchemy's logging
# of the SQL it emits.
engine = create_engine(
    "mysql+pymysql://<user>:<passwd>@localhost/<db>?charset=utf8mb4",
    echo=True,
)

# Declarative base class for the models below; its metadata is bound to
# ``engine``.
Base = declarative_base(bind=engine)
class Word(Base):
    """A single word stored in the ``words`` table.

    The ``val`` column is compared using a ``*_general_ci`` collation. The
    rest of this script stores and queries values in NFD-normalized form.
    """

    __tablename__ = "words"
    __table_args__ = {"mysql_charset": "utf8mb4"}

    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # The word itself. The collation is taken from the utf8mb4 family so the
    # column shares the character set of the table and of the connection; a
    # bare ``utf8_general_ci`` would make MySQL choose the 3-byte ``utf8``
    # character set for this column instead.
    val = Column(Unicode(256, collation="utf8mb4_general_ci"), nullable=False)
# Create the tables declared on ``Base`` (here: ``words``). The engine is
# passed explicitly instead of relying on the metadata's implicit bind, a
# pattern SQLAlchemy deprecated in 1.4.
Base.metadata.create_all(engine)

# Session factory tied to the engine, plus the single module-level session
# used by ``find()`` and by the demo code at the bottom of the file.
DBSession = sessionmaker(bind=engine)
session = DBSession()
def find(word):
    """Return the ``val`` of every stored ``Word`` matching *word*.

    *word* is NFD-normalized first; the equality test itself is part of the
    query, so it is evaluated by the database rather than in Python.
    """
    needle = normalize("NFD", word)
    matches = session.query(Word).filter(Word.val == needle)
    return [row.val for row in matches]
if __name__ == "__main__":
    try:
        # Seed the table with three words that differ only in their
        # diacritics, stored in NFD-normalized form.
        for text in "čeří ceři ceri".split():
            session.add(Word(val=normalize("NFD", text)))
        session.commit()

        # Look up each probe (varying in case and diacritics) and print the
        # matches; the escape sequences colour the line red on ANSI terminals.
        for s in "čeří ČEří ceři CeřI ceri CeRi".split():
            print("\033[31mSearched for {!r}, found: {!r}\033[0m".format(s, find(s)))
        session.commit()
    finally:
        # Clean up even when seeding or querying failed: close the session
        # (ending any transaction it still has open) before dropping the
        # demo table, and pass the engine explicitly to the DROP.
        session.close()
        Word.__table__.drop(engine)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment