Skip to content

Instantly share code, notes, and snippets.

@dvdbng
Forked from xflr6/bench_pg_array.py
Created February 9, 2017 22:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dvdbng/91a835fddce719d4cf5ee6058799be72 to your computer and use it in GitHub Desktop.
Save dvdbng/91a835fddce719d4cf5ee6058799be72 to your computer and use it in GitHub Desktop.
Benchmark PostgreSQL array vs. join performance
# bench_pg_array.py - benchmark postgresql array vs. join performance
"""
Replicate http://shon.github.io/2015/12/21/postgres_array_performance.html
with proper join table indexes (uniqueness constraints) using sqlalchemy.
$ python -i bench_pg_array.py
>>> setup()
$ python -m timeit -s "import bench_pg_array" "bench_pg_array.test_join()"
1000 loops, best of 3: 1.49 msec per loop
$ python -m timeit -s "import bench_pg_array" "bench_pg_array.test_array()"
1000 loops, best of 3: 1.24 msec per loop
"""
import random
import sqlalchemy as sa
from sqlalchemy import Table, Column, Integer, Text, ForeignKey
from sqlalchemy.dialects.postgresql import ARRAY, array
import sqlalchemy.ext.declarative
engine = sa.create_engine('postgresql://postgres@/spam', echo=False)
class Base(sa.ext.declarative.declarative_base()):
__abstract__ = True
id = Column(Integer, primary_key=True)
class Post(Base):
__tablename__ = 'post'
title = Column(Text, nullable=False, server_default='example title')
tags = Column(ARRAY(Text), nullable=False, default=array([], type_=Text))
__table_args__ = (
sa.Index('ix_post_tags', tags, postgresql_using='gin'),
)
class Tag(Base):
__tablename__ = 'tag'
name = Column(Text, nullable=False)
post_tag = Table('post_tag', Base.metadata,
Column('post_id', Integer, ForeignKey('post.id'), primary_key=True),
Column('tag_id', Integer, ForeignKey('tag.id'), primary_key=True),
sa.UniqueConstraint('tag_id', 'post_id'))
def setup(no_of_posts=25000, no_of_tags=10000, tags_per_post=15, engine=engine):
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
tagnames = ['tag-%d' % i for i in range(1, no_of_tags + 1)]
with engine.begin() as conn:
conn = conn.execution_options(compiled_cache={})
sa.insert(Tag, bind=conn).execute([{'name': t} for t in tagnames])
insert_post = sa.insert(Post, bind=conn).execute
for _ in range(no_of_posts):
insert_post(tags=random.sample(tagnames, tags_per_post))
sa.insert(post_tag, bind=conn).from_select(['post_id', 'tag_id'],
sa.select([Post.id, sa.func.substring(sa.column('tag'), '\d+').cast(sa.INT)])
.select_from(sa.func.unnest(Post.tags).alias('tag'))).execute()
with engine.connect() as conn:
conn = conn.execution_options(isolation_level='AUTOCOMMIT')
conn.execute('VACUUM ANALYZE')
def test_join(tag_id=8, engine=engine):
"""SELECT count(*) AS count_1
FROM post JOIN post_tag ON post.id = post_tag.post_id
WHERE post_tag.tag_id = %(tag_id_1)s"""
return sa.select([sa.func.count()], bind=engine)\
.select_from(sa.join(Post, post_tag))\
.where(post_tag.c.tag_id == tag_id).scalar()
def test_array(tag='tag-%d' % 8, engine=engine):
"""SELECT count(*) AS count_1
FROM post
WHERE post.tags @> %(tags_1)s
"""
return sa.select([sa.func.count()], bind=engine)\
.select_from(Post)\
.where(Post.tags.contains([tag])).scalar()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment