Skip to content

Instantly share code, notes, and snippets.

@xflr6
Last active June 4, 2022 08:29
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save xflr6/219f7a3cb1994ed937ee to your computer and use it in GitHub Desktop.
Save xflr6/219f7a3cb1994ed937ee to your computer and use it in GitHub Desktop.
Benchmark PostgreSQL array vs. join performance
"""Benchmark PostgreSQL array vs. join performance.
Replicate http://shon.github.io/2015/12/21/postgres_array_performance.html
with proper join table indexes (uniqueness constraints) using SQLAlchemy.
$ python -i bench_pg_array.py
>>> setup()
$ python -m timeit -s "import bench_pg_array" "bench_pg_array.test_join()"
500 loops, best of 5: 445 usec per loop
$ python -m timeit -s "import bench_pg_array" "bench_pg_array.test_array()"
1000 loops, best of 5: 362 usec per loop
"""
import functools
import random
import sqlalchemy as sa
from sqlalchemy import Table, Column, Integer, Text, ForeignKey
import sqlalchemy.orm
from sqlalchemy.dialects.postgresql import ARRAY, array
ENGINE = sa.create_engine('postgresql://postgres@/spam', echo=True)
REGISTRY = sa.orm.registry()
@REGISTRY.mapped
class Base:
__abstract__ = True
id = Column(Integer, primary_key=True)
@REGISTRY.mapped
class Post(Base):
__tablename__ = 'post'
title = Column(Text, nullable=False, server_default='example title')
tags = Column(ARRAY(Text), nullable=False, default=array([], type_=Text))
__table_args__ = (
sa.Index('ix_post_tags', tags, postgresql_using='gin'),
)
@REGISTRY.mapped
class Tag(Base):
__tablename__ = 'tag'
name = Column(Text, nullable=False)
post_tag = Table('post_tag', REGISTRY.metadata,
Column('post_id', ForeignKey('post.id'), primary_key=True),
Column('tag_id', ForeignKey('tag.id'), primary_key=True),
sa.UniqueConstraint('tag_id', 'post_id'))
def setup(*,
no_of_posts: int = 25_000,
no_of_tags: int = 10_000,
tags_per_post: int = 15) -> None:
REGISTRY.metadata.drop_all(ENGINE)
REGISTRY.metadata.create_all(ENGINE)
tagnames = [f'tag-{i:d}' for i in range(1, no_of_tags + 1)]
with ENGINE.begin() as conn:
conn.execute(sa.insert(Tag), [{'name': t} for t in tagnames])
insert_post = functools.partial(conn.execute, sa.insert(Post))
for _ in range(no_of_posts):
insert_post(tags=random.sample(tagnames, tags_per_post))
select_pairs = (sa.select(Post.id,
sa.func.substring(sa.column('tag'), r'\d+').cast(sa.INT))
.select_from(sa.func.unnest(Post.tags).alias('tag',
joins_implicitly=True)))
conn.execute(sa.insert(post_tag)
.from_select(['post_id', 'tag_id'], select_pairs))
with ENGINE.connect() as conn:
conn = conn.execution_options(isolation_level='AUTOCOMMIT')
conn.execute('VACUUM ANALYZE')
def test_join(*,
tag_id: int = 8,
engine: sa.engine.Engine = ENGINE) -> int:
"""SELECT count(*) AS count_1
FROM post JOIN post_tag ON post.id = post_tag.post_id
WHERE post_tag.tag_id = %(tag_id_1)s"""
select = (sa.select(sa.func.count())
.join_from(Post, post_tag)
.where(post_tag.c.tag_id == tag_id))
with engine.connect() as conn:
return conn.scalar(select)
def test_array(*,
tag: str = 'tag-8',
engine: sa.engine.Engine = ENGINE) -> int:
"""SELECT count(*) AS count_1
FROM post
WHERE post.tags @> %(tags_1)s
"""
select = (sa.select(sa.func.count())
.select_from(Post)
.where(Post.tags.contains([tag])))
with engine.connect() as conn:
return conn.scalar(select)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment