Skip to content

Instantly share code, notes, and snippets.

@Multihuntr
Created February 4, 2021 02:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Multihuntr/8e613ac6fe86967e84ee0e0e921bdffb to your computer and use it in GitHub Desktop.
Save Multihuntr/8e613ac6fe86967e84ee0e0e921bdffb to your computer and use it in GitHub Desktop.
Sqlalchemy create partitioned tables in a for loop in Postgresql database
# BIG NOTE:
# I made this becuase I thought the method was interesting. It's not useful in it's current form.
# If you can, use an index instead. https://stackoverflow.com/a/27895337
from sqlalchemy import Column, Integer, Float, ForeignKey, event, DDL
from sqlalchemy.schema import CreateSchema
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION
from sqlalchemy.ext.declarative import declarative_base, declared_attr
Base = declarative_base()
# Make sure you're using engine.create/engine.create_all to create the tables
schema_name = 'point_partitions'
def check_schema(ddl, target, bind, state, **kw):
return not bind.dialect.has_schema(bind, schema=schema_name)
event.listen(Base.metadata, 'before_create',
CreateSchema(schema_name).execute_if(callable_=check_schema)
)
Double = Float().with_variant(DOUBLE_PRECISION(), 'postgresql')
# The partitioned tables are attached after being created as described here:
# https://stackoverflow.com/questions/61545680/postgresql-partition-and-sqlalchemy
# To make this nice, we create:
# - A mixin which has the properties of the table
# - The base table to be partitioned
# - Each table which is a partition separately in a for loop
class PointMixin:
# Point group is what we are partitioning by
@declared_attr
def point_group_id(self):
return Column(Integer, ForeignKey('point_group.point_group_id', ondelete='CASCADE'), primary_key=True)
# We don't set a backref/back_populates, else it would conflict on each of the partition tables.
t = Column(Double, nullable=False, primary_key=True)
x = Column(Double, nullable=False)
y = Column(Double, nullable=False)
z = Column(Double, nullable=True)
class Point(PointMixin, Base):
__tablename__ = 'point'
__table_args__ = {
'postgresql_partition_by': 'RANGE(point_group_id)',
}
PARTITION_RANGE = 10
def PointX(point_group_id):
gid_lo = point_group_id
gid_hi = gid_lo + PARTITION_RANGE - 1
nm = f'partition_{gid_lo:08d}_{gid_hi:08d}'
# Using `type` to instantiate allows us to set the class name before the class is created
# Trying to set __name__ after creating the class fails because the Base.metadata metaclass has already
# registered the class by it's name after instantiation, replacing the previous partition table.
# i.e. This DOESN'T work:
#
# class PointPartitionX(PointMixin, Base):
# __name__ = f'PointPartition{gid_lo}'
#
# Because __name__ is set after we have declared PointPartitionX being a subclass of Base.
PointPartitionX = type(
f'PointPartition{gid_lo}',
(PointMixin, Base),
{
'__tablename__': nm,
'__table_args__': { 'schema': 'point_partitions' }
}
)
PointPartitionX.__table__.add_is_dependent_on(Point.__table__)
# This DDL only runs if the table is created. Thus this code is idempotent.
event.listen(
PointPartitionX.__table__,
'after_create',
DDL(f'ALTER TABLE point ATTACH PARTITION point_partitions.{nm} FOR VALUES FROM ({gid_lo}) TO ({gid_hi+1})')
)
return PointPartitionX
# We will create a fixed number of partitions ahead of time.
# This has the downside that if the database grows too large, we need to increase this number.
# But, dynamic partitions have their own problems. https://stackoverflow.com/a/7892660
for x in range(0, 100, PARTITION_RANGE):
part = PointX(x)
# After this, you can just insert into the table like normal and Postgresql will allocate the rows to the partitions
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment