/* This demonstrates a trigger for denormalizing a join table into an array.
   Hasn't really been tested much, but seems to work.
   Will put it into production tomorrow and see!

   Maybe PostgreSQL 9.4 will come with arrays of foreign keys; that would remove
   a lot of the need for simple join tables. The array-element foreign key work
   (http://blog.2ndquadrant.com/postgresql-9-3-development-array-element-foreign-keys)
   didn't make it into 9.3.
*/
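/* For context, the proposed (and never merged) array-element foreign key syntax
   sketched in that post looked roughly like the following. Hypothetical -- it
   does not run on any released PostgreSQL, so it stays in this comment:

   create table line_items (
     id           integer primary key,
     shipment_ids integer[],
     foreign key (each element of shipment_ids) references shipments (id)
   );
*/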
set client_min_messages = 'warning';
drop schema if exists test cascade;
create schema test;
set search_path = 'test';
-- Not used by the trigger, but it's pretty handy, and is used by the test code.
-- Returns true if the array is empty, false if it isn't. Note it returns null
-- (not true) for a null array, which is why shipment_ids defaults to '{}' below.
create or replace function array_empty(anyarray) returns boolean as $$
select $1 = '{}'
$$ language sql immutable;
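-- A few sanity checks; expected results in the trailing comments.
select array_empty('{}'::integer[]);   -- true
select array_empty(array[1, 2]);       -- false
select array_empty(null::integer[]);   -- null, not true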
-- array_remove was added in postgresql 9.3; calling this one array_erase so it
-- won't conflict there. Removes every occurrence of $2 (and, as written, any
-- nulls, since "null <> $2" is null).
create or replace function array_erase(anyarray, anyelement) returns anyarray as $$
select array(select v from unnest($1) g(v) where v <> $2)
$$ language sql immutable;
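-- For example (expected result in the comment):
select array_erase(array[1, 2, 3, 2], 2);  -- {1,3}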
-- Trigger function to handle the denormalization.
create function denormalize_join_into_array() returns trigger as $f$
declare
  -- TODO figure out better names for these variables
  v_foreign_table_name        text := TG_ARGV[0]; -- table that's being updated
  v_foreign_pkey              text := TG_ARGV[1]; -- column on the updated table that contains the primary key
  v_foreign_array_column_name text := TG_ARGV[2]; -- column on the updated table that stores the array
  v_source_fkey               text := TG_ARGV[3]; -- column on the source table that contains the foreign key to the table being updated
  v_source_column_name        text := TG_ARGV[4]; -- column on the source table that contains the value that goes into the array
  should_update boolean := false;
begin
  if TG_OP = 'UPDATE' then
    -- Only bother updating if there's been a change we care about.
    -- On updates, we erase the value from the old row's array and append it to the new one's.
    -- TODO figure out a better way to do this.
    execute format('select $1.%1$I is distinct from $2.%1$I or $1.%2$I is distinct from $2.%2$I',
                   v_source_column_name, v_source_fkey)
      into should_update using new, old;
  end if;
  -- TODO figure out a simple way to remove the duplication in the update statements.
  if TG_OP = 'INSERT' or should_update then
    -- Append the new value to the array column.
    execute format('update %1$I set %2$I = array_append(%2$I, $1.%3$I) where $1.%5$I = %1$I.%4$I',
                   v_foreign_table_name, v_foreign_array_column_name,
                   v_source_column_name, v_foreign_pkey, v_source_fkey) using new;
  end if;
  if TG_OP = 'DELETE' or should_update then
    -- Remove the old value from the array column.
    execute format('update %1$I set %2$I = array_erase(%2$I, $1.%3$I) where $1.%5$I = %1$I.%4$I',
                   v_foreign_table_name, v_foreign_array_column_name,
                   v_source_column_name, v_foreign_pkey, v_source_fkey) using old;
  end if;
  return null; -- after trigger, so the return value is ignored
end $f$ language plpgsql;
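/* With the line_items/shipments wiring below, the append branch expands to
   roughly this statement, with $1 bound to the NEW join-table row:

   update line_items
      set shipment_ids = array_append(shipment_ids, $1.shipment_id)
    where $1.line_item_id = line_items.id;

   The erase branch is the same shape, with array_erase and the OLD row. */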
-- Define line_items <-> shipments relations.
create table line_items (
  id           integer primary key,
  -- Default to an empty (not null) array so array_empty() can find unshipped
  -- rows; a null array would compare as null, not true, in the queries below.
  shipment_ids integer[] not null default '{}'
);
create table shipments (
  id              integer primary key,
  tracking_number text not null
);
create table line_items_shipments (
  line_item_id integer references line_items,
  shipment_id  integer references shipments,
  primary key (line_item_id, shipment_id)
);
-- Define the denormalization trigger. The arguments are: the table to update,
-- its primary key column, its array column, then the join table's foreign key
-- column and the column whose value goes into the array.
create trigger denormalize after insert or update or delete on line_items_shipments
for each row execute procedure denormalize_join_into_array(
  'line_items', 'id', 'shipment_ids', 'line_item_id', 'shipment_id');
insert into line_items (id) select * from generate_series(1, 250000);
insert into shipments (id, tracking_number) select *, 'tracking number' from generate_series(1, 249990);
insert into line_items_shipments select id, id from shipments;
-- That leaves 10 line items that haven't been shipped.
-- We're gonna need a GIN index.
create index on line_items using gin(shipment_ids);
analyze line_items; analyze shipments; analyze line_items_shipments;
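-- The GIN index also speeds up containment queries, e.g. finding the line
-- items a given shipment appears on (shipment 42 is an arbitrary example):
select count(*) from line_items where shipment_ids @> array[42];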
-- Test Queries below.
-- Find unshipped line items via array. Much faster. Simpler, sexier query.
explain analyze select count(*) from line_items li where array_empty(shipment_ids);
/* Aggregate  (cost=20.02..20.03 rows=1 width=0) (actual time=0.054..0.054 rows=1 loops=1)
     ->  Bitmap Heap Scan on line_items li  (cost=16.01..20.02 rows=1 width=0) (actual time=0.042..0.043 rows=10 loops=1)
           Recheck Cond: (shipment_ids = '{}'::integer[])
           ->  Bitmap Index Scan on line_items_shipment_ids_idx  (cost=0.00..16.01 rows=1 width=0) (actual time=0.021..0.021 rows=10 loops=1)
                 Index Cond: (shipment_ids = '{}'::integer[])
   Total runtime: 0.099 ms */
-- Find unshipped line items via join. This is slow.
explain analyze
select count(*) from line_items li
left join line_items_shipments lis on lis.line_item_id = li.id where lis.line_item_id is null;
/* Aggregate  (cost=18445.86..18445.87 rows=1 width=0) (actual time=382.830..382.830 rows=1 loops=1)
     ->  Hash Anti Join  (cost=7708.77..18445.83 rows=10 width=0) (actual time=264.776..382.814 rows=10 loops=1)
           Hash Cond: (li.id = lis.line_item_id)
           ->  Seq Scan on line_items li  (cost=0.00..5931.00 rows=250000 width=4) (actual time=32.664..86.894 rows=250000 loops=1)
           ->  Hash  (cost=3606.90..3606.90 rows=249990 width=4) (actual time=111.456..111.456 rows=249990 loops=1)
                 Buckets: 4096  Batches: 16  Memory Usage: 560kB
                 ->  Seq Scan on line_items_shipments lis  (cost=0.00..3606.90 rows=249990 width=4) (actual time=0.009..49.887 rows=249990 loops=1)
   Total runtime: 382.882 ms */
-- Tests to make sure that inserts, updates, and deletes to the join table work
-- as expected; expected shipment_ids for ids 1-3 are in the trailing comments.
select * from line_items where id <= 3;  -- {1}, {2}, {3}
update line_items_shipments set line_item_id = 1 where line_item_id = 2;
update line_items_shipments set shipment_id = 1 where line_item_id = 3;
select * from line_items where id <= 3;  -- {1,2}, {}, {1}
delete from line_items_shipments where shipment_id = 1;
select * from line_items where id <= 3;  -- {2}, {}, {}