chelseadole/duplicate_to_partitioned_table.sql

## duplicate_to_partitioned_table.sql
-- This function is designed to duplicate all live INSERTS/UPDATES/DELETES from one table (referred to as "source_table_name"
-- to a second partitioned table (referred to as "destination_table_name"). The function should be set to trigger after insert/
-- update/delete on the source table.

-- This function is designed to be leveraged for partitioned table migration through this method:
--    1) Create an empty partitioned copy of the "source_table_name". Alter primary key as necessary, as partitioned Postgres
--       tables do not support unique/primary keys not included in the partition key.
--    2) Create the following function, and attach it as a trigger to "source_table_name". At this point, incoming new DML is
--       being copied successfully to the partitioned table, so only historical data will need to be backfilled.
--    3) Target rows in "source_table_name" with an updated_at value BEFORE the trigger was attached, and backfill them into
--       "destination_table_name" through bulk inserts. When conflict on primary key occurs, do nothing -- as this means that
--       live traffic has concurrently loaded the value to "destination_table_name", and should take priority. Backfill may be
--       completed via a script, or directly on Postgres using Postgres functions.

-- The following assumptions are made for the purposes of this function:
--    1) The "destination_table_name" has a valid PRIMARY KEY (PK) present in the partition key. This PK may be utilized by
--       live traffic to perform "INSERT ... ON CONFLICT ..." (aka, "upsert") queries
--    2) The "source_table_name" has a fields called "updated_at" (timestamp), "created_at" (timestamp), and "value" (any,
--       used as an example.

-- NOTE: THIS IS AN EXAMPLE. REAL TABLE/COLUMN NAMES MUST BE CORRECTED, AND IT IS LIKELY THAT THE FUNCTION DEFINITION WILL
--       REQUIRE CHANGE FOR REAL APPLICATIONS

CREATE OR REPLACE FUNCTION duplicate_to_partitioned_table()
RETURNS TRIGGER AS
$$
BEGIN
  IF ( TG_OP = 'INSERT') THEN
      -- 1) Insert row. Because the table has a PRIMARY KEY, it is important that this fails if a row for the PK already exists
      INSERT INTO <destination_table_name> VALUES (NEW.*);
      RETURN NEW;
  ELSIF ( TG_OP = 'UPDATE') THEN
      -- 2) Upsert into new table. If not exists, write new value. If the value already exists, update only the values which
      --    should be updated. In this example, the "created_at" value should remain the same.
      INSERT INTO <destination_table_name> VALUES (NEW.*)
      ON CONFLICT <primary_key> DO UPDATE
      SET value = NEW.value, updated_at = NEW.updated_at, created_at = OLD.created_at;
      RETURN NEW;
  ELSIF ( TG_OP = 'DELETE') THEN
      -- 3) Delete row.
      DELETE FROM <destination_table_name> WHERE id = OLD.id;
      RETURN OLD;
  END IF;
END;
$$ LANGUAGE PLPGSQL;

CREATE TRIGGER duplicate_to_partitioned_table_trigger
AFTER INSERT OR UPDATE OR DELETE ON <source_table_name>
FOR EACH ROW EXECUTE PROCEDURE duplicate_to_partitioned_table();
	-- This function is designed to duplicate all live INSERTS/UPDATES/DELETES from one table (referred to as "source_table_name"
	-- to a second partitioned table (referred to as "destination_table_name"). The function should be set to trigger after insert/
	-- update/delete on the source table.

	-- This function is designed to be leveraged for partitioned table migration through this method:
	-- 1) Create an empty partitioned copy of the "source_table_name". Alter primary key as necessary, as partitioned Postgres
	-- tables do not support unique/primary keys not included in the partition key.
	-- 2) Create the following function, and attach it as a trigger to "source_table_name". At this point, incoming new DML is
	-- being copied successfully to the partitioned table, so only historical data will need to be backfilled.
	-- 3) Target rows in "source_table_name" with an updated_at value BEFORE the trigger was attached, and backfill them into
	-- "destination_table_name" through bulk inserts. When conflict on primary key occurs, do nothing -- as this means that
	-- live traffic has concurrently loaded the value to "destination_table_name", and should take priority. Backfill may be
	-- completed via a script, or directly on Postgres using Postgres functions.

	-- The following assumptions are made for the purposes of this function:
	-- 1) The "destination_table_name" has a valid PRIMARY KEY (PK) present in the partition key. This PK may be utilized by
	-- live traffic to perform "INSERT ... ON CONFLICT ..." (aka, "upsert") queries
	-- 2) The "source_table_name" has a fields called "updated_at" (timestamp), "created_at" (timestamp), and "value" (any,
	-- used as an example.

	-- NOTE: THIS IS AN EXAMPLE. REAL TABLE/COLUMN NAMES MUST BE CORRECTED, AND IT IS LIKELY THAT THE FUNCTION DEFINITION WILL
	-- REQUIRE CHANGE FOR REAL APPLICATIONS

	CREATE OR REPLACE FUNCTION duplicate_to_partitioned_table()
	RETURNS TRIGGER AS
	$$
	BEGIN
	IF ( TG_OP = 'INSERT') THEN
	-- 1) Insert row. Because the table has a PRIMARY KEY, it is important that this fails if a row for the PK already exists
	INSERT INTO <destination_table_name> VALUES (NEW.*);
	RETURN NEW;
	ELSIF ( TG_OP = 'UPDATE') THEN
	-- 2) Upsert into new table. If not exists, write new value. If the value already exists, update only the values which
	-- should be updated. In this example, the "created_at" value should remain the same.
	INSERT INTO <destination_table_name> VALUES (NEW.*)
	ON CONFLICT <primary_key> DO UPDATE
	SET value = NEW.value, updated_at = NEW.updated_at, created_at = OLD.created_at;
	RETURN NEW;
	ELSIF ( TG_OP = 'DELETE') THEN
	-- 3) Delete row.
	DELETE FROM <destination_table_name> WHERE id = OLD.id;
	RETURN OLD;
	END IF;
	END;
	$$ LANGUAGE PLPGSQL;

	CREATE TRIGGER duplicate_to_partitioned_table_trigger
	AFTER INSERT OR UPDATE OR DELETE ON <source_table_name>
	FOR EACH ROW EXECUTE PROCEDURE duplicate_to_partitioned_table();