Skip to content

Instantly share code, notes, and snippets.

@JoshCheek
Last active April 4, 2018 21:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoshCheek/85c779a475b1d97bf19d4838bc7ce084 to your computer and use it in GitHub Desktop.
Save JoshCheek/85c779a475b1d97bf19d4838bc7ce084 to your computer and use it in GitHub Desktop.
Postgresql: Diffing tables
# UPDATE: I've stopped putting updates into this gist
# as it's starting to get into the internal structure of our db
# Instead, you need to get the link to the private gist.
# You can ask me or Matt for it.
# -----------------------------
# CURRENT STATUS:
# * handles composite primary keys
# * handles case sensitive primary keys
# * handles 3 tables
# * original: the common ancestor (aka parent, my naming is inconsistent in this gist)
# * pcr: one of the children, represents the data in the pcr schema
# * prod: the other child, represents the data in the public schema
# * identifies differences (in either table)
# * rows added
# * rows deleted
# * rows modified
# * does not identify unchanged rows
#
# STILL TO DO:
# * emit these on a per-column basis instead of a per-row basis
# ===== SETUP =====
require 'pg'
db = PG.connect dbname: 'josh_testing'
db.exec 'begin'
def db.exec(*)
super.to_a
end
def db.exec_params(*)
super.to_a
end
# db.exec 'create extension hstore;'
# ===== FUNCTIONS =====
db.exec <<~SQL
-- https://wiki.postgresql.org/wiki/Retrieve_primary_key_columns
-- select (get_primary_keys('tablename')).*
create or replace function get_primary_keys(table_name text)
returns table(col_name text, data_type text)
as $$
SELECT
a.attname::text as col_name,
format_type(a.atttypid, a.atttypmod) AS data_type
FROM pg_index i
JOIN pg_attribute a
ON a.attrelid = i.indrelid
AND a.attnum = ANY(i.indkey)
WHERE i.indrelid = table_name::regclass
AND i.indisprimary;
$$ language sql;
create or replace function diff_table(orig_table text, pcr_table text, prod_table text)
returns table(pks text, table_name text, column_name text, orig_value text, pcr_value text, prod_value text)
as $fn$
declare
pks text := (select string_agg(quote_ident(col_name), ',')
from get_primary_keys(orig_table));
query_body text := format($$
select
(%1$s)::text as pks,
%2$L::text as table_name,
'FIXME'::text as column_name,
orig.val1 as orig_value,
pcr.val1 as pcr_value,
prod.val1 as prod_value
from %2$I as orig
full join %3$I as pcr using (%1$s)
full join %4$I as prod using (%1$s)
where pcr is distinct from prod
$$,
pks, orig_table, pcr_table, prod_table
);
begin
-- raise notice 'QUERY: %', query_body;
return query execute query_body;
-- TODO:
-- For each row in the above query,
-- instead of returning that row,
-- pass it to a "diff-records" fn
-- and have that fn emit column-name,orig,pcr,prod
-- for each differing column
end $fn$ language plpgsql;
SQL
# ===== TEST 1 =====
db.exec <<~SQL
-- composite primary key with case sensitive column names
create table parent (id1 integer, "iD2" integer, val1 text, primary key (id1, "iD2"));
create table pcr (id1 integer, "iD2" integer, val1 text, primary key (id1, "iD2"));
create table prod (id1 integer, "iD2" integer, val1 text, primary key (id1, "iD2"));
insert into parent (id1, "iD2", val1)
values (1, 1, 'unchanged'),
(2, 1, 'deleted-pcr'),
(2, 2, 'deleted-prod'),
(2, 3, 'deleted-both'),
(3, 1, 'modified-pcr'),
(3, 2, 'modified-prod'),
(3, 3, 'modified-both-same'),
(3, 4, 'modified-both-different');
insert into pcr (id1, "iD2", val1)
values (1, 1, 'unchanged'),
(2, 2, 'deleted-prod'),
(3, 1, 'MODIFIED-PCR'),
(3, 2, 'modified-prod'),
(3, 3, 'MODIFIED-BOTH-SAME'),
(3, 4, 'MODIFIED-BOTH-DIFFERENT-PCR'),
(4, 1, 'added-pcr');
insert into prod (id1, "iD2", val1)
values (1, 1, 'unchanged'),
(2, 1, 'deleted-pcr'),
(3, 1, 'modified-pcr'),
(3, 2, 'MODIFIED-PROD'),
(3, 3, 'MODIFIED-BOTH-SAME'),
(3, 4, 'MODIFIED-BOTH-DIFFERENT-PROD'),
(4, 2, 'added-prod');
SQL
result = db.exec("select * from diff_table('parent', 'pcr', 'prod')").sort_by { |r| r['pks'] }
EXPECTED_IDS = %w[(2,1) (2,2) (3,1) (3,2) (3,4) (4,1) (4,2)]
ACTUAL_IDS = result.map { |row| row['pks'] }
headers = result.first.to_h.keys
rows = [
headers,
headers.map { |h| '-'*h.length },
*result.map { |h|
[ h["pks"],
h["table_name"].inspect,
h["column_name"].inspect,
h["orig_value"].inspect,
h["pcr_value"].inspect,
h["prod_value"].inspect,
]
}
]
format = rows.transpose.map { |col| col.map(&:length).max }.map { |n| "%-#{n}s" }.join(" ")
rows.each { |row| puts (format % row).strip }
if EXPECTED_IDS != ACTUAL_IDS
EXPECTED_IDS # =>
ACTUAL_IDS # =>
raise "EXPECTED: #{EXPECTED_IDS.inspect}\nACTUAL: #{ACTUAL_IDS.inspect}"
end
# >> pks table_name column_name orig_value pcr_value prod_value
# >> --- ---------- ----------- ---------- --------- ----------
# >> (2,1) "parent" "FIXME" "deleted-pcr" nil "deleted-pcr"
# >> (2,2) "parent" "FIXME" "deleted-prod" "deleted-prod" nil
# >> (3,1) "parent" "FIXME" "modified-pcr" "MODIFIED-PCR" "modified-pcr"
# >> (3,2) "parent" "FIXME" "modified-prod" "modified-prod" "MODIFIED-PROD"
# >> (3,4) "parent" "FIXME" "modified-both-different" "MODIFIED-BOTH-DIFFERENT-PCR" "MODIFIED-BOTH-DIFFERENT-PROD"
# >> (4,1) "parent" "FIXME" nil "added-pcr" nil
# >> (4,2) "parent" "FIXME" nil nil "added-prod"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment