Last active
April 4, 2018 21:11
-
-
Save JoshCheek/85c779a475b1d97bf19d4838bc7ce084 to your computer and use it in GitHub Desktop.
Postgresql: Diffing tables
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# UPDATE: I've stopped putting updates into this gist | |
# as it's starting to get into the internal structure of our db | |
# Instead, you need to get the link to the private gist. | |
# You can ask me or Matt for it. | |
# ----------------------------- | |
# CURRENT STATUS: | |
# * handles composite primary keys | |
# * handles case sensitive primary keys | |
# * handles 3 tables | |
# * original: the common ancestor (aka parent, my naming is inconsistent in this gist) | |
# * pcr: one of the children, represents the data in the pcr schema | |
# * prod: the other child, represents the data in the public schema | |
# * identifies differences (in either table) | |
# * rows added | |
# * rows deleted | |
# * rows modified | |
# * does not identify unchanged rows | |
# | |
# STILL TO DO: | |
# * emit these on a per-column basis instead of a per-row basis | |
# ===== SETUP ===== | |
require 'pg' | |
db = PG.connect dbname: 'josh_testing' | |
db.exec 'begin' | |
def db.exec(*) | |
super.to_a | |
end | |
def db.exec_params(*) | |
super.to_a | |
end | |
# db.exec 'create extension hstore;' | |
# ===== FUNCTIONS ===== | |
db.exec <<~SQL | |
-- https://wiki.postgresql.org/wiki/Retrieve_primary_key_columns | |
-- select (get_primary_keys('tablename')).* | |
create or replace function get_primary_keys(table_name text) | |
returns table(col_name text, data_type text) | |
as $$ | |
SELECT | |
a.attname::text as col_name, | |
format_type(a.atttypid, a.atttypmod) AS data_type | |
FROM pg_index i | |
JOIN pg_attribute a | |
ON a.attrelid = i.indrelid | |
AND a.attnum = ANY(i.indkey) | |
WHERE i.indrelid = table_name::regclass | |
AND i.indisprimary; | |
$$ language sql; | |
create or replace function diff_table(orig_table text, pcr_table text, prod_table text) | |
returns table(pks text, table_name text, column_name text, orig_value text, pcr_value text, prod_value text) | |
as $fn$ | |
declare | |
pks text := (select string_agg(quote_ident(col_name), ',') | |
from get_primary_keys(orig_table)); | |
query_body text := format($$ | |
select | |
(%1$s)::text as pks, | |
%2$L::text as table_name, | |
'FIXME'::text as column_name, | |
orig.val1 as orig_value, | |
pcr.val1 as pcr_value, | |
prod.val1 as prod_value | |
from %2$I as orig | |
full join %3$I as pcr using (%1$s) | |
full join %4$I as prod using (%1$s) | |
where pcr is distinct from prod | |
$$, | |
pks, orig_table, pcr_table, prod_table | |
); | |
begin | |
-- raise notice 'QUERY: %', query_body; | |
return query execute query_body; | |
-- TODO: | |
-- For each row in the above query, | |
-- instead of returning that row, | |
-- pass it to a "diff-records" fn | |
-- and have that fn emit column-name,orig,pcr,prod | |
-- for each differing column | |
end $fn$ language plpgsql; | |
SQL | |
# ===== TEST 1 ===== | |
db.exec <<~SQL | |
-- composite primary key with case sensitive column names | |
create table parent (id1 integer, "iD2" integer, val1 text, primary key (id1, "iD2")); | |
create table pcr (id1 integer, "iD2" integer, val1 text, primary key (id1, "iD2")); | |
create table prod (id1 integer, "iD2" integer, val1 text, primary key (id1, "iD2")); | |
insert into parent (id1, "iD2", val1) | |
values (1, 1, 'unchanged'), | |
(2, 1, 'deleted-pcr'), | |
(2, 2, 'deleted-prod'), | |
(2, 3, 'deleted-both'), | |
(3, 1, 'modified-pcr'), | |
(3, 2, 'modified-prod'), | |
(3, 3, 'modified-both-same'), | |
(3, 4, 'modified-both-different'); | |
insert into pcr (id1, "iD2", val1) | |
values (1, 1, 'unchanged'), | |
(2, 2, 'deleted-prod'), | |
(3, 1, 'MODIFIED-PCR'), | |
(3, 2, 'modified-prod'), | |
(3, 3, 'MODIFIED-BOTH-SAME'), | |
(3, 4, 'MODIFIED-BOTH-DIFFERENT-PCR'), | |
(4, 1, 'added-pcr'); | |
insert into prod (id1, "iD2", val1) | |
values (1, 1, 'unchanged'), | |
(2, 1, 'deleted-pcr'), | |
(3, 1, 'modified-pcr'), | |
(3, 2, 'MODIFIED-PROD'), | |
(3, 3, 'MODIFIED-BOTH-SAME'), | |
(3, 4, 'MODIFIED-BOTH-DIFFERENT-PROD'), | |
(4, 2, 'added-prod'); | |
SQL | |
result = db.exec("select * from diff_table('parent', 'pcr', 'prod')").sort_by { |r| r['pks'] } | |
EXPECTED_IDS = %w[(2,1) (2,2) (3,1) (3,2) (3,4) (4,1) (4,2)] | |
ACTUAL_IDS = result.map { |row| row['pks'] } | |
headers = result.first.to_h.keys | |
rows = [ | |
headers, | |
headers.map { |h| '-'*h.length }, | |
*result.map { |h| | |
[ h["pks"], | |
h["table_name"].inspect, | |
h["column_name"].inspect, | |
h["orig_value"].inspect, | |
h["pcr_value"].inspect, | |
h["prod_value"].inspect, | |
] | |
} | |
] | |
format = rows.transpose.map { |col| col.map(&:length).max }.map { |n| "%-#{n}s" }.join(" ") | |
rows.each { |row| puts (format % row).strip } | |
if EXPECTED_IDS != ACTUAL_IDS | |
EXPECTED_IDS # => | |
ACTUAL_IDS # => | |
raise "EXPECTED: #{EXPECTED_IDS.inspect}\nACTUAL: #{ACTUAL_IDS.inspect}" | |
end | |
# >> pks table_name column_name orig_value pcr_value prod_value | |
# >> --- ---------- ----------- ---------- --------- ---------- | |
# >> (2,1) "parent" "FIXME" "deleted-pcr" nil "deleted-pcr" | |
# >> (2,2) "parent" "FIXME" "deleted-prod" "deleted-prod" nil | |
# >> (3,1) "parent" "FIXME" "modified-pcr" "MODIFIED-PCR" "modified-pcr" | |
# >> (3,2) "parent" "FIXME" "modified-prod" "modified-prod" "MODIFIED-PROD" | |
# >> (3,4) "parent" "FIXME" "modified-both-different" "MODIFIED-BOTH-DIFFERENT-PCR" "MODIFIED-BOTH-DIFFERENT-PROD" | |
# >> (4,1) "parent" "FIXME" nil "added-pcr" nil | |
# >> (4,2) "parent" "FIXME" nil nil "added-prod" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment