Skip to content

Instantly share code, notes, and snippets.

@jpotts18
Last active January 13, 2018 08:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpotts18/0ed0d557a46b3dc6b7405770e8e78050 to your computer and use it in GitHub Desktop.
Save jpotts18/0ed0d557a46b3dc6b7405770e8e78050 to your computer and use it in GitHub Desktop.
Time-based Differentials
-- $> createdb time_based_differentials
-- $> psql time_based_differentials
-- Setup Basic Tables
CREATE TABLE customers (
id SERIAL UNIQUE NOT NULL,
first_name char(30), -- not unique
last_name char(30),
department char(50),
created_at timestamp NOT NULL DEFAULT NOW(),
deleted_at timestamp
);
INSERT INTO customers (first_name, last_name, department) VALUES
('Michael', 'Scott', 'Manager'),
('Jim', 'Halpert', 'Sales'),
('Dwight', 'Schrute', 'Sales'),
('Andy', 'Bernard', 'Sales'),
('Pam', 'Beasley', 'Admin'),
('Oscar', 'Martinez', 'Accounting'),
('Toby', 'Flinderson', 'HR');
-- \d+ customers
-- select * from customers;
CREATE TABLE products (
id SERIAL UNIQUE NOT NULL,
name char(30),
category char(30),
created_at timestamp NOT NULL DEFAULT NOW(),
deleted_at timestamp
);
INSERT INTO products (name, category) VALUES
('Stapler', 'Office'),
('Paper', 'Office'),
('Throwing Star', 'Weapon'),
('Pepper Spray', 'Weapon'),
('Root Beer', 'Food'),
('Mug', 'Home'),
('Candy Jar', 'Home'),
('Grape Soda', 'Food'),
('Jello', 'Food'),
('Paperclips', 'Office');
-- \d+ products
-- select * from products
CREATE TABLE orders (
id SERIAL UNIQUE NOT NULL,
product_id integer,
customer_id integer,
amount decimal,
shipping decimal,
discount decimal,
created_at timestamp NOT NULL DEFAULT NOW(),
deleted_at timestamp,
FOREIGN KEY (product_id) REFERENCES products (id),
FOREIGN KEY (customer_id) REFERENCES customers (id)
);
INSERT INTO orders (product_id, customer_id, amount, shipping, discount)
SELECT
CEILING(RANDOM() * (SELECT COUNT(*) FROM products)),
CEILING(RANDOM() * (SELECT COUNT(*) FROM customers)),
CEILING(RANDOM() * 19) + 0.99,
CEILING(RANDOM() * 4) + 0.99,
CASE WHEN RANDOM() > 0.8 THEN CEILING(RANDOM() * -5) - 0.99 ELSE 0 END
FROM generate_series(1,50);
-- Add Modified dates to all Tables
ALTER TABLE customers ADD COLUMN modified_at timestamp DEFAULT NOW();
ALTER TABLE orders ADD COLUMN modified_at timestamp DEFAULT NOW();
ALTER TABLE products ADD COLUMN modified_at timestamp DEFAULT NOW();
-- Might make sense to set your columns as the created date as well
-- Create Function
-- https://stackoverflow.com/a/9556527/1736429
CREATE OR REPLACE FUNCTION update_modified_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.modified_at = now();
RETURN NEW;
END;
$$ language 'plpgsql';
-- Create Trigger and Apply Function to Tables
-- https://www.openscg.com/2014/05/trigger-overhead/
CREATE TRIGGER update_customers_modified_at
BEFORE UPDATE ON customers FOR EACH ROW EXECUTE PROCEDURE update_modified_at_column();
CREATE TRIGGER update_orders_modified_at
BEFORE UPDATE ON orders FOR EACH ROW EXECUTE PROCEDURE update_modified_at_column();
CREATE TRIGGER update_products_modified_at
BEFORE UPDATE ON products FOR EACH ROW EXECUTE PROCEDURE update_modified_at_column();
-- Verify Trigger
SELECT * from customers where first_name = 'Pam';
UPDATE customers SET last_name = 'Halpert' WHERE last_name = 'Beasley';
SELECT * from customers where first_name = 'Pam';
-- createdb data_warehouse
-- psql data_warehouse
CREATE TABLE etl_log (
id SERIAL UNIQUE NOT NULL,
level char(50) not null default 'debug',
message varchar(255) not null,
job_name char(100), -- not unique
process_name char(100),
created_at timestamp NOT NULL DEFAULT NOW()
);
-- CUSTOMER
-- This table matches the state of the table in the source system
CREATE TABLE source_customers (
id integer,
first_name char(30),
last_name char(30),
department char(50),
created_at timestamp,
deleted_at timestamp,
modified_at timestamp,
dw_updated_at timestamp
);
-- This table hold the diffs and be a copy of the schema source_customers
CREATE TEMP TABLE staging_customers AS TABLE source_customers;
-- This table insulates the data warehouse primary key
-- switch id for key
-- Don't lose this table
CREATE TABLE hub_customers (
key SERIAL UNIQUE NOT NULL,
source char(100),
id integer,
dw_updated_at timestamp
);
-- This table functions as your conformed dimension
CREATE TABLE dim_customers (
id integer,
key integer,
first_name char(30),
last_name char(30),
department char(50),
created_at timestamp,
deleted_at timestamp,
modified_at timestamp,
dw_updated_at timestamp
);
-- PRODUCTS
CREATE TABLE source_products (
id integer,
name char(30),
category char(30),
created_at timestamp,
deleted_at timestamp,
modified_at timestamp
);
CREATE TABLE staging_products AS TABLE source_products;
CREATE TABLE hub_products (
key SERIAL UNIQUE NOT NULL,
source char(100),
id integer,
dw_updated_at timestamp
);
CREATE TABLE dim_products (
id integer,
key integer,
name char(30),
category char(30),
created_at timestamp,
deleted_at timestamp,
modified_at timestamp,
dw_updated_at timestamp
);
-- ORDERS
CREATE TABLE source_orders (
id integer,
product_key integer,
customer_key integer,
amount decimal,
shipping decimal,
discount decimal,
created_at timestamp,
deleted_at timestamp,
modified_at timestamp
)
-- No Hub
-- Temp Stage ->
-- Delete from fact where order_id
-- Insert into fact from stage
CREATE TABLE staging_orders AS TABLE source_orders;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment