Skip to content

Instantly share code, notes, and snippets.

@KMahoney
Created June 27, 2015 19:52
Show Gist options
  • Star 10 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save KMahoney/dcc12d3ff6a49c11cdc9 to your computer and use it in GitHub Desktop.
Save KMahoney/dcc12d3ff6a49c11cdc9 to your computer and use it in GitHub Desktop.
-- So as an example of using a log orientated approach in PostgreSQL,
-- let's write a simple blog application. We will want to be able to:
-- * Write and edit blog posts
-- * Publish revisions of posts for public viewing
-- * Delete posts
-- * Add or remove tags to posts
-- Let's start by creating a schema.
DROP SCHEMA IF EXISTS logblog CASCADE;
CREATE SCHEMA logblog;
-- Article Table
-- Create a table representing the set of article slugs. This allows us
-- to reference a slug as a foriegn key and maintain
-- consistency. PostgreSQL would not allow you to reference the `slug`
-- field in the `article_revision` table in a foreign key as it is not
-- unique in that table.
CREATE TABLE logblog.article (
slug TEXT NOT NULL PRIMARY KEY CHECK(slug SIMILAR TO '[-a-z]+')
);
-- Revision Table
-- Next is an immutable log of article revisions. This should look fairly
-- straight forward. We can create new articles by inserting the slug
-- into `article` and the revision into `article_revision` inside a
-- transaction, and update an article by simply inserting the revision
-- (I'll show you later).
CREATE SEQUENCE logblog.revision_id_seq;
CREATE TABLE logblog.article_revision (
revision_id INTEGER PRIMARY KEY DEFAULT nextval('logblog.revision_id_seq'),
timestamp TIMESTAMP NOT NULL DEFAULT now(),
slug TEXT NOT NULL REFERENCES logblog.article,
title TEXT NOT NULL,
content TEXT NOT NULL
);
-- Publishing
-- Now an immutable log of article publish events. The public will be
-- able to see the last published version of an article. This means you
-- can publish an earlier revision to 'rollback' an article.
-- Note the timestamp when an article was published or deleted is
-- distinct from when the revision was created. Readers are probably more
-- interested in when an article was first published than when it was
-- first drafted.
CREATE TABLE logblog.article_publish (
timestamp TIMESTAMP NOT NULL DEFAULT now(),
revision_id INTEGER REFERENCES logblog.article_revision
);
-- Deleting
-- An immutable log of article deletion events. Articles are only
-- considered deleted when the deletion timestamp is later than any
-- publish actions. This means articles can be 'undeleted' by
-- re-publishing them. In a log orientated database no data is every
-- truly removed. This is something you'll have to take into account
-- if you're handling sensitive data.
CREATE TABLE logblog.article_deletion (
timestamp TIMESTAMP NOT NULL DEFAULT now(),
slug TEXT NOT NULL REFERENCES logblog.article
);
-- Tagging
-- An immutable log of tag events. It's awkward to create a tag table
-- with a set of unique tag names like we do with articles, so instead
-- we just record tag events. This is a bit lazy as it doesn't enforce
-- consistency with revoked tags (i.e. you can revoke a non-existant
-- tag).
CREATE TYPE logblog.tag_event_type AS ENUM ('add', 'revoke');
CREATE TABLE logblog.tag_event (
timestamp TIMESTAMP NOT NULL DEFAULT now(),
slug TEXT NOT NULL REFERENCES logblog.article,
event logblog.tag_event_type NOT NULL,
tag TEXT NOT NULL
);
-- Building Views
-- In order to easily query the current state of our application we can
-- build up some PostgreSQL views to make it easier for us. We're going
-- to make heavy use of `DISTINCT ON` to find the latest state of each
-- component.
-- This view is the latest deletion date for an article (if applicable)
CREATE VIEW logblog.last_deleted_view AS
SELECT DISTINCT ON (slug) timestamp AS deleted_on, slug
FROM logblog.article_deletion
ORDER BY slug, deleted_on DESC;
-- We will want to show users the latest published content of an article.
CREATE VIEW logblog.last_published_view AS
SELECT DISTINCT ON (rev.slug)
rev.revision_id,
pub.timestamp AS last_updated_on,
rev.slug,
rev.title,
rev.content
FROM logblog.article_publish AS pub
INNER JOIN logblog.article_revision AS rev ON rev.revision_id = pub.revision_id
ORDER BY rev.slug, last_updated_on DESC;
-- We'll also want to know when an article was first published, as this
-- is the date you usually show on an article (maybe you could use the
-- last published timestamp to show when it was last updated).
CREATE VIEW logblog.first_published_view AS
SELECT DISTINCT ON (rev.slug)
rev.slug,
pub.timestamp AS first_published_on
FROM logblog.article_publish AS pub
INNER JOIN logblog.article_revision AS rev ON rev.revision_id = pub.revision_id
ORDER BY rev.slug, first_published_on;
-- We'll aggregate the tags as a PostgreSQL array for convenience.
CREATE VIEW logblog.article_tag_view AS
WITH last_tag_event AS
(SELECT DISTINCT ON (slug, tag) *
FROM logblog.tag_event
ORDER BY slug, tag, timestamp DESC)
SELECT slug, array_agg(tag) AS tags
FROM last_tag_event
WHERE event = 'add'
GROUP BY slug;
-- The Public's View
-- Here we use the previous views as building blocks to create our public
-- article view. Note we don't show articles that have a deletion date
-- later than the last published date.
CREATE VIEW logblog.public_article_view AS
SELECT last_pub.slug,
first_pub.first_published_on,
last_pub.last_updated_on,
last_pub.title,
last_pub.content,
COALESCE(tags.tags, '{}'::TEXT[]) AS tags
FROM logblog.last_published_view AS last_pub
LEFT JOIN logblog.last_deleted_view AS del
ON del.slug = last_pub.slug
LEFT JOIN logblog.first_published_view AS first_pub
ON first_pub.slug = last_pub.slug
LEFT JOIN logblog.article_tag_view AS tags
ON tags.slug = last_pub.slug
WHERE NOT COALESCE(del.deleted_on > last_pub.last_updated_on, false)
ORDER BY first_pub.first_published_on;
-- The Life of a Blog Post
-- To finish, a fun query to show the entire history of an article.
CREATE VIEW logblog.article_history_view AS
WITH
revision_events AS
(SELECT timestamp,
slug,
('Created article revision ' || revision_id)::TEXT AS event
FROM logblog.article_revision),
publish_events AS
(SELECT pub.timestamp,
rev.slug,
('Published revision ' || pub.revision_id)::TEXT AS event
FROM logblog.article_publish AS pub
INNER JOIN logblog.article_revision AS rev
ON rev.revision_id = pub.revision_id),
deletion_events AS
(SELECT timestamp,
slug,
'Deleted article'::TEXT AS event
FROM logblog.article_deletion),
tag_events AS
(SELECT timestamp,
slug,
(CASE
WHEN event = 'add' THEN ('Added tag ' || tag)
WHEN event = 'revoke' THEN ('Deleted tag ' || tag)
END)::TEXT AS event
FROM logblog.tag_event)
(SELECT * FROM revision_events)
UNION (SELECT * FROM publish_events)
UNION (SELECT * FROM deletion_events)
UNION (SELECT * FROM tag_events);
-- Improving read performance
CREATE TABLE logblog.public_article_state (
slug TEXT PRIMARY KEY REFERENCES logblog.article,
title TEXT,
content TEXT
);
CREATE FUNCTION logblog.insert_slug() RETURNS trigger AS $$
BEGIN
INSERT INTO logblog.public_article_state (slug) VALUES (NEW.slug);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER insert_slug
AFTER INSERT ON logblog.article
FOR EACH ROW EXECUTE PROCEDURE logblog.insert_slug();
CREATE FUNCTION logblog.update_content() RETURNS trigger AS $$
BEGIN
UPDATE logblog.public_article_state
SET title = rev.title, content = rev.content
FROM logblog.article_revision AS rev
WHERE rev.slug = logblog.public_article_state.slug
AND rev.revision_id = NEW.revision_id;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER update_content
AFTER INSERT ON logblog.article_publish
FOR EACH ROW EXECUTE PROCEDURE logblog.update_content();
-- Test Data
-- This is how you create a new article. If the article slug already
-- exists the transaction will abort.
BEGIN;
INSERT INTO logblog.article VALUES ('simple');
INSERT INTO logblog.article_revision (revision_id, timestamp, slug, title, content) VALUES
(1, '2015-01-01 00:00:00', 'simple',
'A Simple Title',
'This is a simple published article');
COMMIT;
-- Pubish the article
INSERT INTO logblog.article_publish (timestamp, revision_id) VALUES
('2015-01-01 01:00:00', 1);
-- Tag the article
INSERT INTO logblog.tag_event (timestamp, slug, event, tag) VALUES
('2015-01-01 02:00:00', 'simple', 'add', 'simple-tag');
-- Some more articles
INSERT INTO logblog.article VALUES
('revised'), ('deleted'), ('unpublished'), ('unpublished-revision'), ('republished');
INSERT INTO logblog.article_revision (revision_id, timestamp, slug, title, content) VALUES
(2, '2015-01-01 00:00:00', 'revised',
'Revised',
'You will not see this content because it has been revised.'),
(3, '2015-01-01 02:00:00', 'revised',
'Revised',
'This is revised and published content.'),
(4, '2015-01-01 00:00:00', 'deleted',
'Deleted',
'This is a deleted article. You should not see this.'),
(5, '2015-01-01 00:00:00', 'unpublished',
'Unpublished',
'This content was never published :('),
(6, '2015-01-01 00:00:00', 'unpublished-revision',
'Unpublished Revision',
'This article has revised content you can not see yet.'),
(7, '2015-01-01 02:00:00', 'unpublished-revision',
'Unpublished Revision',
'This is revised content you can not see.'),
(8, '2015-01-01 00:00:00', 'republished',
'Republished',
'This article was deleted then re-published.');
INSERT INTO logblog.article_publish (timestamp, revision_id) VALUES
('2015-01-01 01:00:00', 2),
('2015-01-01 03:00:00', 3),
('2015-01-01 01:00:00', 4),
('2015-01-01 01:00:00', 6),
('2015-01-01 01:00:00', 8),
('2015-01-01 03:00:00', 8);
INSERT INTO logblog.article_deletion (timestamp, slug) VALUES
('2015-01-01 02:00:00', 'deleted'),
('2015-01-01 02:00:00', 'republished');
INSERT INTO logblog.tag_event (timestamp, slug, event, tag) VALUES
('2015-01-01 01:00:00', 'revised', 'add', 'lots'),
('2015-01-01 02:00:00', 'revised', 'add', 'of'),
('2015-01-01 03:00:00', 'revised', 'add', 'tags'),
('2015-01-01 04:00:00', 'revised', 'add', 'deleted-tag'),
('2015-01-01 05:00:00', 'revised', 'revoke', 'deleted-tag');
-- Just in case you want to add further revisions
ALTER SEQUENCE logblog.revision_id_seq RESTART WITH 9;
SELECT slug, first_published_on, content FROM logblog.public_article_view;
SELECT * FROM logblog.article_history_view ORDER BY slug, timestamp;
EXPLAIN SELECT * FROM logblog.public_article_view;
SELECT * FROM logblog.public_article_state;
EXPLAIN SELECT * FROM logblog.public_article_state;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment