Skip to content

Instantly share code, notes, and snippets.

@bokwoon95
Last active March 22, 2024 22:31
Show Gist options
  • Star 19 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save bokwoon95/4fd34a78e72b2935e78ec0f40e7e49e1 to your computer and use it in GitHub Desktop.
Save bokwoon95/4fd34a78e72b2935e78ec0f40e7e49e1 to your computer and use it in GitHub Desktop.
how to model threaded comments (e.g. reddit comments) in SQL with a simple 'ancestors' column
-- how to model threaded comments (e.g. reddit comments) in SQL with a simple 'ancestors' column
-- The comment tree:
-- [1]
-- / \
-- [2] [4]
-- / \ \
-- [3] [7] [6]
-- /
-- [5]
-- This is an improved version of the classic adjacency list in SQL used to model
-- trees. By replacing the parent_id with an array of ancestors, one can query
-- all nested child elements without the need for recursive queries. This makes it
-- more efficient. This works for any database that supports array-like data
-- types (Postgres supports arrays natively, while MySQL and Sqlite3 support JSON
-- arrays)
-- This method is referred to as the 'Path Enumeration' in
-- https://stackoverflow.com/a/192462. The stackoverflow answer espouses something
-- called a 'Closure Table', but Path Enumeration is much simpler to use. It is
-- also mentioned in
-- https://www.sqlteam.com/articles/more-trees-hierarchies-in-sql as a better
-- alternative to Joe Celko's 'Nested Set' model.
-- Postgres --
CREATE TABLE comments (
comment_id INT UNIQUE,
ancestors INT[]
);
INSERT INTO comments
(comment_id, ancestors)
VALUES
(1, NULL),
(2, '{1}'),
(4, '{1}'),
(3, '{1, 2}'),
(7, '{1, 2}'),
(6, '{1, 4}'),
(5, '{1, 2, 3}');
-- Postgres: Get all ancestors of comment 3, ordered by depth
SELECT
ancestors
FROM
comments
WHERE
comment_id = 3
;
-- Postgres: Get all descendants of comment 1, ordered by depth
SELECT
comment_id,
array_length(ancestors) AS depth
FROM
comments
WHERE
1 = ANY(ancestors)
ORDER BY
depth
;
-- Postgres: Get all siblings of comment 3
SELECT
comment_id
FROM
comments
WHERE
ancestors = (SELECT c2.ancestors FROM comments AS c2 WHERE c2.comment_id = 3)
AND comment_id <> 3
;
-- Postgres: Add comment 8 under comment 7
INSERT INTO comments
(comment_id, ancestors)
SELECT
8,
array_append(ancestors, comment_id)
FROM
comments
WHERE
comment_id = 7
;
-- Postgres: Delete comment 2 and all its child comments
DELETE FROM
comments
WHERE
comment_id = 2
OR 2 = ANY(ancestors)
;
-- MySQL --
CREATE TABLE comments (
comment_id INT UNIQUE,
ancestors JSON
);
INSERT INTO comments
(comment_id, ancestors)
VALUES
(1, NULL),
(2, '[1]'),
(4, '[1]'),
(3, '[1, 2]'),
(7, '[1, 2]'),
(6, '[1, 4]'),
(5, '[1, 2, 3]')
;
-- MySQL: Get all ancestors of comment 3, ordered by depth
SELECT
ancestors
FROM
comments
WHERE
comment_id = 3
;
-- MySQL: Get all descendants of comment 1, ordered by depth
SELECT
comment_id,
json_length(ancestors) AS depth
FROM
comments
WHERE
json_contains(ancestors, 1)
ORDER BY
depth
;
-- MySQL: Get all siblings of comment 3
SELECT
comment_id
FROM
comments
WHERE
ancestors = (SELECT c2.ancestors FROM comments AS c2 WHERE c2.comment_id = 3)
AND comment_id <> 3
;
-- MySQL: Add comment 8 under comment 7
INSERT INTO comments
(comment_id, ancestors)
SELECT
8,
json_array_append(ancestors, '$', comment_id)
FROM
comments
WHERE
comment_id = 7
;
-- MySQL: Delete comment 2 and all its child comments
DELETE FROM
comments
WHERE
comment_id = 2
OR json_contains(ancestors, 2)
;
-- Sqlite3 --
CREATE TABLE comments (
comment_id INT UNIQUE,
ancestors JSON
);
INSERT INTO comments
(comment_id, ancestors)
VALUES
(1, NULL),
(2, '[1]'),
(4, '[1]'),
(3, '[1, 2]'),
(7, '[1, 2]'),
(6, '[1, 4]'),
(5, '[1, 2, 3]')
;
-- Sqlite3: Get all ancestors of comment 3, ordered by depth
SELECT
ancestors
FROM
comments
WHERE
comment_id = 3
;
-- Sqlite3: Get all descendants of comment 1, ordered by depth
SELECT
comment_id,
json_array_length(ancestors) AS depth
FROM
comments
WHERE
EXISTS(SELECT * FROM json_each(ancestors) WHERE json_each.value = 1)
ORDER BY
depth
;
-- Sqlite3: Get all siblings of comment 3
SELECT
comment_id
FROM
comments
WHERE
ancestors = (SELECT c2.ancestors FROM comments AS c2 WHERE c2.comment_id = 3)
AND comment_id <> 3
;
-- Sqlite3: Add comment 8 under comment 7
INSERT INTO comments
(comment_id, ancestors)
SELECT
8,
json_insert(ancestors, '$[#]', comment_id)
FROM
comments
WHERE
comment_id = 7
;
-- Sqlite3: Reparent
-- Sqlite3: Delete comment 2 and all its child comments
DELETE FROM
comments
WHERE
comment_id = 2
OR EXISTS(SELECT * FROM json_each(ancestors) WHERE json_each.value = 2)
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment