Last active
July 18, 2024 10:23
-
-
Save bokwoon95/4fd34a78e72b2935e78ec0f40e7e49e1 to your computer and use it in GitHub Desktop.
how to model threaded comments (e.g. reddit comments) in SQL with a simple 'ancestors' column
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- how to model threaded comments (e.g. reddit comments) in SQL with a simple 'ancestors' column | |
-- The comment tree: | |
-- [1] | |
-- / \ | |
-- [2] [4] | |
-- / \ \ | |
-- [3] [7] [6] | |
-- / | |
-- [5] | |
-- This is an improved version of the classic adjacency list in SQL used to model | |
-- trees. By replacing the parent_id with an array of ancestors, one can query | |
-- all nested child elements without the need for recursive queries. This makes it | |
-- more efficient. This works for any database that supports array-like data | |
-- types (Postgres supports arrays natively, while MySQL and Sqlite3 support JSON | |
-- arrays) | |
-- This method is referred to as the 'Path Enumeration' in | |
-- https://stackoverflow.com/a/192462. The stackoverflow answer espouses something | |
-- called a 'Closure Table', but Path Enumeration is much simpler to use. It is | |
-- also mentioned in | |
-- https://www.sqlteam.com/articles/more-trees-hierarchies-in-sql as a better | |
-- alternative to Joe Celko's 'Nested Set' model. | |
-- Postgres -- | |
CREATE TABLE comments ( | |
comment_id INT UNIQUE, | |
ancestors INT[] | |
); | |
INSERT INTO comments | |
(comment_id, ancestors) | |
VALUES | |
(1, NULL), | |
(2, '{1}'), | |
(4, '{1}'), | |
(3, '{1, 2}'), | |
(7, '{1, 2}'), | |
(6, '{1, 4}'), | |
(5, '{1, 2, 3}'); | |
-- Postgres: Get all ancestors of comment 3, ordered by depth | |
SELECT | |
ancestors | |
FROM | |
comments | |
WHERE | |
comment_id = 3 | |
; | |
-- Postgres: Get all descendants of comment 1, ordered by depth | |
SELECT | |
comment_id, | |
array_length(ancestors) AS depth | |
FROM | |
comments | |
WHERE | |
1 = ANY(ancestors) | |
ORDER BY | |
depth | |
; | |
-- Postgres: Get all siblings of comment 3 | |
SELECT | |
comment_id | |
FROM | |
comments | |
WHERE | |
ancestors = (SELECT c2.ancestors FROM comments AS c2 WHERE c2.comment_id = 3) | |
AND comment_id <> 3 | |
; | |
-- Postgres: Add comment 8 under comment 7 | |
INSERT INTO comments | |
(comment_id, ancestors) | |
SELECT | |
8, | |
array_append(ancestors, comment_id) | |
FROM | |
comments | |
WHERE | |
comment_id = 7 | |
; | |
-- Postgres: Delete comment 2 and all its child comments | |
DELETE FROM | |
comments | |
WHERE | |
comment_id = 2 | |
OR 2 = ANY(ancestors) | |
; | |
-- MySQL -- | |
CREATE TABLE comments ( | |
comment_id INT UNIQUE, | |
ancestors JSON | |
); | |
INSERT INTO comments | |
(comment_id, ancestors) | |
VALUES | |
(1, NULL), | |
(2, '[1]'), | |
(4, '[1]'), | |
(3, '[1, 2]'), | |
(7, '[1, 2]'), | |
(6, '[1, 4]'), | |
(5, '[1, 2, 3]') | |
; | |
-- MySQL: Get all ancestors of comment 3, ordered by depth | |
SELECT | |
ancestors | |
FROM | |
comments | |
WHERE | |
comment_id = 3 | |
; | |
-- MySQL: Get all descendants of comment 1, ordered by depth | |
SELECT | |
comment_id, | |
json_length(ancestors) AS depth | |
FROM | |
comments | |
WHERE | |
json_contains(ancestors, 1) | |
ORDER BY | |
depth | |
; | |
-- MySQL: Get all siblings of comment 3 | |
SELECT | |
comment_id | |
FROM | |
comments | |
WHERE | |
ancestors = (SELECT c2.ancestors FROM comments AS c2 WHERE c2.comment_id = 3) | |
AND comment_id <> 3 | |
; | |
-- MySQL: Add comment 8 under comment 7 | |
INSERT INTO comments | |
(comment_id, ancestors) | |
SELECT | |
8, | |
json_array_append(ancestors, '$', comment_id) | |
FROM | |
comments | |
WHERE | |
comment_id = 7 | |
; | |
-- MySQL: Delete comment 2 and all its child comments | |
DELETE FROM | |
comments | |
WHERE | |
comment_id = 2 | |
OR json_contains(ancestors, 2) | |
; | |
-- Sqlite3 -- | |
CREATE TABLE comments ( | |
comment_id INT UNIQUE, | |
ancestors JSON | |
); | |
INSERT INTO comments | |
(comment_id, ancestors) | |
VALUES | |
(1, NULL), | |
(2, '[1]'), | |
(4, '[1]'), | |
(3, '[1, 2]'), | |
(7, '[1, 2]'), | |
(6, '[1, 4]'), | |
(5, '[1, 2, 3]') | |
; | |
-- Sqlite3: Get all ancestors of comment 3, ordered by depth | |
SELECT | |
ancestors | |
FROM | |
comments | |
WHERE | |
comment_id = 3 | |
; | |
-- Sqlite3: Get all descendants of comment 1, ordered by depth | |
SELECT | |
comment_id, | |
json_array_length(ancestors) AS depth | |
FROM | |
comments | |
WHERE | |
EXISTS(SELECT * FROM json_each(ancestors) WHERE json_each.value = 1) | |
ORDER BY | |
depth | |
; | |
-- Sqlite3: Get all siblings of comment 3 | |
SELECT | |
comment_id | |
FROM | |
comments | |
WHERE | |
ancestors = (SELECT c2.ancestors FROM comments AS c2 WHERE c2.comment_id = 3) | |
AND comment_id <> 3 | |
; | |
-- Sqlite3: Add comment 8 under comment 7 | |
INSERT INTO comments | |
(comment_id, ancestors) | |
SELECT | |
8, | |
json_insert(ancestors, '$[#]', comment_id) | |
FROM | |
comments | |
WHERE | |
comment_id = 7 | |
; | |
-- Sqlite3: Reparent | |
-- Sqlite3: Delete comment 2 and all its child comments | |
DELETE FROM | |
comments | |
WHERE | |
comment_id = 2 | |
OR EXISTS(SELECT * FROM json_each(ancestors) WHERE json_each.value = 2) | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment