This gist contains setup SQL and example data for reproducing an object ordering problem that I'm trying to optimize.
-- For normal_rand() so we can create distributions that are not uniform. | |
-- The "real" distributions are not normal but they are closer to normal than uniform. | |
CREATE EXTENSION tablefunc; | |
BEGIN; | |
-- ########################### | |
-- The base "object" that is displayed using different either id DESC proxy | |
-- or scores. | |
CREATE TABLE objects ( | |
id INTEGER PRIMARY KEY, | |
status SMALLINT DEFAULT 1 | |
); | |
INSERT INTO objects (id) | |
SELECT i AS id FROM generate_series(1,2e6::int) AS i; | |
-- ########################### | |
-- Object scores can be used to order the set. This could be views, replies, etc. | |
CREATE TABLE object_scores ( | |
object_id INTEGER PRIMARY KEY REFERENCES objects(id), | |
score NUMERIC NOT NULL DEFAULT 0 | |
); | |
CREATE INDEX object_scores_score_idx ON object_scores (score); | |
WITH random AS ( | |
SELECT abs(normal_rand(2e6::int, 0, 1)) AS r | |
), random_range AS ( | |
SELECT row_number() OVER () AS id, r AS score | |
FROM random | |
) | |
INSERT INTO object_scores | |
SELECT id, score | |
FROM random_range; | |
-- Mark some of them with a different status for filtering. The status | |
-- could toggle things like visibility. | |
UPDATE objects | |
SET status = 0 | |
FROM ( | |
SELECT trunc(random() * 2e6 + 1) AS id | |
FROM generate_series(1,1e3::int) AS i | |
) AS T | |
WHERE T.id = objects.id; | |
-- ########################### | |
-- Containers (and their objects) live in categories. | |
CREATE TABLE categories ( | |
id SERIAL PRIMARY KEY | |
); | |
INSERT INTO categories (id) | |
SELECT i AS id FROM generate_series(1,10) AS i; | |
-- ########################### | |
-- Objects live in containers. Containers have categories. | |
CREATE TABLE containers ( | |
id SERIAL PRIMARY KEY, | |
status SMALLINT DEFAULT 1, | |
category_id INTEGER REFERENCES categories(id) | |
); | |
WITH random AS ( | |
SELECT trunc(abs(normal_rand((5*1e5)::int, 5, 2))) AS category_id | |
), random_containers AS ( | |
SELECT row_number() OVER () AS id, greatest(1, least(10, category_id)) AS category_id | |
FROM random | |
) | |
INSERT INTO containers (id, category_id) | |
SELECT id, category_id | |
FROM random_containers; | |
-- Change status on some random containers to introduce filtering. | |
UPDATE containers | |
SET status = 0 | |
FROM ( | |
SELECT trunc(random() * (5*1e5) + 1) AS id | |
FROM generate_series(1,(5*1e3::int)) AS i | |
) AS T | |
WHERE T.id = containers.id; | |
-- ########################### | |
-- Objects have one canonical/default container but can live in more than one secondary | |
-- container where is_default is false. | |
CREATE TABLE object_container_map ( | |
object_id INTEGER REFERENCES objects(id), | |
container_id INTEGER REFERENCES containers(id) ON DELETE CASCADE, | |
is_default BOOLEAN DEFAULT true | |
); | |
CREATE UNIQUE INDEX object_container_map_linked ON object_container_map(container_id, object_id) WHERE is_default = FALSE; | |
CREATE UNIQUE INDEX object_container_map_default ON object_container_map(object_id) WHERE is_default = TRUE; | |
-- Default somewhat random mappings | |
WITH random AS ( | |
SELECT trunc(abs(normal_rand(2e6::int, 2.5*1e5, 0.5*1e5))) AS container_id | |
), random_maps AS ( | |
SELECT row_number() OVER () AS object_id, greatest(1, least((5*1e5), container_id)) AS container_id | |
FROM random | |
) | |
INSERT INTO object_container_map (container_id, object_id) | |
SELECT container_id, object_id | |
FROM random_maps; | |
-- Create some meta mappings. This will provide test data for the slow queries below. | |
INSERT INTO categories (id) VALUES (11); | |
DO $$ | |
DECLARE i INTEGER; | |
BEGIN | |
FOR i IN SELECT rownum FROM generate_series(1, 20) AS rownum LOOP | |
INSERT INTO containers (id, category_id) VALUES ((SELECT MAX(id)+1 FROM containers), 11); | |
WITH random_objects AS ( | |
SELECT id AS object_id | |
FROM objects | |
ORDER BY random() | |
LIMIT 80000 | |
) | |
INSERT INTO object_container_map (object_id, container_id, is_default) | |
SELECT DISTINCT object_id, (SELECT MAX(id) FROM containers), false | |
FROM random_objects; | |
END LOOP; | |
END $$; | |
-------------------------------------------------------------------------------- | |
-- Distributions | |
SELECT category_id, COUNT(*) | |
FROM containers | |
GROUP BY category_id; | |
-- The meta mapping should be the largest with ~80k | |
SELECT container_id, COUNT(*) | |
FROM object_container_map | |
GROUP BY container_id | |
ORDER BY COUNT DESC | |
LIMIT 20; | |
SELECT containers.category_id, COUNT(*) | |
FROM objects | |
JOIN object_container_map ON ( | |
object_container_map.object_id = objects.id | |
) | |
JOIN containers ON ( | |
containers.id = object_container_map.container_id | |
) | |
GROUP BY containers.category_id | |
ORDER BY COUNT DESC; | |
-- Slow, but not too concerned | |
EXPLAIN ANALYZE | |
WITH object_positions AS ( | |
SELECT objects.id, row_number() OVER (ORDER BY objects.id DESC) AS index | |
FROM objects | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id AND | |
object_container_map.is_default | |
) | |
JOIN containers ON ( | |
containers.id = object_container_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND containers.category_id = 9 | |
) | |
SELECT id, index | |
FROM object_positions | |
WHERE index BETWEEN 4000 - 6 AND 4000 + 6 | |
ORDER BY index ASC; | |
CREATE INDEX object_container_map_is_default_idx on object_container_map(container_id) where is_default; | |
-- By container-- this query should be fast ~5ms or less | |
WITH object_positions AS ( | |
SELECT object_container_map.container_id, objects.id, row_number() OVER (ORDER BY objects.id DESC) AS index | |
FROM objects | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id | |
) | |
JOIN containers ON ( | |
containers.id = object_container_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND containers.id = 254457 AND object_container_map.is_default | |
) | |
SELECT container_id, id, index | |
FROM object_positions | |
WHERE index BETWEEN 17 - 6 AND 17 + 6 | |
ORDER BY index ASC; | |
-- ############################################################################################ | |
-- ###################### Need help here. We need to get this data in ~100ms or less. ######### | |
-- Filter by meta container. This takes about 1500-2500ms once it's in memory on a mac book | |
-- pro (similar on production HW). | |
EXPLAIN (ANALYZE, BUFFERS) | |
WITH object_positions AS ( | |
SELECT object_container_map.container_id, objects.id, containers.category_id, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY objects.id DESC) AS index | |
FROM objects | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id | |
AND object_container_map.is_default = false | |
) | |
JOIN object_container_map default_map ON ( | |
objects.id = default_map.object_id | |
AND default_map.is_default = true | |
) | |
JOIN containers ON ( | |
containers.id = default_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001 | |
AND containers.category_id IN (2,3,4,5,6) | |
ORDER BY objects.id DESC | |
) | |
SELECT row_number() OVER (), container_id, id AS object_id, category_id, index | |
FROM object_positions | |
WHERE index BETWEEN 50000 - 6 AND 50000 + 6 | |
ORDER BY index ASC; | |
-- Ordering by score. Also needs to be ~100ms. | |
EXPLAIN (ANALYZE, BUFFERS) | |
WITH object_positions AS ( | |
SELECT object_container_map.container_id, objects.id, containers.category_id, object_scores.score, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY object_scores.score DESC) AS index | |
FROM objects | |
JOIN object_scores ON ( | |
object_scores.object_id = objects.id | |
) | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id | |
AND object_container_map.is_default = false | |
) | |
JOIN object_container_map default_map ON ( | |
objects.id = default_map.object_id | |
AND default_map.is_default = true | |
) | |
JOIN containers ON ( | |
containers.id = default_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001 | |
AND containers.category_id IN (2,3,4,5,6) | |
) | |
SELECT row_number() OVER (), container_id, id AS object_id, category_id, score, index | |
FROM object_positions | |
WHERE index BETWEEN 50000 - 6 AND 50000 + 6 | |
ORDER BY index ASC; | |
COMMIT; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment