Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
This gist contains setup SQL and example data for reproducing an object ordering problem that I'm trying to optimize.
-- For normal_rand() so we can create distributions that are not uniform.
-- The "real" distributions are not normal but they are closer to normal than uniform.
CREATE EXTENSION tablefunc;
BEGIN;
-- ###########################
-- The base "object" that is displayed using different either id DESC proxy
-- or scores.
CREATE TABLE objects (
id INTEGER PRIMARY KEY,
status SMALLINT DEFAULT 1
);
INSERT INTO objects (id)
SELECT i AS id FROM generate_series(1,2e6::int) AS i;
-- ###########################
-- Object scores can be used to order the set. This could be views, replies, etc.
CREATE TABLE object_scores (
object_id INTEGER PRIMARY KEY REFERENCES objects(id),
score NUMERIC NOT NULL DEFAULT 0
);
CREATE INDEX object_scores_score_idx ON object_scores (score);
WITH random AS (
SELECT abs(normal_rand(2e6::int, 0, 1)) AS r
), random_range AS (
SELECT row_number() OVER () AS id, r AS score
FROM random
)
INSERT INTO object_scores
SELECT id, score
FROM random_range;
-- Mark some of them with a different status for filtering. The status
-- could toggle things like visibility.
UPDATE objects
SET status = 0
FROM (
SELECT trunc(random() * 2e6 + 1) AS id
FROM generate_series(1,1e3::int) AS i
) AS T
WHERE T.id = objects.id;
-- ###########################
-- Containers (and their objects) live in categories.
CREATE TABLE categories (
id SERIAL PRIMARY KEY
);
INSERT INTO categories (id)
SELECT i AS id FROM generate_series(1,10) AS i;
-- ###########################
-- Objects live in containers. Containers have categories.
CREATE TABLE containers (
id SERIAL PRIMARY KEY,
status SMALLINT DEFAULT 1,
category_id INTEGER REFERENCES categories(id)
);
WITH random AS (
SELECT trunc(abs(normal_rand((5*1e5)::int, 5, 2))) AS category_id
), random_containers AS (
SELECT row_number() OVER () AS id, greatest(1, least(10, category_id)) AS category_id
FROM random
)
INSERT INTO containers (id, category_id)
SELECT id, category_id
FROM random_containers;
-- Change status on some random containers to introduce filtering.
UPDATE containers
SET status = 0
FROM (
SELECT trunc(random() * (5*1e5) + 1) AS id
FROM generate_series(1,(5*1e3::int)) AS i
) AS T
WHERE T.id = containers.id;
-- ###########################
-- Objects have one canonical/default container but can live in more than one secondary
-- container where is_default is false.
CREATE TABLE object_container_map (
object_id INTEGER REFERENCES objects(id),
container_id INTEGER REFERENCES containers(id) ON DELETE CASCADE,
is_default BOOLEAN DEFAULT true
);
CREATE UNIQUE INDEX object_container_map_linked ON object_container_map(container_id, object_id) WHERE is_default = FALSE;
CREATE UNIQUE INDEX object_container_map_default ON object_container_map(object_id) WHERE is_default = TRUE;
-- Default somewhat random mappings
WITH random AS (
SELECT trunc(abs(normal_rand(2e6::int, 2.5*1e5, 0.5*1e5))) AS container_id
), random_maps AS (
SELECT row_number() OVER () AS object_id, greatest(1, least((5*1e5), container_id)) AS container_id
FROM random
)
INSERT INTO object_container_map (container_id, object_id)
SELECT container_id, object_id
FROM random_maps;
-- Create some meta mappings. This will provide test data for the slow queries below.
INSERT INTO categories (id) VALUES (11);
DO $$
DECLARE i INTEGER;
BEGIN
FOR i IN SELECT rownum FROM generate_series(1, 20) AS rownum LOOP
INSERT INTO containers (id, category_id) VALUES ((SELECT MAX(id)+1 FROM containers), 11);
WITH random_objects AS (
SELECT id AS object_id
FROM objects
ORDER BY random()
LIMIT 80000
)
INSERT INTO object_container_map (object_id, container_id, is_default)
SELECT DISTINCT object_id, (SELECT MAX(id) FROM containers), false
FROM random_objects;
END LOOP;
END $$;
--------------------------------------------------------------------------------
-- Distributions
SELECT category_id, COUNT(*)
FROM containers
GROUP BY category_id;
-- The meta mapping should be the largest with ~80k
SELECT container_id, COUNT(*)
FROM object_container_map
GROUP BY container_id
ORDER BY COUNT DESC
LIMIT 20;
SELECT containers.category_id, COUNT(*)
FROM objects
JOIN object_container_map ON (
object_container_map.object_id = objects.id
)
JOIN containers ON (
containers.id = object_container_map.container_id
)
GROUP BY containers.category_id
ORDER BY COUNT DESC;
-- Slow, but not too concerned
EXPLAIN ANALYZE
WITH object_positions AS (
SELECT objects.id, row_number() OVER (ORDER BY objects.id DESC) AS index
FROM objects
JOIN object_container_map ON (
objects.id = object_container_map.object_id AND
object_container_map.is_default
)
JOIN containers ON (
containers.id = object_container_map.container_id
)
WHERE objects.status = 1 AND containers.status = 1 AND containers.category_id = 9
)
SELECT id, index
FROM object_positions
WHERE index BETWEEN 4000 - 6 AND 4000 + 6
ORDER BY index ASC;
CREATE INDEX object_container_map_is_default_idx on object_container_map(container_id) where is_default;
-- By container-- this query should be fast ~5ms or less
WITH object_positions AS (
SELECT object_container_map.container_id, objects.id, row_number() OVER (ORDER BY objects.id DESC) AS index
FROM objects
JOIN object_container_map ON (
objects.id = object_container_map.object_id
)
JOIN containers ON (
containers.id = object_container_map.container_id
)
WHERE objects.status = 1 AND containers.status = 1 AND containers.id = 254457 AND object_container_map.is_default
)
SELECT container_id, id, index
FROM object_positions
WHERE index BETWEEN 17 - 6 AND 17 + 6
ORDER BY index ASC;
-- ############################################################################################
-- ###################### Need help here. We need to get this data in ~100ms or less. #########
-- Filter by meta container. This takes about 1500-2500ms once it's in memory on a mac book
-- pro (similar on production HW).
EXPLAIN (ANALYZE, BUFFERS)
WITH object_positions AS (
SELECT object_container_map.container_id, objects.id, containers.category_id, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY objects.id DESC) AS index
FROM objects
JOIN object_container_map ON (
objects.id = object_container_map.object_id
AND object_container_map.is_default = false
)
JOIN object_container_map default_map ON (
objects.id = default_map.object_id
AND default_map.is_default = true
)
JOIN containers ON (
containers.id = default_map.container_id
)
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001
AND containers.category_id IN (2,3,4,5,6)
ORDER BY objects.id DESC
)
SELECT row_number() OVER (), container_id, id AS object_id, category_id, index
FROM object_positions
WHERE index BETWEEN 50000 - 6 AND 50000 + 6
ORDER BY index ASC;
-- Ordering by score. Also needs to be ~100ms.
EXPLAIN (ANALYZE, BUFFERS)
WITH object_positions AS (
SELECT object_container_map.container_id, objects.id, containers.category_id, object_scores.score, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY object_scores.score DESC) AS index
FROM objects
JOIN object_scores ON (
object_scores.object_id = objects.id
)
JOIN object_container_map ON (
objects.id = object_container_map.object_id
AND object_container_map.is_default = false
)
JOIN object_container_map default_map ON (
objects.id = default_map.object_id
AND default_map.is_default = true
)
JOIN containers ON (
containers.id = default_map.container_id
)
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001
AND containers.category_id IN (2,3,4,5,6)
)
SELECT row_number() OVER (), container_id, id AS object_id, category_id, score, index
FROM object_positions
WHERE index BETWEEN 50000 - 6 AND 50000 + 6
ORDER BY index ASC;
COMMIT;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment