Last active
August 29, 2015 13:56
-
-
Save drsnyder/9277054 to your computer and use it in GitHub Desktop.
This gist contains setup SQL and example data for reproducing an object ordering problem that I'm trying to optimize.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- For normal_rand() so we can create distributions that are not uniform. | |
-- The "real" distributions are not normal but they are closer to normal than uniform. | |
CREATE EXTENSION tablefunc; | |
BEGIN; | |
-- ########################### | |
-- The base "object" that is displayed using different either id DESC proxy | |
-- or scores. | |
CREATE TABLE objects ( | |
id INTEGER PRIMARY KEY, | |
status SMALLINT DEFAULT 1 | |
); | |
INSERT INTO objects (id) | |
SELECT i AS id FROM generate_series(1,2e6::int) AS i; | |
-- ########################### | |
-- Object scores can be used to order the set. This could be views, replies, etc. | |
CREATE TABLE object_scores ( | |
object_id INTEGER PRIMARY KEY REFERENCES objects(id), | |
score NUMERIC NOT NULL DEFAULT 0 | |
); | |
CREATE INDEX object_scores_score_idx ON object_scores (score); | |
WITH random AS ( | |
SELECT abs(normal_rand(2e6::int, 0, 1)) AS r | |
), random_range AS ( | |
SELECT row_number() OVER () AS id, r AS score | |
FROM random | |
) | |
INSERT INTO object_scores | |
SELECT id, score | |
FROM random_range; | |
-- Mark some of them with a different status for filtering. The status | |
-- could toggle things like visibility. | |
UPDATE objects | |
SET status = 0 | |
FROM ( | |
SELECT trunc(random() * 2e6 + 1) AS id | |
FROM generate_series(1,1e3::int) AS i | |
) AS T | |
WHERE T.id = objects.id; | |
-- ########################### | |
-- Containers (and their objects) live in categories. | |
CREATE TABLE categories ( | |
id SERIAL PRIMARY KEY | |
); | |
INSERT INTO categories (id) | |
SELECT i AS id FROM generate_series(1,10) AS i; | |
-- ########################### | |
-- Objects live in containers. Containers have categories. | |
CREATE TABLE containers ( | |
id SERIAL PRIMARY KEY, | |
status SMALLINT DEFAULT 1, | |
category_id INTEGER REFERENCES categories(id) | |
); | |
WITH random AS ( | |
SELECT trunc(abs(normal_rand((5*1e5)::int, 5, 2))) AS category_id | |
), random_containers AS ( | |
SELECT row_number() OVER () AS id, greatest(1, least(10, category_id)) AS category_id | |
FROM random | |
) | |
INSERT INTO containers (id, category_id) | |
SELECT id, category_id | |
FROM random_containers; | |
-- Change status on some random containers to introduce filtering. | |
UPDATE containers | |
SET status = 0 | |
FROM ( | |
SELECT trunc(random() * (5*1e5) + 1) AS id | |
FROM generate_series(1,(5*1e3::int)) AS i | |
) AS T | |
WHERE T.id = containers.id; | |
-- ########################### | |
-- Objects have one canonical/default container but can live in more than one secondary | |
-- container where is_default is false. | |
CREATE TABLE object_container_map ( | |
object_id INTEGER REFERENCES objects(id), | |
container_id INTEGER REFERENCES containers(id) ON DELETE CASCADE, | |
is_default BOOLEAN DEFAULT true | |
); | |
CREATE UNIQUE INDEX object_container_map_linked ON object_container_map(container_id, object_id) WHERE is_default = FALSE; | |
CREATE UNIQUE INDEX object_container_map_default ON object_container_map(object_id) WHERE is_default = TRUE; | |
-- Default somewhat random mappings | |
WITH random AS ( | |
SELECT trunc(abs(normal_rand(2e6::int, 2.5*1e5, 0.5*1e5))) AS container_id | |
), random_maps AS ( | |
SELECT row_number() OVER () AS object_id, greatest(1, least((5*1e5), container_id)) AS container_id | |
FROM random | |
) | |
INSERT INTO object_container_map (container_id, object_id) | |
SELECT container_id, object_id | |
FROM random_maps; | |
-- Create some meta mappings. This will provide test data for the slow queries below. | |
INSERT INTO categories (id) VALUES (11); | |
DO $$ | |
DECLARE i INTEGER; | |
BEGIN | |
FOR i IN SELECT rownum FROM generate_series(1, 20) AS rownum LOOP | |
INSERT INTO containers (id, category_id) VALUES ((SELECT MAX(id)+1 FROM containers), 11); | |
WITH random_objects AS ( | |
SELECT id AS object_id | |
FROM objects | |
ORDER BY random() | |
LIMIT 80000 | |
) | |
INSERT INTO object_container_map (object_id, container_id, is_default) | |
SELECT DISTINCT object_id, (SELECT MAX(id) FROM containers), false | |
FROM random_objects; | |
END LOOP; | |
END $$; | |
-------------------------------------------------------------------------------- | |
-- Distributions | |
SELECT category_id, COUNT(*) | |
FROM containers | |
GROUP BY category_id; | |
-- The meta mapping should be the largest with ~80k | |
SELECT container_id, COUNT(*) | |
FROM object_container_map | |
GROUP BY container_id | |
ORDER BY COUNT DESC | |
LIMIT 20; | |
SELECT containers.category_id, COUNT(*) | |
FROM objects | |
JOIN object_container_map ON ( | |
object_container_map.object_id = objects.id | |
) | |
JOIN containers ON ( | |
containers.id = object_container_map.container_id | |
) | |
GROUP BY containers.category_id | |
ORDER BY COUNT DESC; | |
-- Slow, but not too concerned | |
EXPLAIN ANALYZE | |
WITH object_positions AS ( | |
SELECT objects.id, row_number() OVER (ORDER BY objects.id DESC) AS index | |
FROM objects | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id AND | |
object_container_map.is_default | |
) | |
JOIN containers ON ( | |
containers.id = object_container_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND containers.category_id = 9 | |
) | |
SELECT id, index | |
FROM object_positions | |
WHERE index BETWEEN 4000 - 6 AND 4000 + 6 | |
ORDER BY index ASC; | |
CREATE INDEX object_container_map_is_default_idx on object_container_map(container_id) where is_default; | |
-- By container-- this query should be fast ~5ms or less | |
WITH object_positions AS ( | |
SELECT object_container_map.container_id, objects.id, row_number() OVER (ORDER BY objects.id DESC) AS index | |
FROM objects | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id | |
) | |
JOIN containers ON ( | |
containers.id = object_container_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND containers.id = 254457 AND object_container_map.is_default | |
) | |
SELECT container_id, id, index | |
FROM object_positions | |
WHERE index BETWEEN 17 - 6 AND 17 + 6 | |
ORDER BY index ASC; | |
-- ############################################################################################ | |
-- ###################### Need help here. We need to get this data in ~100ms or less. ######### | |
-- Filter by meta container. This takes about 1500-2500ms once it's in memory on a mac book | |
-- pro (similar on production HW). | |
EXPLAIN (ANALYZE, BUFFERS) | |
WITH object_positions AS ( | |
SELECT object_container_map.container_id, objects.id, containers.category_id, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY objects.id DESC) AS index | |
FROM objects | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id | |
AND object_container_map.is_default = false | |
) | |
JOIN object_container_map default_map ON ( | |
objects.id = default_map.object_id | |
AND default_map.is_default = true | |
) | |
JOIN containers ON ( | |
containers.id = default_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001 | |
AND containers.category_id IN (2,3,4,5,6) | |
ORDER BY objects.id DESC | |
) | |
SELECT row_number() OVER (), container_id, id AS object_id, category_id, index | |
FROM object_positions | |
WHERE index BETWEEN 50000 - 6 AND 50000 + 6 | |
ORDER BY index ASC; | |
-- Ordering by score. Also needs to be ~100ms. | |
EXPLAIN (ANALYZE, BUFFERS) | |
WITH object_positions AS ( | |
SELECT object_container_map.container_id, objects.id, containers.category_id, object_scores.score, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY object_scores.score DESC) AS index | |
FROM objects | |
JOIN object_scores ON ( | |
object_scores.object_id = objects.id | |
) | |
JOIN object_container_map ON ( | |
objects.id = object_container_map.object_id | |
AND object_container_map.is_default = false | |
) | |
JOIN object_container_map default_map ON ( | |
objects.id = default_map.object_id | |
AND default_map.is_default = true | |
) | |
JOIN containers ON ( | |
containers.id = default_map.container_id | |
) | |
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001 | |
AND containers.category_id IN (2,3,4,5,6) | |
) | |
SELECT row_number() OVER (), container_id, id AS object_id, category_id, score, index | |
FROM object_positions | |
WHERE index BETWEEN 50000 - 6 AND 50000 + 6 | |
ORDER BY index ASC; | |
COMMIT; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment