Skip to content

Instantly share code, notes, and snippets.

@drsnyder
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drsnyder/9277054 to your computer and use it in GitHub Desktop.
Save drsnyder/9277054 to your computer and use it in GitHub Desktop.
This gist contains setup SQL and example data for reproducing an object ordering problem that I'm trying to optimize.
-- For normal_rand() so we can create distributions that are not uniform.
-- The "real" distributions are not normal but they are closer to normal than uniform.
-- IF NOT EXISTS makes the script re-runnable when tablefunc is already
-- installed (supported since PostgreSQL 9.1, when CREATE EXTENSION was added).
CREATE EXTENSION IF NOT EXISTS tablefunc;
-- Everything below runs in one transaction; see COMMIT at the end of the file.
BEGIN;
-- ###########################
-- The base "object" that is displayed in different orderings: either using
-- id DESC as a recency proxy, or by score.
CREATE TABLE objects (
id INTEGER PRIMARY KEY,
-- 1 = visible, 0 = filtered (see the status UPDATE below). NOT NULL added:
-- every query in this file filters on status = 1, and a NULL status row
-- would silently vanish from all result sets.
status SMALLINT NOT NULL DEFAULT 1
);
-- Seed 2 million objects with sequential ids.
INSERT INTO objects (id)
SELECT i AS id FROM generate_series(1,2e6::int) AS i;
-- ###########################
-- Object scores can be used to order the set. This could be views, replies, etc.
CREATE TABLE object_scores (
object_id INTEGER PRIMARY KEY REFERENCES objects(id),
score NUMERIC NOT NULL DEFAULT 0
);
CREATE INDEX object_scores_score_idx ON object_scores (score);
-- Give each object a score drawn from |N(0,1)| so the score ordering is
-- non-uniform (closer to the "real" distribution than random() would be).
WITH random AS (
SELECT abs(normal_rand(2e6::int, 0, 1)) AS r
), random_range AS (
-- row_number() OVER () with no ORDER BY: which score lands on which object
-- is arbitrary, which is fine because the values are random anyway.
SELECT row_number() OVER () AS id, r AS score
FROM random
)
-- Explicit column list so the INSERT does not break silently if the table
-- ever grows a column.
INSERT INTO object_scores (object_id, score)
SELECT id, score
FROM random_range;
-- Mark some of them with a different status for filtering. The status
-- could toggle things like visibility.
-- Note: the 1000 sampled ids may contain duplicates, so slightly fewer than
-- 1000 rows can end up updated.
UPDATE objects
SET status = 0
FROM (
SELECT trunc(random() * 2e6 + 1) AS victim_id
FROM generate_series(1, 1e3::int) AS g
) AS victims
WHERE objects.id = victims.victim_id;
-- ###########################
-- Containers (and their objects) live in categories.
CREATE TABLE categories (
id SERIAL PRIMARY KEY
);
-- Seed 10 categories with explicit ids 1..10.
-- NOTE(review): supplying explicit ids does not advance the SERIAL sequence,
-- so a later DEFAULT insert would collide. This script always supplies ids
-- explicitly (category 11 is added below), so it works here -- but confirm
-- before reusing this table elsewhere.
INSERT INTO categories (id)
SELECT i AS id FROM generate_series(1,10) AS i;
-- ###########################
-- Objects live in containers. Containers have categories.
CREATE TABLE containers (
id SERIAL PRIMARY KEY,
-- 1 = active, 0 = filtered (see the status UPDATE below). NOT NULL added:
-- all queries below filter on status = 1, and NULL rows would silently
-- drop out of every result set.
status SMALLINT NOT NULL DEFAULT 1,
category_id INTEGER REFERENCES categories(id)
);
-- 500k containers whose category ids are drawn from N(5, 2) and clamped to
-- the valid range [1, 10], so middle categories are the most populated.
WITH random AS (
SELECT trunc(abs(normal_rand((5*1e5)::int, 5, 2))) AS category_id
), random_containers AS (
SELECT row_number() OVER () AS id, greatest(1, least(10, category_id)) AS category_id
FROM random
)
INSERT INTO containers (id, category_id)
SELECT id, category_id
FROM random_containers;
-- Change status on some random containers to introduce filtering.
-- Note: the 5000 sampled ids may repeat, so slightly fewer rows can be hit.
UPDATE containers
SET status = 0
FROM (
SELECT trunc(random() * (5*1e5) + 1) AS victim_id
FROM generate_series(1, (5*1e3)::int) AS g
) AS victims
WHERE containers.id = victims.victim_id;
-- ###########################
-- Objects have one canonical/default container but can live in more than one secondary
-- container where is_default is false.
CREATE TABLE object_container_map (
-- NOT NULL on all three columns: a mapping row with a missing end is
-- meaningless, and the partial unique indexes below assume is_default is
-- always populated (a NULL would match neither predicate).
object_id INTEGER NOT NULL REFERENCES objects(id),
container_id INTEGER NOT NULL REFERENCES containers(id) ON DELETE CASCADE,
is_default BOOLEAN NOT NULL DEFAULT true
);
-- A (container, object) pair appears at most once among secondary mappings...
CREATE UNIQUE INDEX object_container_map_linked ON object_container_map(container_id, object_id) WHERE is_default = FALSE;
-- ...and each object has at most one default container.
CREATE UNIQUE INDEX object_container_map_default ON object_container_map(object_id) WHERE is_default = TRUE;
-- Default somewhat random mappings: each object's default container is drawn
-- from N(250000, 50000), clamped to the valid id range [1, 500000].
WITH random AS (
SELECT trunc(abs(normal_rand(2e6::int, 2.5*1e5, 0.5*1e5))) AS container_id
), random_maps AS (
SELECT row_number() OVER () AS object_id, greatest(1, least((5*1e5), container_id)) AS container_id
FROM random
)
INSERT INTO object_container_map (container_id, object_id)
SELECT container_id, object_id
FROM random_maps;
-- Create some meta mappings. This will provide test data for the slow queries below.
INSERT INTO categories (id) VALUES (11);
-- Create 20 "meta" containers in category 11, each holding 80k random objects
-- as secondary (is_default = false) memberships.
DO $$
DECLARE
i INTEGER;
new_container_id INTEGER;
BEGIN
FOR i IN 1..20 LOOP
-- Capture the freshly inserted id with RETURNING instead of re-running
-- SELECT MAX(id) in the mapping insert: one scan instead of two, and no
-- window for another session's insert to change the answer in between.
INSERT INTO containers (id, category_id)
VALUES ((SELECT MAX(id)+1 FROM containers), 11)
RETURNING id INTO new_container_id;
WITH random_objects AS (
SELECT id AS object_id
FROM objects
ORDER BY random()
LIMIT 80000
)
-- No DISTINCT needed: objects.id is the primary key, so the sampled ids
-- are already unique.
INSERT INTO object_container_map (object_id, container_id, is_default)
SELECT object_id, new_container_id, false
FROM random_objects;
END LOOP;
END $$;
--------------------------------------------------------------------------------
-- Distributions
-- Containers per category (ORDER BY added so output is deterministic).
SELECT category_id, COUNT(*)
FROM containers
GROUP BY category_id
ORDER BY category_id;
-- The meta mapping should be the largest with ~80k
-- Order by the aggregate expression itself rather than the bare implicit
-- output column name "count" -- relying on that name is fragile and
-- PostgreSQL-specific.
SELECT container_id, COUNT(*)
FROM object_container_map
GROUP BY container_id
ORDER BY COUNT(*) DESC
LIMIT 20;
-- Object memberships per category (an object counts once per container it
-- appears in). Order by the aggregate expression, not the implicit "count"
-- output column name.
SELECT containers.category_id, COUNT(*)
FROM objects
JOIN object_container_map ON (
object_container_map.object_id = objects.id
)
JOIN containers ON (
containers.id = object_container_map.container_id
)
GROUP BY containers.category_id
ORDER BY COUNT(*) DESC;
-- Slow, but not too concerned
-- Number the visible, default-mapped objects of category 9 by descending id,
-- then pull the 13-row window centered on position 4000.
EXPLAIN ANALYZE
WITH object_positions AS (
SELECT o.id, row_number() OVER (ORDER BY o.id DESC) AS index
FROM objects o
JOIN object_container_map ocm ON (
ocm.object_id = o.id AND
ocm.is_default
)
JOIN containers c ON (
c.id = ocm.container_id
)
WHERE o.status = 1
AND c.status = 1
AND c.category_id = 9
)
SELECT id, index
FROM object_positions
WHERE index >= 4000 - 6 AND index <= 4000 + 6
ORDER BY index;
-- Partial index covering only default mappings, keyed by container.
-- Keyword case normalized (was lowercase "on"/"where") to match the rest of
-- the file's uppercase convention.
CREATE INDEX object_container_map_is_default_idx ON object_container_map(container_id) WHERE is_default;
-- By container-- this query should be fast ~5ms or less
-- Positions of the visible default objects in one specific container,
-- windowed around index 17.
WITH object_positions AS (
SELECT ocm.container_id, o.id, row_number() OVER (ORDER BY o.id DESC) AS index
FROM objects o
JOIN object_container_map ocm ON (
ocm.object_id = o.id
)
JOIN containers c ON (
c.id = ocm.container_id
)
WHERE o.status = 1
AND c.status = 1
AND c.id = 254457
AND ocm.is_default
)
SELECT container_id, id, index
FROM object_positions
WHERE index >= 17 - 6 AND index <= 17 + 6
ORDER BY index;
-- ############################################################################################
-- ###################### Need help here. We need to get this data in ~100ms or less. #########
-- Filter by meta container. This takes about 1500-2500ms once it's in memory on a mac book
-- pro (similar on production HW).
-- Ranks every object in meta container 500001 whose *default* container is
-- live and in categories 2-6, numbering by descending object id, then returns
-- the 13-row window around position 50000. The whole ~80k-row membership is
-- materialized and numbered just to read 13 rows.
EXPLAIN (ANALYZE, BUFFERS)
WITH object_positions AS (
-- PARTITION BY container_id is a single partition here, since container_id
-- is pinned to 500001 in the WHERE clause below.
SELECT object_container_map.container_id, objects.id, containers.category_id, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY objects.id DESC) AS index
FROM objects
-- the object's secondary (meta) membership
JOIN object_container_map ON (
objects.id = object_container_map.object_id
AND object_container_map.is_default = false
)
-- the object's single default mapping (unique via the partial index)
JOIN object_container_map default_map ON (
objects.id = default_map.object_id
AND default_map.is_default = true
)
-- the default container supplies the category filter
JOIN containers ON (
containers.id = default_map.container_id
)
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001
AND containers.category_id IN (2,3,4,5,6)
-- This ORDER BY feeds the outer row_number() OVER (), which has no ORDER BY
-- of its own; removing it would leave that outer numbering unspecified.
ORDER BY objects.id DESC
)
SELECT row_number() OVER (), container_id, id AS object_id, category_id, index
FROM object_positions
WHERE index BETWEEN 50000 - 6 AND 50000 + 6
ORDER BY index ASC;
-- Ordering by score. Also needs to be ~100ms.
-- Same shape as the query above, but ranked by object_scores.score instead of
-- id. NOTE(review): unlike the previous query, the CTE has no ORDER BY, so the
-- outer row_number() OVER () numbering is unspecified; only "index" is tied to
-- the score ordering.
EXPLAIN (ANALYZE, BUFFERS)
WITH object_positions AS (
SELECT object_container_map.container_id, objects.id, containers.category_id, object_scores.score, row_number() OVER (PARTITION BY object_container_map.container_id ORDER BY object_scores.score DESC) AS index
FROM objects
-- score used for the ranking
JOIN object_scores ON (
object_scores.object_id = objects.id
)
-- the object's secondary (meta) membership
JOIN object_container_map ON (
objects.id = object_container_map.object_id
AND object_container_map.is_default = false
)
-- the object's single default mapping
JOIN object_container_map default_map ON (
objects.id = default_map.object_id
AND default_map.is_default = true
)
-- the default container supplies the category filter
JOIN containers ON (
containers.id = default_map.container_id
)
WHERE objects.status = 1 AND containers.status = 1 AND object_container_map.container_id = 500001
AND containers.category_id IN (2,3,4,5,6)
)
SELECT row_number() OVER (), container_id, id AS object_id, category_id, score, index
FROM object_positions
WHERE index BETWEEN 50000 - 6 AND 50000 + 6
ORDER BY index ASC;
-- Ends the transaction opened by BEGIN at the top of the file.
COMMIT;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment