-
-
Save adriangb/05a01cca99a438de2a286945903c44f4 to your computer and use it in GitHub Desktop.
dictionary table example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION random_bytes(length integer) | |
RETURNS bytea AS $$ | |
DECLARE | |
bytes bytea := ''; | |
i integer := 0; | |
BEGIN | |
-- generate 1024 bytes at a time using gen_random_bytes(1024) | |
WHILE i < length LOOP | |
bytes := bytes || gen_random_bytes(least(1024, length - i)); | |
i := i + 1024; | |
END LOOP; | |
RETURN bytes; | |
END; | |
$$ LANGUAGE plpgsql; | |
DROP TABLE IF EXISTS original, original_hashed, dictionary_table CASCADE; | |
DROP VIEW IF EXISTS joined_view; | |
CREATE TABLE original AS ( | |
WITH large_random_bytes AS ( | |
SELECT n AS id, random_bytes(4096 + n) AS attributes | |
FROM generate_series(1, 1000) n | |
) | |
SELECT | |
-- An incrementing timestamp | |
'2024-01-01 00:00:00.00+00'::timestamptz + (n || ' seconds')::interval AS start_timestamp, | |
-- Another relatively small column, let's just make it a random string including `n` to make it unique | |
'random_string_' || (n % 100) AS event_name, | |
-- The attributes column | |
lrb.attributes AS attributes | |
FROM generate_series(0, 1000000) n | |
JOIN large_random_bytes lrb ON n % 1000 = lrb.id | |
); | |
CREATE INDEX ON original(start_timestamp); | |
CREATE TABLE dictionary_table AS ( | |
SELECT DISTINCT time_bucket('1 day', start_timestamp) AS start_timestamp_range, attributes, md5(attributes) AS hash | |
FROM original | |
); | |
CREATE INDEX ON dictionary_table (start_timestamp_range, hash); | |
CREATE TABLE original_hashed AS ( | |
SELECT | |
start_timestamp, | |
event_name, | |
md5(attributes) AS hash | |
FROM original | |
); | |
CREATE INDEX ON original_hashed (start_timestamp, time_bucket('1 day', start_timestamp), hash); | |
CREATE VIEW joined_view AS ( | |
SELECT | |
original_hashed.start_timestamp, | |
original_hashed.event_name, | |
dictionary_table.attributes | |
FROM original_hashed | |
LEFT JOIN dictionary_table ON ( | |
time_bucket('1 day', original_hashed.start_timestamp) = dictionary_table.start_timestamp_range | |
AND | |
original_hashed.hash = dictionary_table.hash | |
) | |
); | |
-- Select all data | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT * | |
FROM joined_view | |
WHERE ( | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/f8b3b7d5-c236-4326-9a67-20fa4cdff4bc | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT * | |
FROM original | |
WHERE ( | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/bcad3767-b8aa-4a37-acc7-7667266f29be | |
-- A relatively aggregation selective query | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT count(*) | |
FROM joined_view | |
WHERE ( | |
get_byte(attributes, 4) < 100 | |
AND | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/c5b68839-4c66-4b14-8de5-757bfc22fdb3 | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT count(*) | |
FROM original | |
WHERE ( | |
get_byte(attributes, 4) < 100 | |
AND | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/831d9c4c-e911-4caf-a6c3-eee5343e44d8 | |
-- Select only some columns | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT start_timestamp | |
FROM joined_view | |
WHERE ( | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/5f617bfa-7816-4f8d-9fd7-24388c08978c | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT start_timestamp | |
FROM original | |
WHERE ( | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/f05de3e8-ca8d-4404-ad33-69f103a3b85a | |
-- A relatively selective query | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT * | |
FROM joined_view | |
WHERE ( | |
get_byte(attributes, 4) < 100 | |
AND | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/a75b20bd-07d5-4402-a0d8-22419682307a | |
explain (analyze, buffers, verbose, settings, format json) | |
SELECT * | |
FROM original | |
WHERE ( | |
get_byte(attributes, 4) < 100 | |
AND | |
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day') | |
AND | |
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day') | |
); | |
-- https://app.pgmustard.com/#/explore/47ef84f0-a96e-4baa-af40-2ec241cbb6e2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment