Skip to content

Instantly share code, notes, and snippets.

@adriangb

adriangb/q.sql Secret

Created February 13, 2024 17:01
Show Gist options
  • Save adriangb/05a01cca99a438de2a286945903c44f4 to your computer and use it in GitHub Desktop.
Save adriangb/05a01cca99a438de2a286945903c44f4 to your computer and use it in GitHub Desktop.
dictionary table example
CREATE OR REPLACE FUNCTION random_bytes(length integer)
RETURNS bytea AS $$
DECLARE
bytes bytea := '';
i integer := 0;
BEGIN
-- generate 1024 bytes at a time using gen_random_bytes(1024)
WHILE i < length LOOP
bytes := bytes || gen_random_bytes(least(1024, length - i));
i := i + 1024;
END LOOP;
RETURN bytes;
END;
$$ LANGUAGE plpgsql;
DROP TABLE IF EXISTS original, original_hashed, dictionary_table CASCADE;
DROP VIEW IF EXISTS joined_view;
CREATE TABLE original AS (
WITH large_random_bytes AS (
SELECT n AS id, random_bytes(4096 + n) AS attributes
FROM generate_series(1, 1000) n
)
SELECT
-- An incrementing timestamp
'2024-01-01 00:00:00.00+00'::timestamptz + (n || ' seconds')::interval AS start_timestamp,
-- Another relatively small column, let's just make it a random string including `n` to make it unique
'random_string_' || (n % 100) AS event_name,
-- The attributes column
lrb.attributes AS attributes
FROM generate_series(0, 1000000) n
JOIN large_random_bytes lrb ON n % 1000 = lrb.id
);
CREATE INDEX ON original(start_timestamp);
CREATE TABLE dictionary_table AS (
SELECT DISTINCT time_bucket('1 day', start_timestamp) AS start_timestamp_range, attributes, md5(attributes) AS hash
FROM original
);
CREATE INDEX ON dictionary_table (start_timestamp_range, hash);
CREATE TABLE original_hashed AS (
SELECT
start_timestamp,
event_name,
md5(attributes) AS hash
FROM original
);
CREATE INDEX ON original_hashed (start_timestamp, time_bucket('1 day', start_timestamp), hash);
CREATE VIEW joined_view AS (
SELECT
original_hashed.start_timestamp,
original_hashed.event_name,
dictionary_table.attributes
FROM original_hashed
LEFT JOIN dictionary_table ON (
time_bucket('1 day', original_hashed.start_timestamp) = dictionary_table.start_timestamp_range
AND
original_hashed.hash = dictionary_table.hash
)
);
-- Select all data
explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM joined_view
WHERE (
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/f8b3b7d5-c236-4326-9a67-20fa4cdff4bc
explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM original
WHERE (
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/bcad3767-b8aa-4a37-acc7-7667266f29be
-- A relatively aggregation selective query
explain (analyze, buffers, verbose, settings, format json)
SELECT count(*)
FROM joined_view
WHERE (
get_byte(attributes, 4) < 100
AND
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/c5b68839-4c66-4b14-8de5-757bfc22fdb3
explain (analyze, buffers, verbose, settings, format json)
SELECT count(*)
FROM original
WHERE (
get_byte(attributes, 4) < 100
AND
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/831d9c4c-e911-4caf-a6c3-eee5343e44d8
-- Select only some columns
explain (analyze, buffers, verbose, settings, format json)
SELECT start_timestamp
FROM joined_view
WHERE (
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/5f617bfa-7816-4f8d-9fd7-24388c08978c
explain (analyze, buffers, verbose, settings, format json)
SELECT start_timestamp
FROM original
WHERE (
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/f05de3e8-ca8d-4404-ad33-69f103a3b85a
-- A relatively selective query
explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM joined_view
WHERE (
get_byte(attributes, 4) < 100
AND
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/a75b20bd-07d5-4402-a0d8-22419682307a
explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM original
WHERE (
get_byte(attributes, 4) < 100
AND
start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
AND
start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/47ef84f0-a96e-4baa-af40-2ec241cbb6e2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment