adriangb/q.sql Secret

## q.sql
CREATE OR REPLACE FUNCTION random_bytes(length integer)
RETURNS bytea AS $$
DECLARE
  bytes bytea := '';
  i integer := 0;
BEGIN
  -- generate 1024 bytes at a time using gen_random_bytes(1024)
  WHILE i < length LOOP
    bytes := bytes || gen_random_bytes(least(1024, length - i));
    i := i + 1024;
  END LOOP;

  RETURN bytes;
END;
$$ LANGUAGE plpgsql;

DROP TABLE IF EXISTS original, original_hashed, dictionary_table CASCADE;
DROP VIEW IF EXISTS joined_view;

CREATE TABLE original AS (
  WITH large_random_bytes AS (
    SELECT n AS id, random_bytes(4096 + n) AS attributes
    FROM generate_series(1, 1000) n
  )
  SELECT
    -- An incrementing timestamp
    '2024-01-01 00:00:00.00+00'::timestamptz + (n || ' seconds')::interval AS start_timestamp,
    -- Another relatively small column, let's just make it a random string including `n` to make it unique
    'random_string_' || (n % 100) AS event_name,
    -- The attributes column
    lrb.attributes AS attributes
    FROM generate_series(0, 1000000) n
    JOIN large_random_bytes lrb ON n % 1000 = lrb.id
);
CREATE INDEX ON original(start_timestamp);

CREATE TABLE dictionary_table AS (
  SELECT DISTINCT time_bucket('1 day', start_timestamp) AS start_timestamp_range, attributes, md5(attributes) AS hash
  FROM original
);
CREATE INDEX ON dictionary_table (start_timestamp_range, hash);

CREATE TABLE original_hashed AS (
  SELECT
    start_timestamp,
    event_name,
    md5(attributes) AS hash
  FROM original
);
CREATE INDEX ON original_hashed (start_timestamp, time_bucket('1 day', start_timestamp), hash);


CREATE VIEW joined_view AS (
  SELECT
    original_hashed.start_timestamp,
    original_hashed.event_name,
    dictionary_table.attributes
  FROM original_hashed
  LEFT JOIN dictionary_table ON (
    time_bucket('1 day', original_hashed.start_timestamp) = dictionary_table.start_timestamp_range
    AND
    original_hashed.hash = dictionary_table.hash
  )
);

-- Select all data
explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM joined_view
WHERE (
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/f8b3b7d5-c236-4326-9a67-20fa4cdff4bc

explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM original
WHERE (
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/bcad3767-b8aa-4a37-acc7-7667266f29be

-- A relatively aggregation selective query
explain (analyze, buffers, verbose, settings, format json)
SELECT count(*)
FROM joined_view
WHERE (
  get_byte(attributes, 4) < 100
  AND
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/c5b68839-4c66-4b14-8de5-757bfc22fdb3

explain (analyze, buffers, verbose, settings, format json)
SELECT count(*)
FROM original
WHERE (
  get_byte(attributes, 4) < 100
  AND
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/831d9c4c-e911-4caf-a6c3-eee5343e44d8

-- Select only some columns
explain (analyze, buffers, verbose, settings, format json)
SELECT start_timestamp
FROM joined_view
WHERE (
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/5f617bfa-7816-4f8d-9fd7-24388c08978c

explain (analyze, buffers, verbose, settings, format json)
SELECT start_timestamp
FROM original
WHERE (
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/f05de3e8-ca8d-4404-ad33-69f103a3b85a


-- A relatively selective query
explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM joined_view
WHERE (
  get_byte(attributes, 4) < 100
  AND
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/a75b20bd-07d5-4402-a0d8-22419682307a

explain (analyze, buffers, verbose, settings, format json)
SELECT *
FROM original
WHERE (
  get_byte(attributes, 4) < 100
  AND
  start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
  AND
  start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
);
-- https://app.pgmustard.com/#/explore/47ef84f0-a96e-4baa-af40-2ec241cbb6e2
	CREATE OR REPLACE FUNCTION random_bytes(length integer)
	RETURNS bytea AS $$
	DECLARE
	bytes bytea := '';
	i integer := 0;
	BEGIN
	-- generate 1024 bytes at a time using gen_random_bytes(1024)
	WHILE i < length LOOP
	bytes := bytes \|\| gen_random_bytes(least(1024, length - i));
	i := i + 1024;
	END LOOP;

	RETURN bytes;
	END;
	$$ LANGUAGE plpgsql;

	DROP TABLE IF EXISTS original, original_hashed, dictionary_table CASCADE;
	DROP VIEW IF EXISTS joined_view;

	CREATE TABLE original AS (
	WITH large_random_bytes AS (
	SELECT n AS id, random_bytes(4096 + n) AS attributes
	FROM generate_series(1, 1000) n
	)
	SELECT
	-- An incrementing timestamp
	'2024-01-01 00:00:00.00+00'::timestamptz + (n \|\| ' seconds')::interval AS start_timestamp,
	-- Another relatively small column, let's just make it a random string including `n` to make it unique
	'random_string_' \|\| (n % 100) AS event_name,
	-- The attributes column
	lrb.attributes AS attributes
	FROM generate_series(0, 1000000) n
	JOIN large_random_bytes lrb ON n % 1000 = lrb.id
	);
	CREATE INDEX ON original(start_timestamp);

	CREATE TABLE dictionary_table AS (
	SELECT DISTINCT time_bucket('1 day', start_timestamp) AS start_timestamp_range, attributes, md5(attributes) AS hash
	FROM original
	);
	CREATE INDEX ON dictionary_table (start_timestamp_range, hash);

	CREATE TABLE original_hashed AS (
	SELECT
	start_timestamp,
	event_name,
	md5(attributes) AS hash
	FROM original
	);
	CREATE INDEX ON original_hashed (start_timestamp, time_bucket('1 day', start_timestamp), hash);


	CREATE VIEW joined_view AS (
	SELECT
	original_hashed.start_timestamp,
	original_hashed.event_name,
	dictionary_table.attributes
	FROM original_hashed
	LEFT JOIN dictionary_table ON (
	time_bucket('1 day', original_hashed.start_timestamp) = dictionary_table.start_timestamp_range
	AND
	original_hashed.hash = dictionary_table.hash
	)
	);

	-- Select all data
	explain (analyze, buffers, verbose, settings, format json)
	SELECT *
	FROM joined_view
	WHERE (
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/f8b3b7d5-c236-4326-9a67-20fa4cdff4bc

	explain (analyze, buffers, verbose, settings, format json)
	SELECT *
	FROM original
	WHERE (
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/bcad3767-b8aa-4a37-acc7-7667266f29be

	-- A relatively aggregation selective query
	explain (analyze, buffers, verbose, settings, format json)
	SELECT count(*)
	FROM joined_view
	WHERE (
	get_byte(attributes, 4) < 100
	AND
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/c5b68839-4c66-4b14-8de5-757bfc22fdb3

	explain (analyze, buffers, verbose, settings, format json)
	SELECT count(*)
	FROM original
	WHERE (
	get_byte(attributes, 4) < 100
	AND
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/831d9c4c-e911-4caf-a6c3-eee5343e44d8

	-- Select only some columns
	explain (analyze, buffers, verbose, settings, format json)
	SELECT start_timestamp
	FROM joined_view
	WHERE (
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/5f617bfa-7816-4f8d-9fd7-24388c08978c

	explain (analyze, buffers, verbose, settings, format json)
	SELECT start_timestamp
	FROM original
	WHERE (
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/f05de3e8-ca8d-4404-ad33-69f103a3b85a


	-- A relatively selective query
	explain (analyze, buffers, verbose, settings, format json)
	SELECT *
	FROM joined_view
	WHERE (
	get_byte(attributes, 4) < 100
	AND
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/a75b20bd-07d5-4402-a0d8-22419682307a

	explain (analyze, buffers, verbose, settings, format json)
	SELECT *
	FROM original
	WHERE (
	get_byte(attributes, 4) < 100
	AND
	start_timestamp > ('2024-01-01 00:00:00.00+00'::timestamptz + interval '1 day')
	AND
	start_timestamp < ('2024-01-01 00:00:00.00+00'::timestamptz + interval '2 day')
	);
	-- https://app.pgmustard.com/#/explore/47ef84f0-a96e-4baa-af40-2ec241cbb6e2